update doc

ecmwf-lab · Mar 24, 2024 · 2fdc18f · 2fdc18f
1 parent 876502a
commit 2fdc18f
Show file tree

Hide file tree

Showing 10 changed files with 359 additions and 197 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -57,6 +57,8 @@
     "sphinx.ext.todo",
     "sphinx_rtd_theme",
     "nbsphinx",
+    "sphinx.ext.graphviz",
+    "sphinx.ext.intersphinx",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -67,6 +69,10 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "'**.ipynb_checkpoints'"]
 
+intersphinx_mapping = {
+    "python": ("https://python.readthedocs.io/en/latest", None),
+}
+
 
 # https://www.notion.so/Deepnote-Launch-Buttons-63c642a5e875463495ed2341e83a4b2a
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -42,6 +42,13 @@ datasets <building-introduction>`.
 **Using training datasets**
 
 -  :doc:`using/introduction`
+-  :doc:`using/opening`
+-  :doc:`using/subsetting`
+-  :doc:`using/combining`
+-  :doc:`using/selecting`
+-  :doc:`using/grids`
+-  :doc:`using/statistics`
+-  :doc:`using/other`
 -  :doc:`using/options`
 
 .. toctree::
@@ -50,6 +57,13 @@ datasets <building-introduction>`.
    :caption: Using datasets
 
    using/introduction
+   using/opening
+   using/subsetting
+   using/combining
+   using/selecting
+   using/grids
+   using/statistics
+   using/other
    using/options
 
 **Building training datasets**

diff --git a/docs/using/combining.rst b/docs/using/combining.rst
@@ -0,0 +1,81 @@
+.. _combining-datasets:
+
+####################
+ Combining datasets
+####################
+
+When combining datasets, the statistics of the first dataset are used by
+default. You can change this by setting the :ref:`selecting-statistics`
+option to a different dataset, even if it is not part of the
+combination.
+
+.. _concat:
+
+********
+ concat
+********
+
+You can concatenate two or more datasets along the dates dimension. The
+package will check that all datasets are compatible (same resolution,
+same variables, etc.). Currently, the datasets must be given in
+chronological order with no gaps between them.
+
+.. code:: python
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1940-1978-1h-v2",
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+   )
+
+.. image:: concat.png
+   :alt: Concatenation
+
+Please note that you can pass more than two ``zarr`` files to the
+function.
+
+   **NOTE:** When concatenating files, the statistics are not recomputed;
+   the statistics of the first file are returned to the user.
+
+******
+ join
+******
+
+You can join two datasets that have the same dates, combining their
+variables.
+
+.. code:: python
+
+   from ecml_tools.data import open_dataset
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       "some-extra-parameters-from-another-source-o96-1979-2022-1h-v2",
+   )
+
+.. image:: join.png
+   :alt: Join
+
+If a variable is present in more than one file, the last occurrence of
+that variable will be used, and will be at the position of the first
+occurrence of that name.
+
+.. image:: overlay.png
+   :alt: Overlay
+
+Please note that you can join more than two ``zarr`` files.
+
+***********
+ ensembles
+***********
+
+.. code:: python
+
+   open_dataset(ensembles=[dataset1, dataset2, ...])
+
+*******
+ grids
+*******
+
+.. code:: python
+
+   open_dataset(grids=[dataset1, dataset2, ...], method=...)
diff --git a/docs/using/grids.rst b/docs/using/grids.rst
@@ -0,0 +1,17 @@
+.. _selecting-grids:
+
+#######################
+ Selecting grid points
+#######################
+
+**********
+ thinning
+**********
+
+.. code:: python
+
+   open_dataset(dataset, thinning=..., method="every-nth")
+
+******
+ area
+******
diff --git a/docs/using/opening.rst b/docs/using/opening.rst
@@ -0,0 +1,52 @@
+.. _opening-datasets:
+
+##################
+ Opening datasets
+##################
+
+.. code:: python
+
+   from anemoi_datasets import open_dataset
+
+   ds = open_dataset("path/to/dataset.zarr", option1=value1, option2=value2, ...)
+
+or
+
+.. code:: python
+
+   from anemoi_datasets import open_dataset
+
+   ds = open_dataset(combine=["path/to/dataset1.zarr",
+                              "path/to/dataset2.zarr", ...])
+
+or
+
+.. code:: python
+
+   from anemoi_datasets import open_dataset
+
+   ds = open_dataset(combine=["path/to/dataset1.zarr",
+                              "path/to/dataset2.zarr", ...],
+                              option1=value1, option2=value2, ...)
+
+The term ``combine`` is one of ``join``, ``concat``, ``ensembles``, etc.
+See :ref:`combining-datasets` for more information.
+
+.. note::
+
+   The options ``option1``, ``option2``, etc., apply to the combined dataset.
+
+.. code:: python
+
+   from anemoi_datasets import open_dataset
+
+   ds = open_dataset(combine=[{"dataset": "path/to/dataset1.zarr",
+                               "option1": value1, "option2": value2, ...},
+                              {"dataset": "path/to/dataset2.zarr",
+                               "option3": value3, "option4": value4, ...},
+                              ...])
+
+.. note::
+
+   The options ``option1``, ``option2``, etc., apply to the first dataset,
+   and ``option3``, ``option4``, etc., to the second dataset, and so on.