diff --git a/.gitignore b/.gitignore index cde3dee8..a7d9e105 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ coverage-report.xml # Ignore Sphinx doc build result **/_build +**/_site # Ignore tox related folders/files .tox/** diff --git a/README.rst b/README.rst index 028961b7..f44a41f7 100644 --- a/README.rst +++ b/README.rst @@ -22,6 +22,8 @@ Key features: - **Metadata in action**: ``pysdmx`` supports retrieving metadata from an SDMX Registry or any service compliant with the SDMX-REST 2.0.0 API. Use these metadata to power statistical processes. +- **Reading and writing SDMX files**: ``pysdmx`` supports reading and writing + SDMX data and structure messages, in various formats. - **Data discovery and retrieval**: This functionality is under development. ``pysdmx`` will enable listing public SDMX services, discovering available data available, and retrieving data from these services. diff --git a/docs/api/fmr/async.rst b/docs/api/fmr/async.rst index 595fbfb2..34d5bbcf 100644 --- a/docs/api/fmr/async.rst +++ b/docs/api/fmr/async.rst @@ -12,5 +12,5 @@ SDMX service in an asynchronous (i.e. non-blocking fashion). >>> print(mapping) >>> asyncio.run(main()) -.. autoclass:: pysdmx.fmr.AsyncRegistryClient +.. autoclass:: pysdmx.api.fmr.AsyncRegistryClient :members: \ No newline at end of file diff --git a/docs/api/fmr/sync.rst b/docs/api/fmr/sync.rst index b84b8146..5e5f62a4 100644 --- a/docs/api/fmr/sync.rst +++ b/docs/api/fmr/sync.rst @@ -8,5 +8,5 @@ SDMX service in a synchronous (i.e. blocking fashion). >>> gr = RegistryClient("https://registry.sdmx.org/sdmx/v2/") >>> schema = gr.get_schema("dataflow", "UIS", "EDUCAT_CLASS_A", "1.0") -.. autoclass:: pysdmx.fmr.RegistryClient +.. autoclass:: pysdmx.api.fmr.RegistryClient :members: \ No newline at end of file diff --git a/docs/api/model.rst b/docs/api/model.rst index a355c223..a9a699c0 100644 --- a/docs/api/model.rst +++ b/docs/api/model.rst @@ -61,10 +61,11 @@ API Reference ..
toctree:: :maxdepth: 1 - model/organisation model/code model/concept model/dataflow model/category model/map - model/refmeta \ No newline at end of file + model/refmeta + model/organisation + model/vtl \ No newline at end of file diff --git a/docs/api/model/category.rst b/docs/api/model/category.rst index 35941e27..41e846a0 100644 --- a/docs/api/model/category.rst +++ b/docs/api/model/category.rst @@ -9,4 +9,4 @@ Category schemes - :ref:`fs`. .. automodule:: pysdmx.model.category - :members: Category, CategoryScheme \ No newline at end of file + :members: Category, CategoryScheme, DataflowRef \ No newline at end of file diff --git a/docs/api/model/code.rst b/docs/api/model/code.rst index 0e31c08b..3d446bdd 100644 --- a/docs/api/model/code.rst +++ b/docs/api/model/code.rst @@ -2,4 +2,4 @@ Codelists, hierarchies and value lists ====================================== .. automodule:: pysdmx.model.code - :members: Code, Codelist, HierarchicalCode, Hierarchy \ No newline at end of file + :members: Code, Codelist, HierarchicalCode, Hierarchy, HierarchyAssociation \ No newline at end of file diff --git a/docs/api/model/dataflow.rst b/docs/api/model/dataflow.rst index f9e1efdf..55c7626b 100644 --- a/docs/api/model/dataflow.rst +++ b/docs/api/model/dataflow.rst @@ -10,4 +10,4 @@ Dataflows and data structures - :ref:`validate`. .. automodule:: pysdmx.model.dataflow - :members: Component, Components, DataflowInfo, Role, Schema \ No newline at end of file + :members: Component, Components, Dataflow, DataflowInfo, Role, Schema \ No newline at end of file diff --git a/docs/api/model/map.rst b/docs/api/model/map.rst index 799eceb8..4f3368c2 100644 --- a/docs/api/model/map.rst +++ b/docs/api/model/map.rst @@ -9,4 +9,4 @@ Mapping definitions - :ref:`map`. .. 
automodule:: pysdmx.model.map - :members: StructureMap, ComponentMap, MultiComponentMap, ValueMap, MultiValueMap, ImplicitComponentMap, FixedValueMap, DatePatternMap \ No newline at end of file + :members: StructureMap, ComponentMap, RepresentationMap, MultiComponentMap, MultiRepresentationMap, ValueMap, MultiValueMap, ImplicitComponentMap, FixedValueMap, DatePatternMap \ No newline at end of file diff --git a/docs/api/model/organisation.rst b/docs/api/model/organisation.rst index 8983dd87..881e0c18 100644 --- a/docs/api/model/organisation.rst +++ b/docs/api/model/organisation.rst @@ -2,4 +2,4 @@ Organisations ============= .. automodule:: pysdmx.model.organisation - :members: Organisation, Contact, DataflowRef \ No newline at end of file + :members: Agency, AgencyScheme, DataConsumer, DataConsumerScheme, DataProvider, DataProviderScheme, MetadataProvider, MetadataProviderScheme \ No newline at end of file diff --git a/docs/api/model/vtl.rst b/docs/api/model/vtl.rst new file mode 100644 index 00000000..d2a0ff62 --- /dev/null +++ b/docs/api/model/vtl.rst @@ -0,0 +1,12 @@ +VTL artefacts +============= + +.. note:: + Additional information about how VTL artefacts can be used for + validating and transforming SDMX data is available in the + following tutorial: + + - :ref:`vtl`. + +.. automodule:: pysdmx.model.vtl + :members: CustomType, CustomTypeScheme, FromVtlMapping, NamePersonalisation, NamePersonalisationScheme, Ruleset, RulesetScheme, ToVtlMapping, Transformation, TransformationScheme, UserDefinedOperator, UserDefinedOperatorScheme, VtlCodelistMapping, VtlConceptMapping, VtlDataflowMapping, VtlMapping, VtlMappingScheme \ No newline at end of file diff --git a/docs/api/query/availability.rst b/docs/api/query/availability.rst new file mode 100644 index 00000000..c07ffd9f --- /dev/null +++ b/docs/api/query/availability.rst @@ -0,0 +1,11 @@ +SDMX-REST Availability Queries +============================== + +.. 
note:: + Additional information about how to build SDMX-REST queries + can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. automodule:: pysdmx.api.qb.availability + :members: AvailabilityFormat, AvailabilityMode, AvailabilityQuery diff --git a/docs/api/query/data.rst b/docs/api/query/data.rst new file mode 100644 index 00000000..51b60d1d --- /dev/null +++ b/docs/api/query/data.rst @@ -0,0 +1,11 @@ +SDMX-REST Data Queries +====================== + +.. note:: + Additional information about how to build SDMX-REST queries + can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. automodule:: pysdmx.api.qb.data + :members: DataContext, DataFormat, DataQuery diff --git a/docs/api/query/refmeta.rst b/docs/api/query/refmeta.rst new file mode 100644 index 00000000..6ed22315 --- /dev/null +++ b/docs/api/query/refmeta.rst @@ -0,0 +1,11 @@ +SDMX-REST Reference Metadata Queries +==================================== + +.. note:: + Additional information about how to build SDMX-REST queries + can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. automodule:: pysdmx.api.qb.refmeta + :members: RefMetaByMetadataflowQuery, RefMetaByMetadatasetQuery, RefMetaByStructureQuery, RefMetaDetail, RefMetaFormat \ No newline at end of file diff --git a/docs/api/query/schema.rst b/docs/api/query/schema.rst new file mode 100644 index 00000000..a52be140 --- /dev/null +++ b/docs/api/query/schema.rst @@ -0,0 +1,11 @@ +SDMX-REST Schema Queries +======================== + +.. note:: + Additional information about how to build SDMX-REST queries + can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. 
automodule:: pysdmx.api.qb.schema + :members: SchemaContext, SchemaFormat, SchemaQuery \ No newline at end of file diff --git a/docs/api/query/service.rst b/docs/api/query/service.rst new file mode 100644 index 00000000..8beded84 --- /dev/null +++ b/docs/api/query/service.rst @@ -0,0 +1,11 @@ +SDMX-REST Service Clients +========================= + +.. note:: + Additional information about how to execute SDMX-REST queries + against a specific service can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. automodule:: pysdmx.api.qb + :members: ApiVersion, RestService diff --git a/docs/api/query/structure.rst b/docs/api/query/structure.rst new file mode 100644 index 00000000..9cb88328 --- /dev/null +++ b/docs/api/query/structure.rst @@ -0,0 +1,11 @@ +SDMX-REST Structure Queries +=========================== + +.. note:: + Additional information about how to build SDMX-REST queries + can be found in the following tutorial: + + - :ref:`sdmx-rest`. + +.. automodule:: pysdmx.api.qb.structure + :members: StructureDetail, StructureFormat, StructureQuery, StructureReference, StructureType diff --git a/docs/api/rest.rst b/docs/api/rest.rst new file mode 100644 index 00000000..6a253544 --- /dev/null +++ b/docs/api/rest.rst @@ -0,0 +1,28 @@ +.. _qb_api: + +SDMX-REST services +================== + +Overview +-------- + +``pysdmx`` allows **building SDMX-REST queries** and **executing them** +against an SDMX-REST compliant service. + +.. note:: + Discover how to execute SDMX-REST queries in the following tutorial: + + - :ref:`sdmx-rest`. + +API Reference +------------- + +.. 
toctree:: + :maxdepth: 1 + + query/service + query/availability + query/data + query/refmeta + query/schema + query/structure diff --git a/docs/conf.py b/docs/conf.py index 3854becb..29dc8523 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,8 +21,8 @@ # -- Project information ----------------------------------------------------- project = "pysdmx" -copyright = "2023, BIS" -author = "BIS" +copyright = "2025, BIS" +author = "BIS, MeaningfulData" # -- General configuration --------------------------------------------------- diff --git a/docs/howto/config.rst b/docs/howto/config.rst index 5b45b6c7..94f02416 100644 --- a/docs/howto/config.rst +++ b/docs/howto/config.rst @@ -95,8 +95,8 @@ Connecting to a Registry ^^^^^^^^^^^^^^^^^^^^^^^^ ``pysdmx`` allows retrieving metadata from an SDMX Registry in either a -synchronous (via ``pymedal.fmr.RegistryClient``) or asynchronous fashion -(via ``pymedal.fmr.AsyncRegistryClient``). The choice depends on your use +synchronous (via ``pysdmx.api.fmr.RegistryClient``) or asynchronous fashion +(via ``pysdmx.api.fmr.AsyncRegistryClient``). The choice depends on your use case. The asynchronous client is often preferred as it is non-blocking. To connect to your target Registry, instantiate the client by passing the diff --git a/docs/howto/data_rw.rst b/docs/howto/data_rw.rst new file mode 100644 index 00000000..cd33891f --- /dev/null +++ b/docs/howto/data_rw.rst @@ -0,0 +1,289 @@ +.. _data-rw: + +Reading and writing SDMX datasets +================================= + +.. note:: + + This tutorial shows how to read and write SDMX datasets using ``pysdmx``. + + - :ref:`data-rw`. + +.. warning:: + To read and write data, you must use the extra "data". You may need to install it using the following command: + + .. code-block:: bash + + pip install pysdmx[data] + + For SDMX-ML format, you need to install the extra "xml" as well: + + .. 
code-block:: bash + + pip install pysdmx[data,xml] + + + +``pysdmx`` allows to read and write SDMX datasets in the following formats: + +- SDMX-CSV 1.0 (located in ``pysdmx.io.csv.sdmx10``) +- SDMX-CSV 2.0 (located in ``pysdmx.io.csv.sdmx20``) +- SDMX-ML 2.1 (located in ``pysdmx.io.xml.sdmx21``) + - SDMX-ML 2.1 Generic + - SDMX-ML 2.1 Structure Specific + +Currently, all data-related readers and writers are based on PandasDataset class. + +.. autoclass:: pysdmx.io.pd.PandasDataset + :show-inheritance: + :undoc-members: + +Reading data +------------ + +To read data, we recommend using the read_sdmx function or the get_datasets function: + +.. autofunction:: pysdmx.io.read_sdmx + +A typical example to read data from a file, a string or a buffer, using read_sdmx: + +.. code-block:: python + + from pysdmx.io import read_sdmx + + # Read file from the same folder as this code + file_path = Path(__file__).parent / "sample.csv" + + # Read from file + data_msg = read_sdmx(file_path) + + # Read from URL + data_msg = read_sdmx("https://example.com/sample.csv") + + # Extracting the datasets (list of Dataset) + datasets = data_msg.data + + # Accessing the data of the test dataset by its Short URN + df = data_msg.get_dataset("DataStructure=TEST_AGENCY:TEST_ID(1.0)").data + + # Accessing the data of the test dataset by its position in the SDMX Message + df = data_msg.data[0].data + + +By default, the read_sdmx function will automatically detect the format of the file and use the appropriate reader. We may as well use the get_datasets to associate a dataset to its Schema: + +.. autofunction:: pysdmx.io.get_datasets + +.. important:: + + If the structures message is used, the get_datasets function will associate the dataset to its Schema. If the structures message is not used, the get_datasets function will return a list of datasets without any Schema association. 
+ If a dataset references a dataflow, the structure message must contain the dataflow's children (or all descendants), i.e. the DataStructureDefinitions associated to this Dataflow in the same SDMX Message (with or without referenced artefacts like Codelists, ConceptSchemes, etc). + +.. code-block:: python + + from pysdmx.io import get_datasets + + # Read file from the same folder as this code (SDMX-CSV 2.0) + data_path = Path(__file__).parent / "sample.csv" + + # Data contains a reference to the dataflow ``Dataflow=MD:TEST(1.0)`` + datasets = get_datasets(data_path) + + print(datasets[0].structure) # Outputs a string with the Schema Short URN -> "Dataflow=MD:TEST(1.0)" + + # Reading the datasets and associating the schema + datasets = get_datasets(data_path, "https://example.com/dataflow/MD/TEST/1.0?references=descendants") + + print(datasets[0].structure) # Outputs a Schema object with the associated components + + +Both methods are based on the individual readers for each format supported, which are described below. +All individual readers will have a string as input. + +SDMX-CSV 1.0 +^^^^^^^^^^^^ + +`SDMX-CSV 1.0 specification `_ + +.. warning:: + + The SDMX-CSV 1.0 format is deprecated and should not be used for new implementations. + It only allows a dataflow to be represented, which is not enough for most use cases. + +.. autofunction:: pysdmx.io.csv.sdmx10.reader.read + +.. code-block:: python + + from pysdmx.io.input_processor import process_string_to_read + from pysdmx.io.csv.sdmx10.reader import read + + from pathlib import Path + + # Read file sample10.csv from the same folder as this code + file_path = Path(__file__).parent / "sample10.csv" + input_str, format = process_string_to_read(file_path) + + # Using reader, result will be a list of datasets + datasets = read(input_str) + # Accessing the data of the test dataset + df = datasets[0].data + +SDMX-CSV 2.0 +^^^^^^^^^^^^ + +`SDMX-CSV 2.0 specification `_ + +..
autofunction:: pysdmx.io.csv.sdmx20.reader.read + +We currently support only comma as the delimiter. +Only the `ordinary case `_ is supported. + +You may use any custom script for the remaining use cases. If you are interested in them, please +raise an issue in `GitHub `_. + +.. code-block:: python + + from pysdmx.io.input_processor import process_string_to_read + from pysdmx.io.csv.sdmx20.reader import read + from pathlib import Path + + # Read file from the same folder as this code + file_path = Path(__file__).parent / "sample20.csv" + input_str, format = process_string_to_read(file_path) + + # Using reader, result will be a list of datasets + datasets = read(input_str) + # Accessing the data of the test dataset + df = datasets[0].data + +SDMX-ML 2.1 Data Readers +^^^^^^^^^^^^^^^^^^^^^^^^ + +SDMX-ML 2.1 format is described +`here (in Source Code, check documentation folder) `_ + +``pysdmx`` supports both Generic and Structure Specific SDMX-ML 2.1 data messages, in either the All Dimensions or the Series format. + +.. autofunction:: pysdmx.io.xml.sdmx21.reader.generic.read + +.. autofunction:: pysdmx.io.xml.sdmx21.reader.structure_specific.read + +We do not support the following elements: + +- Dimension Group +- Reference to Provision Agreement + +The reader supports both Generic and Structure Specific SDMX-ML 2.1. +It will automatically detect any structural validation errors (if validate=True) and raise an exception. + +.. warning:: + + The SDMX-ML 2.1 Generic format is deprecated and should not be used for new implementations. SDMX-ML 3.0 only uses the Structure Specific format, which is more efficient and easier to use. + +..
code-block:: python + + from pysdmx.io.input_processor import process_string_to_read + from pysdmx.io.xml.sdmx21.reader.generic import read as read_generic # For Generic format + from pysdmx.io.xml.sdmx21.reader.structure_specific import read # For Structure Specific format + from pathlib import Path + + # Read file from the same folder as this code + file_path = Path(__file__).parent / "sample21.xml" + input_str, format = process_string_to_read(file_path) + + # Using reader, result will be a list of datasets + datasets = read(input_str, validate=True) + + # Accessing the data of the test dataset + df = datasets[0].data + + +Writing data +------------ + +``pysdmx`` can return the written data as a string or write it to a file. SDMX-CSV writers only allow one dataset to be written at a time, while SDMX-ML writers allow multiple datasets to be written at once. + +SDMX-CSV 1.0 +^^^^^^^^^^^^ + +`SDMX-CSV 1.0 specification `_ + +.. warning:: + + The SDMX-CSV 1.0 format is deprecated and should not be used for new implementations. + It only allows a dataflow to be represented, which is not enough for most use cases. + +.. autofunction:: pysdmx.io.csv.sdmx10.writer.write + +.. code-block:: python + + from pysdmx.io.csv.sdmx10.writer import write + from pathlib import Path + + # Write to file sample.csv in the same folder as this code + file_path = Path(__file__).parent / "sample.csv" + + # Write the datasets (list of Dataset or PandasDataset) to the file + write(datasets, file_path) + + +SDMX-CSV 2.0 +^^^^^^^^^^^^ + +`SDMX-CSV 2.0 specification `_ + +.. note:: + + The SDMX-CSV 2.0 writer will write the data as the `ordinary case `_. If you need to write data in other cases, you may need to write a custom script. + +.. warning:: + + We use only comma as the delimiter. + +.. autofunction:: pysdmx.io.csv.sdmx20.writer.write + +..
code-block:: python + + from pysdmx.io.csv.sdmx20.writer import write + from pathlib import Path + + # Write to file sample.csv in the same folder as this code + file_path = Path(__file__).parent / "sample.csv" + write(dataset, file_path) + +SDMX-ML 2.1 Data Writers +^^^^^^^^^^^^^^^^^^^^^^^^ + +SDMX-ML 2.1 format is described +`here (in Source Code, check documentation folder) `_ + +SDMX-ML 2.1 format allows to write multiple datasets at once. To use the Series format, you need to pass the dimension at observation dictionary, where the key is the dataset short urn and the value is the dimension id to be observed. + +.. important:: + + For each dataset, if dataset.structure is not a Schema, the writer can only write in the Structure Specific All Dimensions format. + We perform a check to ensure that the dataset has a Schema structure for the remaining formats as we need to know the roles for each component. + This check also ensures that the dataset.structure has at least one dimension and one measure defined. + +.. autofunction:: pysdmx.io.xml.sdmx21.writer.generic.write +.. autofunction:: pysdmx.io.xml.sdmx21.writer.structure_specific.write + +.. 
code-block:: python + + from pysdmx.io.xml.sdmx21.writer.generic import write as write_generic # For Generic format + from pysdmx.io.xml.sdmx21.writer.structure_specific import write # For StructureSpecific format + from pathlib import Path + + # List of datasets to write + datasets = [dataset1, dataset2] + + # Dimension at observation mapping (you do not need to set an entry for every dataset) + dim_mapping = { + "DataStructure=TEST_AGENCY:TEST_ID(1.0)": "TIME_PERIOD" + } + + # Write to file sample.xml in the same folder as this code + file_path = Path(__file__).parent / "sample.xml" + write(datasets, file_path, dimension_at_observation=dim_mapping) # This will write a Dataset in Series and another in AllDimensions format + diff --git a/docs/howto/map.rst b/docs/howto/map.rst index f41e4d51..6ec43e60 100644 --- a/docs/howto/map.rst +++ b/docs/howto/map.rst @@ -26,8 +26,8 @@ Step-by-step Solution --------------------- ``pysdmx`` allows retrieving metadata from an SDMX Registry in either a -synchronous manner (via ``pymedal.fmr.RegistryClient``) or asynchronously -(via ``pymedal.fmr.AsyncRegistryClient``). The choice depends on the use case +synchronous manner (via ``pysdmx.api.fmr.RegistryClient``) or asynchronously +(via ``pysdmx.api.fmr.AsyncRegistryClient``). The choice depends on the use case (and preference), but we tend to use the asynchronous client by default as it is non-blocking. @@ -170,7 +170,7 @@ mappings can be retrieved via the ``component_maps`` property: # target='CONTRACT', # values=[ # ValueMap(source='PROD TYPE', target='_T', valid_from=None, valid_to=None), - # ValueMap(source=re.compile('^([A-Z0-9]+)$'), target='\\1', valid_from=None, valid_to=None) + # ValueMap(source='regex:^([A-Z0-9]+)$', target='\\1', valid_from=None, valid_to=None) # ] # ) diff --git a/docs/howto/sdmx_rest.rst b/docs/howto/sdmx_rest.rst new file mode 100644 index 00000000..8b73b131 --- /dev/null +++ b/docs/howto/sdmx_rest.rst @@ -0,0 +1,116 @@ +..
_sdmx-rest: + +SDMX-REST services +================== + +``pysdmx`` allows **building SDMX-REST queries** and **executing them** +against an SDMX-REST compliant web service. + +For additional information about the SDMX-REST API, please refer to the +`SDMX documentation `_. + +SDMX-REST queries +----------------- + +The SDMX-REST API allows defining queries to retrieve +`data `_, +`structural `_ and +`reference metadata `_, +`schemas `_ +and `data availability `_. + +``pysdmx`` offers **query builders** for these different types of queries, as well as +enumerations for some of the parameters available in the SDMX-REST API. + +For example, the following can be used to retrieve information about a dataflow and +all the artefacts referenced directly or indirectly by this dataflow. + +.. code-block:: python + + from pysdmx.api.qb import ( + StructureDetail, + StructureQuery, + StructureReference, + StructureType, + ) + + query = StructureQuery( + StructureType.DATAFLOW, + "SDMX", + "NAMAIN_IDC_N", + detail=StructureDetail.REFERENCE_PARTIAL, + references=StructureReference.DESCENDANTS, + ) + +SDMX services +------------- + +Now that we have a query, we can execute it against the desired SDMX-REST service, +using the ``RestService`` class. + +The ``RestService`` requires an endpoint to which the query will be sent, +as well as the version of the SDMX-REST API that the endpoint supports. + +.. code-block:: python + + from pysdmx.api.qb import ApiVersion, RestService + + endpoint = "https://registry.sdmx.org/sdmx/v2/" + version = ApiVersion.V2_0_0 + service = RestService(endpoint, version) + resp = service.structure(query) + +In case the query requires features that are not available in the version +of the API supported by the endpoint, an error will be raised. + +Deserializing the response +-------------------------- + +The response of the web service will be returned as a sequence of bytes. 
+By default, the returned responses will be in the SDMX-JSON format, but +this can be configured when instantiating the ``RestService``. + +You can then process the response using your preferred library for the +requested format, such as Python's ``json`` module for SDMX-JSON +responses. + +Alternatively, for some messages, ``pysdmx.io.json.sdmxjson2`` deserializers +can be used. This is not well documented yet, as only a subset of messages +is currently supported, but further work will take place in this space. + +The code below shows how to do that, using one of the supported messages. + +.. code-block:: python + + import msgspec + + from pysdmx.api.qb import ( + ApiVersion, + RestService, + StructureQuery, + StructureType, + ) + from pysdmx.io.json.sdmxjson2.messages.code import JsonCodelistMessage + + # Step 1: Build your query + query = StructureQuery(StructureType.CODELIST, "SDMX", "CL_FREQ") + + # Step 2: Execute the query against your desired service + endpoint = "https://registry.sdmx.org/sdmx/v2/" + version = ApiVersion.V2_0_0 + service = RestService(endpoint, version) + resp = service.structure(query) + + # Step 3: Deserialize the response into a domain object + decoder = msgspec.json.Decoder(JsonCodelistMessage) + cl = decoder.decode(resp).to_model() + + # Step 4: Use the object the way you see fit + print(f"There are {len(cl.codes)} codes in the codelist") + + # Example output + # There are 34 codes in the codelist + + +For additional information about the query builders and the SDMX-REST service +class, please refer to the :ref:`API documentation <qb_api>`.
\ No newline at end of file diff --git a/docs/howto/structure_db.rst b/docs/howto/structure_db.rst index c485f195..931793b7 100644 --- a/docs/howto/structure_db.rst +++ b/docs/howto/structure_db.rst @@ -30,8 +30,8 @@ Step-by-step Solution --------------------- ``pysdmx`` allows retrieving metadata from an SDMX Registry either -synchronously (via ``pymedal.fmr.RegistryClient``) or asynchronously -(via ``pymedal.fmr.AsyncRegistryClient``). The choice depends on the use case +synchronously (via ``pysdmx.api.fmr.RegistryClient``) or asynchronously +(via ``pysdmx.api.fmr.AsyncRegistryClient``). The choice depends on the use case and preference, but we use the asynchronous client by default as it is non-blocking. diff --git a/docs/howto/structure_fs.rst b/docs/howto/structure_fs.rst index d6a12ed7..28043344 100644 --- a/docs/howto/structure_fs.rst +++ b/docs/howto/structure_fs.rst @@ -42,8 +42,8 @@ Step-by-step Solution --------------------- ``pysdmx`` allows retrieving metadata from an SDMX Registry either -synchronously (via ``pymedal.fmr.RegistryClient``) or asynchronously -(via ``pymedal.fmr.AsyncRegistryClient``). +synchronously (via ``pysdmx.api.fmr.RegistryClient``) or asynchronously +(via ``pysdmx.api.fmr.AsyncRegistryClient``). Connecting to a Registry ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -67,7 +67,7 @@ in a **category scheme** and related **categorizations**. .. code-block:: python - cs = await client.get_categories("MY_AGENCY", "MY_DATAFLOWS") + cs = await client.get_categories("MY_AGENCY", "MY_CATEGORY_SCHEME") Now we iterate over the categories (and their sub-categories) to find the dataflows attached to them. 
Use the ``dataflows`` property to get a set diff --git a/docs/howto/validate.rst b/docs/howto/validate.rst index 5f5be7bb..d94a8eb9 100644 --- a/docs/howto/validate.rst +++ b/docs/howto/validate.rst @@ -66,8 +66,8 @@ Step-by-step solution --------------------- ``pysdmx`` allows retrieving metadata from an SDMX Registry in either a -synchronous (via ``pymedal.fmr.RegistryClient``) or asynchronous fashion -(via ``pymedal.fmr.AsyncRegistryClient``). Which one to use depends on the +synchronous (via ``pysdmx.api.fmr.RegistryClient``) or asynchronous fashion +(via ``pysdmx.api.fmr.AsyncRegistryClient``). Which one to use depends on the use case (and taste), but we tend to use the asynchronous client by default, as it is non-blocking. diff --git a/docs/howto/vtl.rst b/docs/howto/vtl.rst new file mode 100644 index 00000000..1bb9463a --- /dev/null +++ b/docs/howto/vtl.rst @@ -0,0 +1,179 @@ +.. _vtl: + +Using VTL for Validation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. important:: + A seamless integration of ``pysdmx`` and ``vtlengine`` will modify this + tutorial. The current version is a placeholder for the upcoming changes + showing the use of both libraries separated. + For the latest updates on VTL usage, please check + `issue #158 `_. + +In this tutorial, we shall examine the utilization of ``pysdmx`` +for reading **data** and **metadata** to generate and operate on +datapoints using ``vtlengine``. + +Numerous types of operations can be performed; however, this +tutorial will focus exclusively on the fundamental ones. + +.. contents:: + :local: + :depth: 2 + +Required Metadata +----------------- + +For the present scenario, the required metadata is contingent +upon the desired operations. For reference please check +`sdmx to vtl documentation `_ + +Step-by-Step Solution +--------------------- + +``pysdmx`` facilitates the reading of data and metadata from an SDMX +file or service. 
For the purpose of this tutorial, we shall employ the XML files +``structures.xml`` (data structure) and ``data.csv`` (data). + +Reading the Data +~~~~~~~~~~~~~~~~ + +The initial step involves reading the data structure and data from the +SDMX files. The following code snippet demonstrates the process: + +.. code-block:: python + + from pathlib import Path + + # Path to the structures file in SDMX-ML 2.1 (same directory as this script) + path_to_structures = Path(__file__).parent / "structures.xml" + + # Path to the data file + path_to_data = Path(__file__).parent / "data.csv" + + # Get Structures SDMX Message + structures_msg = read_sdmx(path_to_structures) + + # Get Data message + data_msg = read_sdmx(path_to_data) + + +Extracting the Data and Data Structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After reading the data and metadata, the next step is to extract the +data and data structure from the SDMX messages. The following code snippet demonstrates +the process using the Short URN ``SDMX_TYPE=AGENCY_ID:ID(VERSION)`` + +.. code-block:: python + + # Extract the data structure and data for DS_1 + data_structure_1 = structures_msg.get_data_structure_definition("DataStructure=MD:DS_1(1.0)") + data_1 = data_msg.get_data("DataStructure=MD:DS_1(1.0)") + + # Extract the data structure and data for DS_2 + data_structure_2 = structures_msg.get_data_structure_definition("DataStructure=BIS:DS_2(1.0)") + data_2 = data_msg.get_data("DataStructure=BIS:DS_2(1.0)") + + + +To construct the datapoint, the metadata must be converted to the VTL +format using the ``to_vtl_json`` upcoming **DataStructureDefinition** method: + +.. 
code-block:: python + + from pysdmx.model.dataflow import Component, DataStructureDefinition, Role + from pysdmx.model.__utils import VTL_DTYPES_MAPPING, VTL_ROLE_MAPPING + + def to_vtl_json( + dsd: DataStructureDefinition, path: Optional[str] = None + ) -> Optional[Dict[str, Any]]: + """Formats the DataStructureDefinition as a VTL DataStructure.""" + + dataset_name = dsd.id + components = [] + NAME = "name" + ROLE = "role" + TYPE = "type" + NULLABLE = "nullable" + + _components: List[Component] = [] + _components.extend(dsd.components.dimensions) + _components.extend(dsd.components.measures) + _components.extend(dsd.components.attributes) + + for c in _components: + _type = VTL_DTYPES_MAPPING[c.dtype] + _nullability = c.role != Role.DIMENSION + _role = VTL_ROLE_MAPPING[c.role] + + component = { + NAME: c.id, + ROLE: _role, + TYPE: _type, + NULLABLE: _nullability, + } + + components.append(component) + + result = { + "datasets": [{"name": dataset_name, "DataStructure": components}] + } + if path is not None: + with open(path, "w") as fp: + json.dump(result, fp) + return None + + return result + + vtl_data_structure_1 = to_vtl_json(data_structure_1) + vtl_data_structure_2 = to_vtl_json(data_structure_2) + +Preparing the Dictionary +~~~~~~~~~~~~~~~~~~~~~~~~ + +To create the datapoint, a dictionary containing the required data and +structures must first be prepared. The arguments `data_structures` and +`datapoints` support the following types: + +- `Dict[str, Any]` +- `Path` +- `List[Union[Dict[str, Any], Path]]` + +The example below uses dictionaries for simplicity: + +.. code-block:: python + + vtl_data_structures = { + "DS_1": vtl_data_structure_1, + "DS_2": vtl_data_structure_2, + } + + datapoints = { + "DS_1": data_1, + "DS_2": data_2, + } + +Defining the Expression and Execution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Next, define the expression to be executed and utilize the ``run`` +method of ``vtlengine`` to perform the operation. 
The following example +demonstrates the addition of the datapoints ``DS_1`` and ``DS_2``, with the +result assigned to a new datapoint ``DS_r``. + +For reference, please check the +`vtlengine run documentation `_ + +.. code-block:: python + + from vtlengine import run + + expression = "DS_r <- DS_1 + DS_2;" + + run_result = run( + script=expression, + data_structures=vtl_data_structures, + datapoints=datapoints, + return_only_persistent=True, + ) diff --git a/docs/index.rst b/docs/index.rst index cca977fc..ce7948f7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,7 @@ Your opinionated Python SDMX library. .. toctree:: :maxdepth: 1 - :caption: How to + :caption: Metadata-driven processes howto/structure_fs howto/structure_db @@ -21,6 +21,19 @@ Your opinionated Python SDMX library. howto/map howto/config +.. toctree:: + :maxdepth: 1 + :caption: Data discovery + + howto/sdmx_rest + +.. toctree:: + :maxdepth: 2 + :caption: Handling SDMX datasets + + howto/data_rw + howto/vtl + .. toctree:: :maxdepth: 1 @@ -28,6 +41,7 @@ Your opinionated Python SDMX library. api/model api/fmr + api/rest api/helper Indices and tables diff --git a/docs/release.rst b/docs/release.rst index 149617a8..e4701170 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -1,23 +1,20 @@ Release notes ============= -1.0.0-beta-10 ------------- +1.0.0 (2025-01-20) +------------------ Added ^^^^^ -- Schema generation supports Hierarchical Associations - referencing Provision Agreements - - -1.0.0-beta-1 ------------- - -Added -^^^^^ - -- Core SDMX model classes +- Core domain classes for the SDMX information model. - Sync and async clients to retrieve metadata - from an SDMX Registry or SDMX-REST service -- Helper functions to handle SDMX URNs + from an SDMX Registry or SDMX-REST service, and use them to + drive statistical business processes.
+- Data readers and writers for SDMX-ML 2.1, SDMX-CSV 2.0 and + SDMX-CSV 1.0. +- Structure readers and writers for SDMX-ML 2.1, SDMX-JSON 2.0 and + Fusion-JSON. +- SDMX-REST query builders and a service client to execute + queries against SDMX-REST services. +- Utility functions to handle SDMX URNs. diff --git a/docs/start.rst b/docs/start.rst index f04a9d10..9ccf2c37 100644 --- a/docs/start.rst +++ b/docs/start.rst @@ -56,13 +56,10 @@ However, metadata can do so much more than that, i.e. they can be "active" and - :ref:`config` ``pysdmx`` supports retrieving metadata from an SDMX Registry or any service -compliant with the SDMX-REST 2.0.0 API. +compliant with the SDMX-REST 2.0.0 (or above) API. -Install ``pysdmx`` with the ``fmr`` extra to enable this functionality: - -.. code:: bash - - pip install pysdmx[fmr] +These classes are part of the core functionality and don't require additional +installations. Data discovery and data retrieval ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -74,6 +71,15 @@ allow: - Discovering data available in these services. - Retrieving data from these services. +Although this functionality is still under development, it is already +possible to :ref:`build SDMX-REST queries and execute them against a +web service`. + +Reading and writing SDMX datasets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Head to the :ref:`how-to guide` to learn how to read and write SDMX datasets. + How can I get it? ----------------- @@ -89,9 +95,31 @@ For the core functionality, use: Some use cases require additional dependencies, which can be installed using `extras `_. For example, -to retrieve metadata from an SDMX Registry, install the ``fmr`` -extra: +to parse SDMX-ML messages, install the ``xml`` extra: .. code:: bash - pip install pysdmx[fmr] + pip install pysdmx[xml] + +To install all extras, use: + +.. code:: bash + + pip install pysdmx[all] + +The following extras are available: + +..
list-table:: Available extras + :widths: 25 50 + :header-rows: 1 + + * - Name + - Purpose + * - ``xml`` + - Read and write SDMX-ML messages, using ``pysdmx.io.xml``. + * - ``data`` + - Handle SDMX datasets as Pandas data frames. Currently also required for reading and writing SDMX-CSV files. + * - ``dc`` + - Only required by the ``pysdmx.api.dc`` module, when generating queries based on dates. + * - ``all`` + - Install all extras. \ No newline at end of file diff --git a/src/pysdmx/io/csv/sdmx10/reader/__init__.py b/src/pysdmx/io/csv/sdmx10/reader/__init__.py index 8443c80c..e31b1889 100644 --- a/src/pysdmx/io/csv/sdmx10/reader/__init__.py +++ b/src/pysdmx/io/csv/sdmx10/reader/__init__.py @@ -34,13 +34,13 @@ def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset: def read(input_str: str) -> Sequence[PandasDataset]: - """Reads csv file and returns a payload dictionary. + """Reads csv data and returns a sequence of Datasets. Args: - input_str: Path to file, str. + input_str: str. Returns: - payload: dict. + A Sequence of Pandas Datasets. Raises: Invalid: If it is an invalid CSV file. diff --git a/src/pysdmx/io/csv/sdmx20/reader/__init__.py b/src/pysdmx/io/csv/sdmx20/reader/__init__.py index 00fb63f8..953abae6 100644 --- a/src/pysdmx/io/csv/sdmx20/reader/__init__.py +++ b/src/pysdmx/io/csv/sdmx20/reader/__init__.py @@ -79,13 +79,13 @@ def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset: def read(input_str: str) -> Sequence[PandasDataset]: - """Reads csv file and returns a payload dictionary. + """Reads csv data and returns a sequence of Datasets. Args: - input_str: Path to file, str. + input_str: str. Returns: - payload: dict. + A Sequence of Pandas Datasets. Raises: Invalid: If it is an invalid CSV file.
diff --git a/src/pysdmx/io/pd.py b/src/pysdmx/io/pd.py index ccf384f2..ca25bd78 100644 --- a/src/pysdmx/io/pd.py +++ b/src/pysdmx/io/pd.py @@ -16,11 +16,11 @@ class PandasDataset(Dataset, frozen=False, kw_only=True): withhold data. Args: - attributes: Attributes at dataset level- - data: Dataframe. + attributes: Attributes at dataset level. + data: Pandas Dataframe. structure: - URN or Schema related to this Dataset - (DSD, Dataflow, ProvisionAgreement) + URN or Schema related to this Dataset + (DSD, Dataflow, ProvisionAgreement) """ data: pd.DataFrame diff --git a/tests/io/csv/sdmx10/writer/test_writer_v1.py b/tests/io/csv/sdmx10/writer/test_writer_v1.py index 99195cd1..c59a8082 100644 --- a/tests/io/csv/sdmx10/writer/test_writer_v1.py +++ b/tests/io/csv/sdmx10/writer/test_writer_v1.py @@ -44,6 +44,25 @@ def test_to_sdmx_csv_writing(data_path, data_path_reference): ) +def test_to_sdmx_csv_writing_to_file(data_path, data_path_reference, tmpdir): + urn = "urn:sdmx:org.sdmx.infomodel.datastructure.DataFlow=MD:DS1(1.0)" + + dataset = PandasDataset( + attributes={}, + data=pd.read_json(data_path, orient="records"), + structure=urn, + ) + dataset.data = dataset.data.astype("str") + write([dataset], output_path=tmpdir / "output.csv") + result_df = pd.read_csv(tmpdir / "output.csv").astype(str) + reference_df = pd.read_csv(data_path_reference).astype(str) + pd.testing.assert_frame_equal( + result_df.fillna("").replace("nan", ""), + reference_df.replace("nan", ""), + check_like=True, + ) + + def test_writer_attached_attrs(data_path, data_path_reference_atch_atts): urn = "urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=MD:DS1(1.0)" dataset = PandasDataset( diff --git a/tests/io/csv/sdmx20/writer/test_writer_v2.py b/tests/io/csv/sdmx20/writer/test_writer_v2.py index 86b5d669..83e60a15 100644 --- a/tests/io/csv/sdmx20/writer/test_writer_v2.py +++ b/tests/io/csv/sdmx20/writer/test_writer_v2.py @@ -54,6 +54,27 @@ def test_to_sdmx_csv_writing(data_path, data_path_reference): 
) +def test_to_sdmx_csv_writing_to_file(data_path, data_path_reference, tmpdir): + urn = ( + "urn:sdmx:org.sdmx.infomodel.registry." + "ProvisionAgreement=MD:PA1(1.0)" + ) + dataset = PandasDataset( + attributes={}, + data=pd.read_json(data_path, orient="records"), + structure=urn, + ) + dataset.data = dataset.data.astype("str") + write([dataset], output_path=tmpdir / "output.csv") + result_df = pd.read_csv(tmpdir / "output.csv").astype(str) + reference_df = pd.read_csv(data_path_reference).astype(str) + pd.testing.assert_frame_equal( + result_df.fillna("").replace("nan", ""), + reference_df.replace("nan", ""), + check_like=True, + ) + + def test_writer_attached_attrs(data_path, data_path_reference_attch_atts): dataset = PandasDataset( attributes={"DECIMALS": 3},