From 346c37e91d4912e3b7c5323fa26a5ad077d8b6a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 13 Dec 2024 14:43:46 +0100 Subject: [PATCH] wip: cleanup --- docs/source/backends/json.rst | 7 +- docs/source/details/backendconfig.rst | 6 +- include/openPMD/Dataset.hpp | 16 ++ include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 44 ++-- src/IO/JSON/JSONIOHandlerImpl.cpp | 200 +++++++++--------- 5 files changed, 154 insertions(+), 119 deletions(-) diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst index bba6ca5df0..9b618c9e03 100644 --- a/docs/source/backends/json.rst +++ b/docs/source/backends/json.rst @@ -54,9 +54,10 @@ Stored as an actual dataset, an **openPMD dataset** is a JSON object with three Stored as a **dataset template**, an openPMD dataset is represented by three JSON keys: -* ``datatype`` (required): As above. -* ``extent`` (required): A list of integers, describing the extent of the dataset. -* ``attributes``: As above. + * ``datatype`` (required): As above. + * ``extent`` (required): A list of integers, describing the extent of the dataset. + This replaces the ``data`` key from the non-template representation. + * ``attributes``: As above. This mode stores only the dataset metadata. Chunk load/store operations are ignored. diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index cf78d9cdea..8b60d21a90 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -243,11 +243,11 @@ A full configuration of the JSON backend: The TOML backend is configured analogously, replacing the ``"json"`` key with ``"toml"``. -All keys found under ``hdf5.dataset`` are applicable globally as well as per dataset. +All keys found under ``json.dataset`` are applicable globally as well as per dataset. Explanation of the single keys: * ``json.dataset.mode`` / ``toml.dataset.mode``: One of ``"dataset"`` (default) or ``"template"``. 
In "dataset" mode, the dataset will be written as an n-dimensional (recursive) array, padded with nulls (JSON) or zeroes (TOML) for missing values. - In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read. -* ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.*). + In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read (i.e. write/read operations will be skipped). +* ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.* and generally in TOML). The long format explicitly encodes the attribute type in the dataset on disk, the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type. diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp index ad1125d9d3..d79380105a 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -41,7 +41,23 @@ class Dataset public: enum : std::uint64_t { + /** + * Setting one dimension of the extent as JOINED_DIMENSION means that + * the extent along that dimension will be defined by the sum of all + * parallel processes' contributions. + * Only one dimension can be joined. For store operations, the offset + * should be an empty array and the extent should give the actual + * extent of the chunk (i.e. the number of joined elements along the + * joined dimension, equal to the global extent in all other + * dimensions). For more details, refer to + * docs/source/usage/workflow.rst. + */ JOINED_DIMENSION = std::numeric_limits::max(), + /** + * Some backends (i.e. JSON and TOML in template mode) support the + * creation of datasets with undefined datatype and extent. + * The extent should be given as {UNDEFINED_EXTENT} for that. 
+ */ UNDEFINED_EXTENT = std::numeric_limits::max() - 1 }; diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index fc369047ec..38966e3b82 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -267,6 +267,10 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl */ FileFormat m_fileFormat{}; + /* + * Under which key do we find the backend configuration? + * -> "json" for the JSON backend, "toml" for the TOML backend. + */ std::string backendConfigKey() const; /* @@ -278,6 +282,10 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string m_originalExtension; + /* + * Was the config value explicitly user-chosen, or are we still working with + * defaults? + */ enum class SpecificationVia { DefaultValue, @@ -288,19 +296,21 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // Dataset IO mode // ///////////////////// - enum class IOMode + enum class DatasetMode { Dataset, Template }; - IOMode m_mode = IOMode::Dataset; - SpecificationVia m_IOModeSpecificationVia = SpecificationVia::DefaultValue; - bool m_printedSkippedWriteWarningAlready = false; + // IOMode m_mode{}; + // SpecificationVia m_IOModeSpecificationVia = + // SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready + // = false; - struct DatasetMode + struct DatasetMode_s { - IOMode m_IOMode; + // Initialized in init() + DatasetMode m_mode{}; SpecificationVia m_specificationVia; bool m_skipWarnings; @@ -308,10 +318,11 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl operator std::tuple() { return std::tuple{ - m_IOMode, m_specificationVia, m_skipWarnings}; + m_mode, m_specificationVia, m_skipWarnings}; } }; - DatasetMode retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + DatasetMode_s m_datasetMode; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; /////////////////////// // Attribute IO mode // @@ -323,11 
+334,16 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl Long }; - AttributeMode m_attributeMode = AttributeMode::Long; - SpecificationVia m_attributeModeSpecificationVia = - SpecificationVia::DefaultValue; + struct AttributeMode_s + { + // Will be modified in init() based on the openPMD version and the + // active file format (JSON/TOML) + AttributeMode m_mode{}; + SpecificationVia m_specificationVia = SpecificationVia::DefaultValue; + }; + AttributeMode_s m_attributeMode; - std::pair + AttributeMode_s retrieveAttributeMode(openPMD::json::TracingJSON &config) const; // HELPER FUNCTIONS @@ -376,7 +392,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static std::pair getExtent(nlohmann::json &j); + static std::pair getExtent(nlohmann::json &j); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); @@ -434,7 +450,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // check whether the json reference contains a valid dataset template - IOMode verifyDataset(Param const &parameters, nlohmann::json &); + DatasetMode verifyDataset(Param const &parameters, nlohmann::json &); static nlohmann::json platformSpecifics(); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 3336bedc5e..9b671c097e 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -69,7 +69,7 @@ namespace JSONDefaults { using const_str = char const *const; constexpr const_str openpmd_internal = "__openPMD_internal"; - constexpr const_str IOMode = "dataset_mode"; + constexpr const_str DatasetMode = "dataset_mode"; constexpr const_str AttributeMode = "attribute_mode"; } // namespace JSONDefaults @@ -103,7 +103,11 @@ namespace } #endif - static constexpr char const *errorMsg = "JSON default value"; + template + static nlohmann::json call() + { + return 0; + } }; /* @@ 
-250,11 +254,13 @@ namespace } // namespace auto JSONIOHandlerImpl::retrieveDatasetMode( - openPMD::json::TracingJSON &config) const -> DatasetMode + openPMD::json::TracingJSON &config) const -> DatasetMode_s { - IOMode ioMode = m_mode; - SpecificationVia specificationVia = SpecificationVia::DefaultValue; - bool skipWarnings = false; + // start with / copy from current config + auto res = m_datasetMode; + DatasetMode &ioMode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; + bool &skipWarnings = res.m_skipWarnings; if (auto [configLocation, maybeConfig] = getBackendConfig(config); maybeConfig.has_value()) { @@ -276,17 +282,17 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto mode = modeOption.value(); if (mode == "dataset") { - ioMode = IOMode::Dataset; + ioMode = DatasetMode::Dataset; specificationVia = SpecificationVia::Manually; } else if (mode == "template") { - ioMode = IOMode::Template; + ioMode = DatasetMode::Template; specificationVia = SpecificationVia::Manually; } else if (mode == "template_no_warn") { - ioMode = IOMode::Template; + ioMode = DatasetMode::Template; specificationVia = SpecificationVia::Manually; skipWarnings = true; } @@ -300,15 +306,16 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( } } } - return DatasetMode{ioMode, specificationVia, skipWarnings}; + return res; } auto JSONIOHandlerImpl::retrieveAttributeMode( - openPMD::json::TracingJSON &config) const - -> std::pair + openPMD::json::TracingJSON &config) const -> AttributeMode_s { - AttributeMode res = m_attributeMode; - SpecificationVia res_2 = SpecificationVia::DefaultValue; + // start with / copy from current config + auto res = m_attributeMode; + AttributeMode &mode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; if (auto [configLocation, maybeConfig] = getBackendConfig(config); maybeConfig.has_value()) { @@ -327,28 +334,28 @@ auto JSONIOHandlerImpl::retrieveAttributeMode( "Invalid value of non-string type (accepted values 
are " "'dataset' and 'template'."); } - auto mode = modeOption.value(); - if (mode == "short") + auto modeCfg = modeOption.value(); + if (modeCfg == "short") { - res = AttributeMode::Short; - res_2 = SpecificationVia::Manually; + mode = AttributeMode::Short; + specificationVia = SpecificationVia::Manually; } - else if (mode == "long") + else if (modeCfg == "long") { - res = AttributeMode::Long; - res_2 = SpecificationVia::Manually; + mode = AttributeMode::Long; + specificationVia = SpecificationVia::Manually; } else { throw error::BackendConfigSchema( {configLocation, "attribute", "mode"}, - "Invalid value: '" + mode + + "Invalid value: '" + modeCfg + "' (accepted values are 'short' and 'long'."); } } } } - return std::make_pair(res, res_2); + return res; } std::string JSONIOHandlerImpl::backendConfigKey() const @@ -412,21 +419,21 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) switch (m_fileFormat) { case FileFormat::Json: - // @todo take the switch to openPMD 2.0 as a chance to switch to - // short attribute mode as a default here - m_attributeMode = AttributeMode::Long; - m_mode = IOMode::Dataset; + m_attributeMode.m_mode = + m_handler->m_standard >= OpenpmdStandard::v_2_0_0 + ? 
AttributeMode::Short + : AttributeMode::Long; + m_datasetMode.m_mode = DatasetMode::Dataset; break; case FileFormat::Toml: - m_attributeMode = AttributeMode::Short; - m_mode = IOMode::Template; + m_attributeMode.m_mode = AttributeMode::Short; + m_datasetMode.m_mode = DatasetMode::Dataset; break; } - std::tie( - m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = - retrieveDatasetMode(config); - std::tie(m_attributeMode, m_attributeModeSpecificationVia) = - retrieveAttributeMode(config); + + // now modify according to config + m_datasetMode = retrieveDatasetMode(config); + m_attributeMode = retrieveAttributeMode(config); if (auto [_, backendConfig] = getBackendConfig(config); backendConfig.has_value()) @@ -461,22 +468,6 @@ void JSONIOHandlerImpl::createFile( access::write(m_handler->m_backendAccess), "[JSON] Creating a file in read-only mode is not possible."); - if (m_attributeModeSpecificationVia == SpecificationVia::DefaultValue) - { - switch (m_fileFormat) - { - - case FileFormat::Json: - m_attributeMode = m_handler->m_standard >= OpenpmdStandard::v_2_0_0 - ? AttributeMode::Short - : AttributeMode::Long; - break; - case FileFormat::Toml: - m_attributeMode = AttributeMode::Short; - break; - } - } - if (!writable->written) { std::string name = parameters.name + m_originalExtension; @@ -594,13 +585,14 @@ void JSONIOHandlerImpl::createDataset( parameter.options, /* considerFiles = */ false); // Retrieves mode from dataset-specific configuration, falls back to global // value if not defined + DatasetMode_s dm; auto [localMode, _, skipWarnings] = retrieveDatasetMode(config); (void)_; // No use in introducing logic to skip warnings only for one particular // dataset. If warnings are skipped, then they are skipped consistently. // Use |= since `false` is the default value and we don't wish to reset // the flag. 
- m_printedSkippedWriteWarningAlready |= skipWarnings; + m_datasetMode.m_skipWarnings |= skipWarnings; parameter.warnUnusedParameters( config, @@ -628,7 +620,7 @@ void JSONIOHandlerImpl::createDataset( switch (localMode) { - case IOMode::Dataset: { + case DatasetMode::Dataset: { auto extent = parameter.extent; switch (parameter.dtype) { @@ -641,14 +633,22 @@ void JSONIOHandlerImpl::createDataset( default: break; } - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional{} - : parameter.dtype); + if (parameter.extent.size() == 1 && + parameter.extent[0] == Dataset::UNDEFINED_EXTENT) + { + dset["data"] = std::vector(0); + } + else + { + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? std::optional{} + : parameter.dtype); + } break; } - case IOMode::Template: + case DatasetMode::Template: if (parameter.extent != Extent{0} && parameter.extent[0] != Dataset::UNDEFINED_EXTENT) { @@ -700,7 +700,7 @@ void JSONIOHandlerImpl::extendDataset( refreshFileFromParent(writable); auto &j = obtainJsonContents(writable); - IOMode localIOMode; + DatasetMode localIOMode; try { Extent datasetExtent; @@ -724,7 +724,7 @@ void JSONIOHandlerImpl::extendDataset( switch (localIOMode) { - case IOMode::Dataset: { + case DatasetMode::Dataset: { auto extent = parameters.extent; auto datatype = stringToDatatype(j["datatype"].get()); switch (datatype) @@ -749,7 +749,7 @@ void JSONIOHandlerImpl::extendDataset( j["data"] = newData; } break; - case IOMode::Template: { + case DatasetMode::Template: { j["extent"] = parameters.extent; } break; @@ -1235,16 +1235,16 @@ void JSONIOHandlerImpl::writeDataset( switch (verifyDataset(parameters, j)) { - case IOMode::Dataset: + case DatasetMode::Dataset: break; - case IOMode::Template: - if (!m_printedSkippedWriteWarningAlready) + case DatasetMode::Template: + if 
(!m_datasetMode.m_skipWarnings) { std::cerr << "[JSON/TOML backend: Warning] Trying to write data to a " "template dataset. Will skip." << std::endl; - m_printedSkippedWriteWarningAlready = true; + m_datasetMode.m_skipWarnings = true; } return; } @@ -1283,7 +1283,7 @@ void JSONIOHandlerImpl::writeAttribute( } nlohmann::json value; switchType(parameter.dtype, value, parameter.resource); - switch (m_attributeMode) + switch (m_attributeMode.m_mode) { case AttributeMode::Long: (*jsonVal)[filePosition->id]["attributes"][parameter.name] = { @@ -1326,18 +1326,18 @@ void JSONIOHandlerImpl::readDataset( refreshFileFromParent(writable); setAndGetFilePosition(writable); auto &j = obtainJsonContents(writable); - IOMode localMode = verifyDataset(parameters, j); + DatasetMode localMode = verifyDataset(parameters, j); switch (localMode) { - case IOMode::Template: + case DatasetMode::Template: std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " "backend. Will fill with zeroes instead." 
<< std::endl; switchNonVectorType( parameters.dtype, parameters.data.get(), parameters.extent); return; - case IOMode::Dataset: + case DatasetMode::Dataset: try { switchType(parameters.dtype, j["data"], parameters); @@ -1803,13 +1803,13 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) } auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) - -> std::pair + -> std::pair { Extent res; - IOMode ioMode; + DatasetMode ioMode; if (j.contains("data")) { - ioMode = IOMode::Dataset; + ioMode = DatasetMode::Dataset; nlohmann::json *ptr = &j["data"]; while (ptr->is_array()) { @@ -1831,12 +1831,12 @@ auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) } else if (j.contains("extent")) { - ioMode = IOMode::Template; + ioMode = DatasetMode::Template; res = j["extent"].get(); } else { - ioMode = IOMode::Template; + ioMode = DatasetMode::Template; res = {0}; } return std::make_pair(std::move(res), ioMode); @@ -1981,38 +1981,40 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); // Init dataset mode according to file's default - if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue && - openpmd_internal.contains(JSONDefaults::IOMode)) + if (m_datasetMode.m_specificationVia == + SpecificationVia::DefaultValue && + openpmd_internal.contains(JSONDefaults::DatasetMode)) { auto modeOption = openPMD::json::asLowerCaseStringDynamic( - openpmd_internal.at(JSONDefaults::IOMode)); + openpmd_internal.at(JSONDefaults::DatasetMode)); if (!modeOption.has_value()) { std::cerr << "[JSON/TOML backend] Warning: Invalid value of " "non-string type at internal meta table for entry '" - << JSONDefaults::IOMode << "'. Will ignore and continue." - << std::endl; + << JSONDefaults::DatasetMode + << "'. 
Will ignore and continue.\n"; } else if (modeOption.value() == "dataset") { - m_mode = IOMode::Dataset; + m_datasetMode.m_mode = DatasetMode::Dataset; } else if (modeOption.value() == "template") { - m_mode = IOMode::Template; + m_datasetMode.m_mode = DatasetMode::Template; } else { std::cerr << "[JSON/TOML backend] Warning: Invalid value '" << modeOption.value() << "' at internal meta table for entry '" - << JSONDefaults::IOMode - << "'. Will ignore and continue." << std::endl; + << JSONDefaults::DatasetMode + << "'. Will ignore and continue." << '\n'; } } - if (m_attributeModeSpecificationVia == SpecificationVia::DefaultValue && + if (m_attributeMode.m_specificationVia == + SpecificationVia::DefaultValue && openpmd_internal.contains(JSONDefaults::AttributeMode)) { auto modeOption = openPMD::json::asLowerCaseStringDynamic( @@ -2023,23 +2025,23 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) << "[JSON/TOML backend] Warning: Invalid value of " "non-string type at internal meta table for entry '" << JSONDefaults::AttributeMode - << "'. Will ignore and continue." << std::endl; + << "'. Will ignore and continue." << '\n'; } else if (modeOption.value() == "long") { - m_attributeMode = AttributeMode::Long; + m_attributeMode.m_mode = AttributeMode::Long; } else if (modeOption.value() == "short") { - m_attributeMode = AttributeMode::Short; + m_attributeMode.m_mode = AttributeMode::Short; } else { std::cerr << "[JSON/TOML backend] Warning: Invalid value '" << modeOption.value() << "' at internal meta table for entry '" - << JSONDefaults::IOMode - << "'. Will ignore and continue." << std::endl; + << JSONDefaults::DatasetMode + << "'. Will ignore and continue." 
<< '\n'; } } } @@ -2068,20 +2070,20 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - switch (m_mode) + switch (m_datasetMode.m_mode) { - case IOMode::Dataset: + case DatasetMode::Dataset: (*it->second)["platform_byte_widths"] = platformSpecifics(); - (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = - "dataset"; + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "dataset"; break; - case IOMode::Template: - (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = - "template"; + case DatasetMode::Template: + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "template"; break; } - switch (m_attributeMode) + switch (m_attributeMode.m_mode) { case AttributeMode::Short: (*it->second)[JSONDefaults::openpmd_internal] @@ -2314,13 +2316,13 @@ bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it) template auto JSONIOHandlerImpl::verifyDataset( - Param const &parameters, nlohmann::json &j) -> IOMode + Param const &parameters, nlohmann::json &j) -> DatasetMode { VERIFY_ALWAYS( isDataset(j), "[JSON] Specified dataset does not exist or is not a dataset."); - IOMode res; + DatasetMode res; try { Extent datasetExtent;