diff --git a/pyproject.toml b/pyproject.toml index ac04ed75..c7ee5a4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "aind-metadata-mapper[all] ; python_version >= '3.9'", + "aind-metadata-mapper[all]", "black", "coverage", "flake8", @@ -48,19 +48,19 @@ bergamo = [ ] bruker = [ - "bruker2nifti==1.0.4" + "bruker2nifti==1.0.4", ] mesoscope = [ "aind-metadata-mapper[bergamo]", "pillow", - "tifffile==2024.2.12", + "tifffile==2024.2.12 ; python_version >= '3.9'", ] openephys = [ "h5py", "np_session", - "npc_ephys", + "npc_ephys ; python_version >= '3.9'", "scipy", "pandas", "numpy", @@ -98,7 +98,7 @@ exclude = ''' ''' [tool.coverage.run] -omit = ["*__init__*"] +omit = ["*__init__*", "tests/integration/*"] source = ["aind_metadata_mapper", "tests"] [tool.coverage.report] diff --git a/src/aind_metadata_mapper/__init__.py b/src/aind_metadata_mapper/__init__.py index b335a61e..89fa2b18 100644 --- a/src/aind_metadata_mapper/__init__.py +++ b/src/aind_metadata_mapper/__init__.py @@ -1,3 +1,3 @@ """Init package""" -__version__ = "0.15.0" +__version__ = "0.16.0" diff --git a/src/aind_metadata_mapper/bergamo/models.py b/src/aind_metadata_mapper/bergamo/models.py index c2827328..da00d444 100644 --- a/src/aind_metadata_mapper/bergamo/models.py +++ b/src/aind_metadata_mapper/bergamo/models.py @@ -1,13 +1,15 @@ """Module defining JobSettings for Bergamo ETL""" + from decimal import Decimal from pathlib import Path from typing import List, Literal, Optional from pydantic import Field -from pydantic_settings import BaseSettings + +from aind_metadata_mapper.core import BaseJobSettings -class JobSettings(BaseSettings): +class JobSettings(BaseJobSettings): """Data that needs to be input by user. 
Can be pulled from env vars with BERGAMO prefix or set explicitly.""" diff --git a/src/aind_metadata_mapper/bergamo/session.py b/src/aind_metadata_mapper/bergamo/session.py index 1a23bb69..b8a351a9 100644 --- a/src/aind_metadata_mapper/bergamo/session.py +++ b/src/aind_metadata_mapper/bergamo/session.py @@ -1,4 +1,5 @@ """Module to map bergamo metadata into a session model.""" + import argparse import bisect import json diff --git a/src/aind_metadata_mapper/bruker/models.py b/src/aind_metadata_mapper/bruker/models.py index 2232c2b1..5f63e721 100644 --- a/src/aind_metadata_mapper/bruker/models.py +++ b/src/aind_metadata_mapper/bruker/models.py @@ -8,10 +8,11 @@ ScannerLocation, ) from pydantic import Field -from pydantic_settings import BaseSettings +from aind_metadata_mapper.core import BaseJobSettings -class JobSettings(BaseSettings): + +class JobSettings(BaseJobSettings): """Data that needs to be input by user.""" job_settings_name: Literal["Bruker"] = "Bruker" diff --git a/src/aind_metadata_mapper/core.py b/src/aind_metadata_mapper/core.py index 46c192df..29406d04 100644 --- a/src/aind_metadata_mapper/core.py +++ b/src/aind_metadata_mapper/core.py @@ -1,20 +1,178 @@ """Core abstract class that can be used as a template for etl jobs.""" import argparse +import json import logging from abc import ABC, abstractmethod +from functools import cached_property from os import PathLike from pathlib import Path -from typing import Any, Generic, Optional, TypeVar, Union +from typing import Any, Dict, Generic, Optional, Tuple, Type, TypeVar, Union from aind_data_schema.base import AindCoreModel -from pydantic import ValidationError -from pydantic_settings import BaseSettings -from aind_metadata_mapper.models import JobResponse +from pydantic import BaseModel, ConfigDict, Field, ValidationError +from pydantic.fields import FieldInfo +from pydantic_settings import ( + BaseSettings, + EnvSettingsSource, + InitSettingsSource, + PydanticBaseSettingsSource, +) _T = TypeVar("_T", bound=BaseSettings) +class JobResponse(BaseModel): + """Standard model of a JobResponse.""" + + model_config = ConfigDict(extra="forbid") + status_code: int + message: Optional[str] = Field(None) + data: Optional[str] = Field(None) + + +class JsonConfigSettingsSource(PydanticBaseSettingsSource): + """Base class for settings that parse JSON from various sources.""" + + def __init__(self, settings_cls, config_file_location: Path): + """Class constructor.""" + self.config_file_location = config_file_location + super().__init__(settings_cls) + + def _retrieve_contents(self) -> Dict[str, Any]: + """Retrieve and parse the JSON contents from the config file.""" + try: + with open(self.config_file_location, "r") as f: + return json.load(f) + except (json.JSONDecodeError, IOError) as e: + logging.warning( + f"Error loading config from {self.config_file_location}: {e}" + ) + raise e + + @cached_property + def _json_contents(self): + """Cache contents to a property to avoid re-downloading.""" + contents = self._retrieve_contents() + return contents + + def get_field_value( + self, field: FieldInfo, field_name: str + ) -> Tuple[Any, str, bool]: + """ + Gets the value, the key for model creation, and a flag to determine + whether value is complex. + Parameters + ---------- + field : FieldInfo + The field + field_name : str + The field name + + Returns + ------- + Tuple[Any, str, bool] + A tuple contains the key, value and a flag to determine whether + value is complex. 
+ + """ + file_content_json = self._json_contents + field_value = file_content_json.get(field_name) + return field_value, field_name, False + + def prepare_field_value( + self, + field_name: str, + field: FieldInfo, + value: Any, + value_is_complex: bool, + ) -> Any: + """ + Prepares the value of a field. + Parameters + ---------- + field_name : str + The field name + field : FieldInfo + The field + value : Any + The value of the field that has to be prepared + value_is_complex : bool + A flag to determine whether value is complex + + Returns + ------- + Any + The prepared value + + """ + return value + + def __call__(self) -> Dict[str, Any]: + """ + Run this when this class is called. Required to be implemented. + + Returns + ------- + Dict[str, Any] + The fields for the settings defined as a dict object. + + """ + d: Dict[str, Any] = {} + + for field_name, field in self.settings_cls.model_fields.items(): + field_value, field_key, value_is_complex = self.get_field_value( + field, field_name + ) + field_value = self.prepare_field_value( + field_name, field, field_value, value_is_complex + ) + if field_value is not None: + d[field_key] = field_value + + return d + + +class BaseJobSettings(BaseSettings): + """Parent class for generating settings from a config file.""" + + user_settings_config_file: Optional[Path] = Field( + default=None, + repr=False, + description="Optionally pull settings from a local config file.", + ) + + class Config: + """Pydantic config to exclude field from displaying""" + + exclude = {"user_settings_config_file"} + + @classmethod + def settings_customise_sources( + cls, + settings_cls: Type[BaseSettings], + init_settings: InitSettingsSource, + env_settings: EnvSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> Tuple[PydanticBaseSettingsSource, ...]: + """ + Customize the order of settings sources, including JSON file. + """ + config_file = init_settings.init_kwargs.get( + "user_settings_config_file" + ) + sources = [init_settings, env_settings] + + if isinstance(config_file, str): + config_file = Path(config_file) + + if config_file and config_file.is_file(): + sources.append(JsonConfigSettingsSource(settings_cls, config_file)) + + return tuple(sources) + + class GenericEtl(ABC, Generic[_T]): """A generic etl class. Child classes will need to create a JobSettings object that is json serializable. 
Child class will also need to implement diff --git a/src/aind_metadata_mapper/fip/models.py b/src/aind_metadata_mapper/fip/models.py index 608a521c..11b0e142 100644 --- a/src/aind_metadata_mapper/fip/models.py +++ b/src/aind_metadata_mapper/fip/models.py @@ -5,10 +5,11 @@ from typing import List, Literal, Optional from pydantic import Field -from pydantic_settings import BaseSettings +from aind_metadata_mapper.core import BaseJobSettings -class JobSettings(BaseSettings): + +class JobSettings(BaseJobSettings): """Data that needs to be input by user.""" job_settings_name: Literal["FIP"] = "FIP" diff --git a/src/aind_metadata_mapper/gather_metadata.py b/src/aind_metadata_mapper/gather_metadata.py index 97070453..93064f56 100644 --- a/src/aind_metadata_mapper/gather_metadata.py +++ b/src/aind_metadata_mapper/gather_metadata.py @@ -37,8 +37,8 @@ ) from aind_metadata_mapper.fip.session import FIBEtl from aind_metadata_mapper.mesoscope.session import MesoscopeEtl -from aind_metadata_mapper.smartspim.acquisition import SmartspimETL from aind_metadata_mapper.models import JobSettings +from aind_metadata_mapper.smartspim.acquisition import SmartspimETL class GatherMetadataJob: diff --git a/src/aind_metadata_mapper/mesoscope/models.py b/src/aind_metadata_mapper/mesoscope/models.py index c7e37402..19878266 100644 --- a/src/aind_metadata_mapper/mesoscope/models.py +++ b/src/aind_metadata_mapper/mesoscope/models.py @@ -5,10 +5,11 @@ from typing import List, Literal from pydantic import Field -from pydantic_settings import BaseSettings +from aind_metadata_mapper.core import BaseJobSettings -class JobSettings(BaseSettings): + +class JobSettings(BaseJobSettings): """Data to be entered by the user.""" job_settings_name: Literal["Mesoscope"] = "Mesoscope" diff --git a/src/aind_metadata_mapper/models.py b/src/aind_metadata_mapper/models.py index d45afbc8..e6d45859 100644 --- a/src/aind_metadata_mapper/models.py +++ b/src/aind_metadata_mapper/models.py @@ -2,14 +2,12 @@ from pathlib import Path from typing import List, Optional, Union -from typing_extensions import Annotated from aind_data_schema.core.processing import PipelineProcess from aind_data_schema_models.modalities import Modality from aind_data_schema_models.organizations import Organization - -from pydantic import Field, BaseModel, ConfigDict -from pydantic_settings import BaseSettings +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Annotated from aind_metadata_mapper.bergamo.models import ( JobSettings as BergamoSessionJobSettings, @@ -17,6 +15,7 @@ from aind_metadata_mapper.bruker.models import ( JobSettings as BrukerSessionJobSettings, ) +from aind_metadata_mapper.core import BaseJobSettings from aind_metadata_mapper.fip.models import ( JobSettings as FipSessionJobSettings, ) @@ -37,7 +36,7 @@ class JobResponse(BaseModel): data: Optional[str] = Field(None) -class SessionSettings(BaseSettings): +class SessionSettings(BaseJobSettings): """Settings needed to retrieve session metadata""" job_settings: Annotated[ @@ -51,7 +50,7 @@ class SessionSettings(BaseSettings): ] -class AcquisitionSettings(BaseSettings): +class AcquisitionSettings(BaseJobSettings): """Fields needed to retrieve acquisition metadata""" # TODO: we can change this to a tagged union once more acquisition settings @@ -59,21 +58,21 @@ class AcquisitionSettings(BaseSettings): job_settings: SmartSpimAcquisitionJobSettings -class SubjectSettings(BaseSettings): +class SubjectSettings(BaseJobSettings): """Fields needed to retrieve subject metadata""" 
subject_id: str metadata_service_path: str = "subject" -class ProceduresSettings(BaseSettings): +class ProceduresSettings(BaseJobSettings): """Fields needed to retrieve procedures metadata""" subject_id: str metadata_service_path: str = "procedures" -class RawDataDescriptionSettings(BaseSettings): +class RawDataDescriptionSettings(BaseJobSettings): """Fields needed to retrieve data description metadata""" name: str @@ -83,13 +82,13 @@ class RawDataDescriptionSettings(BaseSettings): metadata_service_path: str = "funding" -class ProcessingSettings(BaseSettings): +class ProcessingSettings(BaseJobSettings): """Fields needed to retrieve processing metadata""" pipeline_process: PipelineProcess -class MetadataSettings(BaseSettings): +class MetadataSettings(BaseJobSettings): """Fields needed to retrieve main Metadata""" name: str @@ -104,7 +103,7 @@ class MetadataSettings(BaseSettings): instrument_filepath: Optional[Path] = None -class JobSettings(BaseSettings): +class JobSettings(BaseJobSettings): """Fields needed to gather all metadata""" metadata_service_domain: Optional[str] = None diff --git a/src/aind_metadata_mapper/open_ephys/utils/constants.py b/src/aind_metadata_mapper/open_ephys/utils/constants.py index 1cda2fb4..aafdab8b 100644 --- a/src/aind_metadata_mapper/open_ephys/utils/constants.py +++ b/src/aind_metadata_mapper/open_ephys/utils/constants.py @@ -1,4 +1,5 @@ """ Constants for the naming utils of metadata mapper """ + import re INT_NULL = -99 diff --git a/src/aind_metadata_mapper/smartspim/models.py b/src/aind_metadata_mapper/smartspim/models.py index 87ef4db3..614c5d15 100644 --- a/src/aind_metadata_mapper/smartspim/models.py +++ b/src/aind_metadata_mapper/smartspim/models.py @@ -3,10 +3,10 @@ from pathlib import Path from typing import Literal, Optional -from pydantic_settings import BaseSettings +from aind_metadata_mapper.core import BaseJobSettings -class JobSettings(BaseSettings): +class JobSettings(BaseJobSettings): """Data to be entered by the user.""" # Field can be used to switch between different acquisition etl jobs diff --git a/tests/resources/gather_metadata_job/test_bergamo_configs.json b/tests/resources/gather_metadata_job/test_bergamo_configs.json new file mode 100644 index 00000000..bcbacc27 --- /dev/null +++ b/tests/resources/gather_metadata_job/test_bergamo_configs.json @@ -0,0 +1,57 @@ +{ + "user_settings_config_file": null, + "job_settings_name": "Bergamo", + "input_source": "tests/resources/bergamo", + "output_directory": null, + "experimenter_full_name": [ + "John Apple" + ], + "subject_id": "12345", + "imaging_laser_wavelength": 920, + "fov_imaging_depth": 200, + "fov_targeted_structure": "Primary Motor Cortex", + "notes": "test upload", + "mouse_platform_name": "Standard Mouse Tube", + "active_mouse_platform": false, + "session_type": "BCI", + "iacuc_protocol": "2109", + "rig_id": "442 Bergamo 2p photostim", + "behavior_camera_names": [ + "Side Face Camera", + "Bottom Face Camera" + ], + "ch1_filter_names": [ + "Green emission filter", + "Emission dichroic" + ], + "ch1_detector_name": "Green PMT", + "ch1_daq_name": "PXI", + "ch2_filter_names": [ + "Red emission filter", + "Emission dichroic" + ], + "ch2_detector_name": "Red PMT", + "ch2_daq_name": "PXI", + "imaging_laser_name": "Chameleon Laser", + "photostim_laser_name": "Monaco Laser", + "stimulus_device_names": [ + "speaker", + "lickport" + ], + "photostim_laser_wavelength": 1040, + "fov_coordinate_ml": "1.5", + "fov_coordinate_ap": 1.5, + "fov_reference": "Bregma", + "starting_lickport_position": [ + 
0.0, + -6.0, + 0.0 + ], + "behavior_task_name": "single neuron BCI conditioning", + "hit_rate_trials_0_10": null, + "hit_rate_trials_20_40": null, + "total_hits": null, + "average_hit_rate": null, + "trial_num": null, + "timezone": "US/Pacific" +} \ No newline at end of file diff --git a/tests/resources/job_settings.json b/tests/resources/job_settings.json new file mode 100644 index 00000000..eb9f0237 --- /dev/null +++ b/tests/resources/job_settings.json @@ -0,0 +1,4 @@ +{ + "name": "Anna Apple", + "id": 12345 +} diff --git a/tests/resources/job_settings_corrupt.txt b/tests/resources/job_settings_corrupt.txt new file mode 100644 index 00000000..4048f982 --- /dev/null +++ b/tests/resources/job_settings_corrupt.txt @@ -0,0 +1,4 @@ +{ + "name": "Anna Apple", + "id" +} diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 00000000..8c6d58c0 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,57 @@ +"""Tests class and methods in core module""" + +import json +import os +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +from aind_metadata_mapper.core import BaseJobSettings + +RESOURCES_DIR = Path(os.path.dirname(os.path.realpath(__file__))) / "resources" +CONFIG_FILE_PATH = RESOURCES_DIR / "job_settings.json" +CONFIG_FILE_PATH_CORRUPT = RESOURCES_DIR / "job_settings_corrupt.txt" + + +class TestJobSettings(unittest.TestCase): + """Tests JobSettings can be configured from json file.""" + + class MockJobSettings(BaseJobSettings): + """Mock class for testing purposes""" + + name: str + id: int + + def test_load_from_config_file(self): + """Test job settings can be loaded from config file.""" + + job_settings = self.MockJobSettings( + user_settings_config_file=CONFIG_FILE_PATH + ) + expected_settings_json = json.dumps( + { + "user_settings_config_file": str(CONFIG_FILE_PATH), + "name": "Anna Apple", + "id": 12345, + } + ) + round_trip = self.MockJobSettings.model_validate_json( + expected_settings_json + ) + self.assertEqual( + round_trip.model_dump_json(), job_settings.model_dump_json() + ) + + @patch("logging.warning") + def test_load_from_config_file_json_error(self, mock_log_warn: MagicMock): + """Test job settings raises an error when config file is corrupt""" + + with self.assertRaises(Exception): + self.MockJobSettings( + user_settings_config_file=CONFIG_FILE_PATH_CORRUPT + ) + mock_log_warn.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_gather_metadata.py b/tests/test_gather_metadata.py index 2b0ad995..9c6f8110 100644 --- a/tests/test_gather_metadata.py +++ b/tests/test_gather_metadata.py @@ -17,16 +17,17 @@ from aind_metadata_mapper.bergamo.models import ( JobSettings as BergamoSessionJobSettings, ) -from aind_metadata_mapper.bergamo.session import BergamoEtl from aind_metadata_mapper.bruker.models import ( JobSettings as BrukerSessionJobSettings, ) -from aind_metadata_mapper.bruker.session import MRIEtl from aind_metadata_mapper.core import JobResponse from aind_metadata_mapper.fip.models import ( JobSettings as FipSessionJobSettings, ) -from aind_metadata_mapper.fip.session import FIBEtl +from aind_metadata_mapper.gather_metadata import GatherMetadataJob +from aind_metadata_mapper.mesoscope.models import ( + JobSettings as MesoscopeSessionJobSettings, +) from aind_metadata_mapper.models import ( AcquisitionSettings, JobSettings, @@ -37,11 +38,6 @@ SessionSettings, SubjectSettings, ) -from aind_metadata_mapper.gather_metadata import GatherMetadataJob -from 
aind_metadata_mapper.mesoscope.models import ( - JobSettings as MesoscopeSessionJobSettings, -) -from aind_metadata_mapper.mesoscope.session import MesoscopeEtl from aind_metadata_mapper.smartspim.acquisition import ( JobSettings as SmartSpimAcquisitionJobSettings, ) @@ -52,6 +48,7 @@ / "gather_metadata_job" ) METADATA_DIR = RESOURCES_DIR / "metadata_files" +EXAMPLE_BERGAMO_CONFIGS = RESOURCES_DIR / "test_bergamo_configs.json" class TestGatherMetadataJob(unittest.TestCase): @@ -513,9 +510,7 @@ def test_get_session_metadata_bergamo_success( metadata_job = GatherMetadataJob(settings=job_settings) contents = metadata_job.get_session_metadata() self.assertEqual({"some_key": "some_value"}, contents) - BergamoEtl( - job_settings=bergamo_session_settings - ).run_job.assert_called_once() + mock_run_job.assert_called_once() @patch("aind_metadata_mapper.bruker.session.MRIEtl.run_job") def test_get_session_metadata_bruker_success( @@ -535,9 +530,7 @@ def test_get_session_metadata_bruker_success( metadata_job = GatherMetadataJob(settings=job_settings) contents = metadata_job.get_session_metadata() self.assertEqual({"some_key": "some_value"}, contents) - MRIEtl( - job_settings=bruker_session_settings - ).run_job.assert_called_once() + mock_run_job.assert_called_once() @patch("aind_metadata_mapper.fip.session.FIBEtl.run_job") def test_get_session_metadata_fip_success(self, mock_run_job: MagicMock): @@ -555,7 +548,7 @@ def test_get_session_metadata_fip_success(self, mock_run_job: MagicMock): metadata_job = GatherMetadataJob(settings=job_settings) contents = metadata_job.get_session_metadata() self.assertEqual({"some_key": "some_value"}, contents) - FIBEtl(job_settings=fip_session_settings).run_job.assert_called_once() + mock_run_job.assert_called_once() @patch("aind_metadata_mapper.mesoscope.session.MesoscopeEtl.run_job") def test_get_session_metadata_mesoscope_success( @@ -577,9 +570,7 @@ def test_get_session_metadata_mesoscope_success( metadata_job = GatherMetadataJob(settings=job_settings) contents = metadata_job.get_session_metadata() self.assertEqual({"some_key": "some_value"}, contents) - MesoscopeEtl( - job_settings=mesoscope_session_settings - ).run_job.assert_called_once() + mock_run_job.assert_called_once() def test_session_settings_error(self): """Tests SessionSettings raises error if JobSettings is not expected""" @@ -976,6 +967,27 @@ def test_run_job_main_metadata( self.assertIsNotNone(json_contents.get("_id")) self.assertIsNone(json_contents.get("id")) + def test_from_job_settings_file(self): + """Tests that users can set a session config file when requesting + GatherMetadataJob""" + + test_configs = { + "directory_to_write_to": RESOURCES_DIR, + "session_settings": { + "job_settings": { + "job_settings_name": "Bergamo", + "user_settings_config_file": EXAMPLE_BERGAMO_CONFIGS, + } + }, + } + job_settings = JobSettings.model_validate_json( + json.dumps(test_configs, default=str) + ) + self.assertEqual( + ["John Apple"], + job_settings.session_settings.job_settings.experimenter_full_name, + ) + if __name__ == "__main__": unittest.main()
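
Reviewer note (not part of the patch): a minimal usage sketch of the config-file loading that this change adds to `core.py`, mirroring the pattern exercised in `tests/test_core.py` with the `tests/resources/job_settings.json` fixture. `ExampleSettings` is a hypothetical class named here only for illustration.

```python
from pathlib import Path

from aind_metadata_mapper.core import BaseJobSettings


class ExampleSettings(BaseJobSettings):
    """Illustrative settings class; fields mirror tests/resources/job_settings.json."""

    name: str
    id: int


# Field values are read from the JSON file via the new JsonConfigSettingsSource.
# Per settings_customise_sources, explicit init kwargs and environment variables
# are listed before the JSON source, so they take precedence over the file.
settings = ExampleSettings(
    user_settings_config_file=Path("tests/resources/job_settings.json")
)
print(settings.name, settings.id)  # -> Anna Apple 12345
```

This is the same mechanism that `test_from_job_settings_file` in `tests/test_gather_metadata.py` relies on when it points `session_settings.job_settings.user_settings_config_file` at `tests/resources/gather_metadata_job/test_bergamo_configs.json`.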