Skip to content

Commit

Permalink
Merge pull request #50 from ecmwf-projects/move-service-definition-files
Browse files Browse the repository at this point in the history
Move service definition files
  • Loading branch information
aperezpredictia authored Dec 18, 2024
2 parents 6d736b3 + 96d1fa3 commit 27d034a
Show file tree
Hide file tree
Showing 50 changed files with 127 additions and 13,351 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ jobs:
repository: ecmwf-projects/cdm-obs.git
ref: 'new-variables'
path: common_data_model
- name: Download cads-forms-insitu
env:
BITBUCKET_TOKEN: ${{ secrets.BITBUCKET_TOKEN }}
timeout-minutes: 2
run: |
git clone --depth 1 -b dev https://"$BITBUCKET_TOKEN"@git.ecmwf.int/scm/cds/cads-forms-insitu.git
- name: Deploy test ingestion database
env:
TEST_INGESTION_DB_PASS: ${{ secrets.TEST_INGESTION_DB_PASS }}
Expand Down Expand Up @@ -88,6 +94,7 @@ jobs:
STORAGE_SECRET_KEY: ${{ secrets.STORAGE_SECRET_KEY}}
STORAGE_SECURE: ${{ secrets.STORAGE_SECURE}}
CDM_TABLES_LOCATION: ${{ github.workspace }}
CADS_OBS_INSITU_LOCATION: ${{ github.workspace }}
run: |
ls ${GITHUB_WORKSPACE}/common_data_model/*
make unit-tests COV_REPORT=xml
Expand Down
13 changes: 6 additions & 7 deletions cdsobs/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from cdsobs.metadata import get_dataset_metadata
from cdsobs.observation_catalogue.repositories.cads_dataset import CadsDatasetRepository
from cdsobs.retrieve.filter_datasets import between
from cdsobs.service_definition.api import get_service_definition
from cdsobs.service_definition.service_definition_models import ServiceDefinition
from cdsobs.storage import S3Client
from cdsobs.utils.logutils import get_logger
Expand All @@ -38,7 +39,6 @@

def run_ingestion_pipeline(
dataset_name: str,
service_definition: ServiceDefinition,
source: str,
session: Session,
config: CDSObsConfig,
Expand All @@ -60,8 +60,6 @@ def run_ingestion_pipeline(
----------
dataset_name :
Name of the dataset, for example insitu-observations-woudc-ozone-total-column-and-profiles
service_definition :
Object produced parsing the service_definition.json.
source :
Name of the data type to read from the dataset. For example "OzoneSonde".
session :
Expand All @@ -80,6 +78,7 @@ def run_ingestion_pipeline(
Month to start reading the data. It only applies to the first year of the interval.
Default is 1.
"""
service_definition = get_service_definition(config, dataset_name)

def _run_for_batch(time_space_batch):
try:
Expand All @@ -106,7 +105,6 @@ def _run_for_batch(time_space_batch):

def run_make_cdm(
dataset_name: str,
service_definition: ServiceDefinition,
source: str,
config: CDSObsConfig,
start_year: int,
Expand All @@ -125,8 +123,6 @@ def run_make_cdm(
----------
dataset_name :
Name of the dataset, for example insitu-observations-woudc-ozone-total-column-and-profiles
service_definition
Object produced parsing the service_definition.json.
source
Name of the data type to read from the dataset. For example "OzoneSonde".
config
Expand All @@ -142,6 +138,7 @@ def run_make_cdm(
make_production. If False, the data only will be loaded and checked for CDM
compliance in memory.
"""
service_definition = get_service_definition(config, dataset_name)

def _run_for_batch(time_batch):
try:
Expand Down Expand Up @@ -196,7 +193,9 @@ def _run_ingestion_pipeline_for_batch(
By default, these time intervals will be skipped.
"""
if not update and _entry_exists(dataset_name, session, source, time_space_batch):
logger.warning("A partition with the chosen parameters already exists")
logger.warning(
"A partition with the chosen parameters already exists and update is set to False."
)
else:
sorted_partitions = _read_homogenise_and_partition(
config, dataset_name, service_definition, source, time_space_batch
Expand Down
20 changes: 0 additions & 20 deletions cdsobs/api_rest/config_helper.py

This file was deleted.

14 changes: 10 additions & 4 deletions cdsobs/api_rest/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,23 @@ def get_capabilities(


@router.get("/capabilities/{dataset}/sources")
def get_sources(dataset: str) -> list[str]:
def get_sources(
dataset: str,
session: Annotated[HttpAPISession, Depends(session_gen)],
) -> list[str]:
"""Get available sources for a given dataset."""
service_definition = get_service_definition(dataset)
service_definition = get_service_definition(session.cdsobs_config, dataset)
return list(service_definition.sources)


@router.get("/{dataset}/service_definition")
def get_dataset_service_definition(dataset: str) -> ServiceDefinition:
def get_dataset_service_definition(
dataset: str,
session: Annotated[HttpAPISession, Depends(session_gen)],
) -> ServiceDefinition:
"""Get the service definition for a dataset."""
try:
return get_service_definition(dataset)
return get_service_definition(session.cdsobs_config, dataset)
except FileNotFoundError:
raise make_http_exception(
status_code=404, message=f"Service definition not found for {dataset=}"
Expand Down
1 change: 1 addition & 0 deletions cdsobs/cli/_get_forms_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_forms_jsons_command(
dataset_name,
catalogue_repository,
output_dir,
config=config,
upload_to_storage=upload,
storage_client=storage_client,
get_stations_file=stations_file,
Expand Down
41 changes: 12 additions & 29 deletions cdsobs/cli/_make_cdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,12 @@
from cdsobs.api import run_make_cdm
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.service_definition.api import validate_service_definition


def make_cdm(
dataset_name: str = typer.Option(
..., "--dataset", "-d", help="Dataset name", show_default=False
),
service_definition_json: Path = typer.Option(
...,
"--service-definition",
"-s",
help="Path to the service_definition.json",
show_default=False,
),
start_year: int = typer.Option(
..., help="Year to start processing the data", show_default=False
),
Expand All @@ -28,7 +20,9 @@ def make_cdm(
),
cdsobs_config_yml: Path = config_yml_typer,
source: str = typer.Option(
"all", help="Process only a given source, by default it processes all"
...,
help="Source to process. Sources are defined in the service definition file, "
"in the sources mapping.",
),
output_dir: Path = typer.Option(
tempfile.gettempdir(),
Expand All @@ -45,23 +39,12 @@ def make_cdm(
):
"""Prepare the data to be uploaded without actually uploading it."""
config = read_and_validate_config(cdsobs_config_yml)

# read and validate service definition
service_definition = validate_service_definition(
str(service_definition_json), config.cdm_tables_location
)[0]
assert service_definition is not None

# Check if we selected only one source
sources = [source] if source != "all" else service_definition.sources.keys()
for source in sources:
run_make_cdm(
dataset_name,
service_definition,
source,
config,
start_year=start_year,
end_year=end_year,
output_dir=output_dir,
save_data=save_data,
)
run_make_cdm(
dataset_name,
source,
config,
start_year=start_year,
end_year=end_year,
output_dir=output_dir,
save_data=save_data,
)
45 changes: 13 additions & 32 deletions cdsobs/cli/_make_production.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,12 @@
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.observation_catalogue.database import get_session
from cdsobs.service_definition.api import validate_service_definition


def make_production(
dataset_name: str = typer.Option(
..., "--dataset", "-d", help="Dataset name", show_default=False
),
service_definition_json: Path = typer.Option(
...,
"--service-definition",
"-s",
help="Path to the service_definition.json",
show_default=False,
),
start_year: int = typer.Option(
..., help="Year to start processing the data", show_default=False
),
Expand All @@ -28,7 +20,9 @@ def make_production(
),
cdsobs_config_yml: Path = config_yml_typer,
source: str = typer.Option(
"all", help="Process only a given source, by default it processes all"
...,
help="Source to process. Sources are defined in the service definition file, "
"in the sources mapping.",
),
update: bool = typer.Option(
False,
Expand All @@ -54,27 +48,14 @@ def make_production(
uploads it to the observation catalogue and storage.
"""
config = read_and_validate_config(cdsobs_config_yml)

# read and validate service definition
service_definition = validate_service_definition(
str(service_definition_json), config.cdm_tables_location
)[0]
assert service_definition is not None

# Check if we selected only one source
sources = [source] if source != "all" else service_definition.sources.keys()

# ingestion pipeline per source
with get_session(config.catalogue_db) as session:
for source in sources:
run_ingestion_pipeline(
dataset_name,
service_definition,
source,
session,
config,
start_year,
end_year,
update,
start_month,
)
run_ingestion_pipeline(
dataset_name,
source,
session,
config,
start_year,
end_year,
update,
start_month,
)
2 changes: 1 addition & 1 deletion cdsobs/cli/_retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def retrieve(
config = validate_config(cdsobs_config_yml)
s3_client = S3Client.from_config(config.s3config)
output_file = retrieve_observations(
config.catalogue_db.get_url(),
config,
s3_client.public_url_base,
retrieve_args,
output_dir,
Expand Down
13 changes: 11 additions & 2 deletions cdsobs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@


def _get_default_cdm_tables_location() -> Path:
if "CDM_TABLES_LOCATION" in os.environ:
return Path(os.environ["CDM_TABLES_LOCATION"])
return _get_default_location("CDM_TABLES_LOCATION")


def _get_default_cads_forms_insitu_location() -> Path:
    """Return the default location used for cads-forms-insitu.

    The path is resolved by ``_get_default_location`` from the
    ``CADS_OBS_INSITU_LOCATION`` environment variable.
    """
    env_varname = "CADS_OBS_INSITU_LOCATION"
    return _get_default_location(env_varname)


def _get_default_location(env_varname: str) -> Path:
    """Return the directory configured through an environment variable.

    Parameters
    ----------
    env_varname :
        Name of the environment variable holding the directory path.

    Returns
    -------
    Path
        The value of ``env_varname`` as a path when the variable is set to a
        non-empty string, otherwise the ``.cdsobs`` directory inside the
        user's home directory.
    """
    # Single lookup instead of "in" + indexing. An empty value (for example an
    # undefined CI variable expanded to "") is treated as unset, rather than
    # silently turning into Path("."), i.e. the current working directory.
    location = os.environ.get(env_varname)
    if location:
        return Path(location)
    return Path.home().joinpath(".cdsobs")

Expand Down Expand Up @@ -183,6 +191,7 @@ class CDSObsConfig(pydantic.BaseModel):
ingestion_databases: Dict[str, DBConfig]
datasets: List[DatasetConfig]
cdm_tables_location: Path = _get_default_cdm_tables_location()
cads_obs_insitu_location: Path = _get_default_cads_forms_insitu_location()

@classmethod
def from_yaml(cls, config_file: Path) -> "CDSObsConfig":
Expand Down
4 changes: 0 additions & 4 deletions cdsobs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@

# From here, all constants are for the tests
cdsobs_path = typing.cast(Path, importlib.resources.files("cdsobs"))
SERVICE_DEFINITION_YML = Path(
cdsobs_path,
"data/insitu-observations-woudc-ozone-total-column-and-profiles/service_definition.yml",
)

TEST_VAR_OUT = "air_temperature"

Expand Down
Loading

0 comments on commit 27d034a

Please sign in to comment.