Skip to content

Commit

Permalink
Address comments on PR #177
Browse files: browse the repository at this point in the history
* Add test to verify appropriate file type extension for geoharvester extract
* Update conditional for helpers.generate_step_output_filename
* Re-add the debug log statement (`logger.debug(json.dumps(event))`) in format_input.lambda_handler
* Define config-level constant: GIS_SOURCES
  • Loading branch information
jonavellecuerdo committed Jan 9, 2024
1 parent 30ad207 commit a3e74f6
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 5 deletions.
2 changes: 1 addition & 1 deletion lambdas/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def generate_extract_command(
if verbose:
extract_command.append("--verbose")

if source in ["gismit", "gisogm"]:
if source in config.GIS_SOURCES:
extract_command.append("harvest")
if run_type == "daily":
extract_command.append("--harvest-type=incremental")
Expand Down
5 changes: 3 additions & 2 deletions lambdas/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import os

GIS_SOURCES = ["gismit", "gisogm"]
INDEX_ALIASES = {
"rdi": ["jpal", "whoas", "zenodo"],
"timdex": ["alma", "aspace", "dspace"],
"geo": ["gismit", "gisogm"],
"geo": GIS_SOURCES,
}
REQUIRED_ENV = {
"TIMDEX_ALMA_EXPORT_BUCKET_ID",
Expand Down Expand Up @@ -83,7 +84,7 @@ def validate_input(input_data: dict) -> None:
# If next step is extract step, required harvest fields are present
# ruff: noqa: SIM102
if input_data["next-step"] == "extract":
if input_data["source"] not in ["gismit", "gisogm"]:
if input_data["source"] not in GIS_SOURCES:
if missing_harvest_fields := [
field for field in REQUIRED_OAI_HARVEST_FIELDS if field not in input_data
]:
Expand Down
4 changes: 3 additions & 1 deletion lambdas/format_input.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os

Expand All @@ -11,6 +12,7 @@ def lambda_handler(event: dict, _context: dict) -> dict:
config.verify_env()
verbose = config.check_verbosity(event.get("verbose", False))
config.configure_logger(logging.getLogger(), verbose)
logger.debug(json.dumps(event))
config.validate_input(event)

run_date = helpers.format_run_date(event["run-date"])
Expand All @@ -27,7 +29,7 @@ def lambda_handler(event: dict, _context: dict) -> dict:
}

if next_step == "extract":
if source in ["gismit", "gisogm"]:
if source in config.GIS_SOURCES:
result["harvester-type"] = "geo"
else:
result["harvester-type"] = "oai"
Expand Down
2 changes: 1 addition & 1 deletion lambdas/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def generate_step_output_filename(
"""
sequence_suffix = f"_{sequence}" if sequence else ""
if step == "extract":
file_type = "jsonl" if "gis" in source else "xml"
file_type = "jsonl" if source in config.GIS_SOURCES else "xml"
elif load_type == "delete":
file_type = "txt"
else:
Expand Down
11 changes: 11 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ def test_generate_index_name():
assert helpers.generate_index_name("testsource") == "testsource-2022-01-02t12-13-14"


def test_generate_step_output_filename_geoharvester_file_type():
    """Check that extract-step output for each geoharvester source ends in .jsonl."""
    # Both GIS sources are checked in the same order as before: gismit, then gisogm.
    for gis_source in ("gismit", "gisogm"):
        output_filename = helpers.generate_step_output_filename(
            gis_source, "index", "prefix", "extract"
        )
        assert output_filename == "prefix-to-index.jsonl"


def test_generate_step_output_filename_with_sequence():
assert (
helpers.generate_step_output_filename(
Expand Down

0 comments on commit a3e74f6

Please sign in to comment.