Skip to content

Commit

Permalink
fix(mtd): add formatting af uuid in ds xml parsing
Browse files Browse the repository at this point in the history
Add a utility function `format_acquisition_framework_id_from_xml` to normalize the acquisition framework UUID provided for a dataset. In particular, it handles the following specific case:
- Remove the possible prefix "http://oafs.fr/meta/ca/"

Note: this "http://oafs.fr/meta/ca/" prefix was observed on some datasets retrieved from INPN Métadonnées PREPROD for the instance 'Thématique' (ID: 2).
  • Loading branch information
VincentCauchois committed Oct 31, 2024
1 parent aeed4c6 commit 1d8e54f
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion backend/geonature/core/gn_meta/mtd/xml_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import json
from typing import Union

from flask import current_app
from lxml import etree as ET
Expand Down Expand Up @@ -130,10 +131,29 @@ def parse_jdd_xml(xml):

root = ET.fromstring(xml, parser=_xml_parser)
jdd_list = []

def format_acquisition_framework_id_from_xml(
    provided_af_uuid: Union[str, None],
) -> Union[str, None]:
    """
    Format the acquisition framework UUID provided for the dataset,
    i.e. the value of the tag `<jdd:identifiantCadre>` in the XML file.

    Some values carry a "http://oafs.fr/meta/ca/" URL prefix instead of
    being a bare UUID; in that case only the last path segment is kept.

    Args:
        provided_af_uuid (str | None): The acquisition framework UUID as read
            from the XML, possibly prefixed with "http://oafs.fr/meta/ca/".

    Returns:
        Union[str, None]: The formatted acquisition framework UUID, or None if
        none was provided (including a value made of the prefix alone).
    """
    if not provided_af_uuid:
        return None

    prefix = "http://oafs.fr/meta/ca/"
    if not provided_af_uuid.startswith(prefix):
        # Not a prefixed value: return it unchanged.
        return provided_af_uuid

    # Drop surrounding slashes so that a trailing "/" does not produce an
    # empty last segment (which would yield "" instead of the UUID).
    remainder = provided_af_uuid[len(prefix):].strip("/")
    if not remainder:
        # The value was only the prefix: there is no UUID to return.
        return None

    # Keep the last path segment, as the identifier is expected to be there.
    return remainder.rsplit("/", 1)[-1]

for jdd in root.findall(".//" + namespace + "JeuDeDonnees"):
# We extract all the required informations from the different tags of the XML file
jdd_uuid = get_tag_content(jdd, "identifiantJdd")
ca_uuid = get_tag_content(jdd, "identifiantCadre")
ca_uuid = format_acquisition_framework_id_from_xml(get_tag_content(jdd, "identifiantCadre"))
dataset_name = get_tag_content(jdd, "libelle")
dataset_shortname = get_tag_content(jdd, "libelleCourt", default_value="")
dataset_desc = get_tag_content(jdd, "description", default_value="")
Expand Down

0 comments on commit 1d8e54f

Please sign in to comment.