Skip to content

Commit

Permalink
Merge branch 'optimize' of https://github.com/mapping-commons/sssom-py
Browse files Browse the repository at this point in the history
…into optimize
  • Loading branch information
hrshdhgd committed Nov 20, 2023
2 parents aa8a45c + 2b2be7b commit 2fe6430
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 24 deletions.
21 changes: 10 additions & 11 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,29 +329,28 @@ def _init_mapping_set(meta: Optional[MetadataType]) -> MappingSet:
return mapping_set


# Names of the schema's mapping slots, materialised once at import time as a
# set so that per-row membership tests and set differences do not have to
# re-query the schema object.
MAPPING_SLOTS = set(_get_sssom_schema_object().mapping_slots)


def _get_mapping_dict(row: pd.Series, bad_attrs: Counter) -> Dict[str, Any]:
    """Generate a mapping dictionary from a given row of data.

    It also updates the 'bad_attrs' counter for keys that are not present
    in the sssom_schema_object's mapping_slots.

    :param row: A single row of mapping data, keyed by column name.
    :param bad_attrs: Counter that is updated in place; each key of ``row``
        that is not in ``MAPPING_SLOTS`` is incremented by one.
    :return: Dictionary holding only the recognised, non-empty entries of ``row``.
    """
    # Populate the mapping dictionary with key-value pairs from the row,
    # only if the value exists, is not NaN, and the key is in the schema's mapping slots.
    # The value could be a string or a list and is handled accordingly via _address_multivalued_slot().
    mdict = {
        k: _address_multivalued_slot(k, v)
        for k, v in row.items()
        if v and pd.notna(v) and k in MAPPING_SLOTS
    }

    # Update bad_attrs for keys not in mapping_slots.
    # MAPPING_SLOTS is already a set, so no per-call conversion is needed.
    bad_keys = set(row.keys()) - MAPPING_SLOTS
    for bad_key in bad_keys:
        bad_attrs[bad_key] += 1
    return mdict


Expand Down Expand Up @@ -814,7 +813,6 @@ def _get_mapping_set_from_df(df: pd.DataFrame, meta: Optional[MetadataType] = No

for k, v in bad_attrs.items():
logging.warning(f"No attr for {k} [{v} instances]")

return mapping_set


Expand Down Expand Up @@ -890,8 +888,9 @@ def _ensure_valid_mapping_from_dict(mdict: Dict[str, Any]):
:param mdict: A dictionary containing the mapping metadata.
:return: A valid Mapping object, or None.
"""
mdict.setdefault(MAPPING_JUSTIFICATION, MAPPING_JUSTIFICATION_UNSPECIFIED)

try:
mdict.setdefault(MAPPING_JUSTIFICATION, MAPPING_JUSTIFICATION_UNSPECIFIED)
m = Mapping(**mdict)
except ValueError as e:
logging.warning(
Expand Down
25 changes: 12 additions & 13 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,8 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
non_double_cols.replace(np.nan, "", inplace=True)
df.update(non_double_cols)

return cls.with_converter(
df=sort_df_rows_columns(df), converter=doc.converter, metadata=meta
)
df = sort_df_rows_columns(df)
return cls.with_converter(df=df, converter=doc.converter, metadata=meta)

def to_mapping_set_document(self) -> "MappingSetDocument":
"""Get a mapping set document."""
Expand Down Expand Up @@ -1036,6 +1035,11 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame:
return MappingSetDataFrame.from_mapping_set_document(doc)


# Schema-derived lookup tables used by get_dict_from_mapping(). They are
# evaluated once when the module is imported instead of on every call.
DICT_FROM_MAPPING_ENUM_KEYS = set(_get_sssom_schema_object().dict["enums"])
SLOTS = _get_sssom_schema_object().dict["slots"]
DOUBLE_SLOTS = {
    slot_name for slot_name, slot_def in SLOTS.items() if slot_def["range"] == "double"
}


def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> dict:
"""
Get information for linkml objects (MatchTypeEnum, PredicateModifierEnum) from the Mapping object and return the dictionary form of the object.
Expand All @@ -1044,25 +1048,20 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) ->
:return: Dictionary
"""
map_dict = {}
sssom_schema_object = _get_sssom_schema_object().dict
slots = sssom_schema_object["slots"]
enums_keys = set(sssom_schema_object["enums"].keys())

slots_with_double_as_range = {k for k, v in slots.items() if v["range"] == "double"}

for property in map_obj:
mapping_property = map_obj[property]
if mapping_property is None:
map_dict[property] = np.nan if property in slots_with_double_as_range else ""
map_dict[property] = np.nan if property in DOUBLE_SLOTS else ""
continue

slot_of_interest = slots[property]
is_enum = slot_of_interest["range"] in enums_keys
slot_of_interest = SLOTS[property]
is_enum = slot_of_interest["range"] in DICT_FROM_MAPPING_ENUM_KEYS

# Check if the mapping_property is a list
if isinstance(mapping_property, list):
# If the property is an enumeration and it allows multiple values
if is_enum and slot_of_interest["multivalued"]:
# FIXME needs test
# Join all the enum values into a string separated by '|'
map_dict[property] = "|".join(
enum_value.code.text for enum_value in mapping_property
Expand All @@ -1071,8 +1070,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) ->
# If the property is not an enumeration or doesn't allow multiple values,
# join all the values into a string separated by '|'
map_dict[property] = "|".join(enum_value for enum_value in mapping_property)
# If the mapping_property is not a list but an enumeration
elif is_enum:
# FIXME needs test
# Assign the text of the enumeration code to the property in the dictionary
map_dict[property] = mapping_property.code.text
else:
Expand Down

0 comments on commit 2fe6430

Please sign in to comment.