From 2eba644174775ec7e94f64faff28ba3673a6d8de Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 19 Sep 2023 16:06:53 +0200 Subject: [PATCH] Remove needless output during tests (#428) There is a massive amount of text that's printed to stdout as well as uninformative logging during testing. This PR removes some of that without changing existing functionality. It also switches to using a specific logger in several places instead of directly using e.g. `logging.info` --- src/sssom/cli.py | 14 ++++++++------ src/sssom/io.py | 18 +++++++++++------- src/sssom/parsers.py | 8 +++++--- src/sssom/util.py | 4 +++- src/sssom/writers.py | 4 +++- tests/test_annotate.py | 11 ++++------- tests/test_cli.py | 9 +++------ tests/test_filter.py | 7 ++----- 8 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/sssom/cli.py b/src/sssom/cli.py index 35e200eb..3ba7e03c 100644 --- a/src/sssom/cli.py +++ b/src/sssom/cli.py @@ -11,7 +11,7 @@ .. seealso:: https://click.palletsprojects.com/en/8.0.x/setuptools/ """ -import logging +import logging as _logging import os import sys from operator import itemgetter @@ -61,6 +61,8 @@ ) from .writers import WRITER_FUNCTIONS, write_table +logging = _logging.getLogger(__name__) + SSSOM_SV_OBJECT = ( SSSOMSchemaView.instance if hasattr(SSSOMSchemaView, "instance") else SSSOMSchemaView() ) @@ -124,15 +126,15 @@ @click.version_option(__version__) def main(verbose: int, quiet: bool): """Run the SSSOM CLI.""" - logger = logging.getLogger() + logger = _logging.getLogger() if verbose >= 2: - logger.setLevel(level=logging.DEBUG) + logger.setLevel(level=_logging.DEBUG) elif verbose == 1: - logger.setLevel(level=logging.INFO) + logger.setLevel(level=_logging.INFO) else: - logger.setLevel(level=logging.WARNING) + logger.setLevel(level=_logging.WARNING) if quiet: - logger.setLevel(level=logging.ERROR) + logger.setLevel(level=_logging.ERROR) @main.command() diff --git a/src/sssom/io.py b/src/sssom/io.py index 506022e5..2f9b2933 100644 --- a/src/sssom/io.py +++ b/src/sssom/io.py @@ -270,7 +270,9 @@ def extract_iri(input: str, converter: Converter) -> List[str]: # return new_msdf -def run_sql_query(query: str, inputs: List[str], output: TextIO) -> MappingSetDataFrame: +def run_sql_query( + query: str, inputs: List[str], output: Optional[TextIO] = None +) -> MappingSetDataFrame: """Run a SQL query over one or more SSSOM files. Each of the N inputs is assigned a table name df1, df2, ..., dfN @@ -307,11 +309,12 @@ def run_sql_query(query: str, inputs: List[str], output: TextIO) -> MappingSetDa new_msdf = MappingSetDataFrame.with_converter( df=new_df, converter=msdf.converter, metadata=msdf.metadata ) - write_table(new_msdf, output) + if output is not None: + write_table(new_msdf, output) return new_msdf -def filter_file(input: str, output: TextIO, **kwargs) -> MappingSetDataFrame: +def filter_file(input: str, output: Optional[TextIO] = None, **kwargs) -> MappingSetDataFrame: """Filter a dataframe by dynamically generating queries based on user input. e.g. sssom filter --subject_id x:% --subject_id y:% --object_id y:% --object_id z:% tests/data/basic.tsv @@ -323,7 +326,7 @@ def filter_file(input: str, output: TextIO, **kwargs) -> MappingSetDataFrame: :param input: DataFrame to be queried over. :param output: Output location. - :param **kwargs: Filter options provided by user which generate queries (e.g.: --subject_id x:%). + :param kwargs: Filter options provided by user which generate queries (e.g.: --subject_id x:%). :raises ValueError: If parameter provided is invalid. :return: Filtered MappingSetDataFrame object. """ @@ -358,7 +361,7 @@ def filter_file(input: str, output: TextIO, **kwargs) -> MappingSetDataFrame: def annotate_file( - input: str, output: TextIO, replace_multivalued: bool = False, **kwargs + input: str, output: Optional[TextIO] = None, replace_multivalued: bool = False, **kwargs ) -> MappingSetDataFrame: """Annotate a file i.e. add custom metadata to the mapping set. @@ -366,7 +369,7 @@ def annotate_file( :param output: Output location. :param replace_multivalued: Multivalued slots should be replaced or not, defaults to False - :param **kwargs: Options provided by user + :param kwargs: Options provided by user which are added to the metadata (e.g.: --mapping_set_id http://example.org/abcd) :return: Annotated MappingSetDataFrame object. """ @@ -374,5 +377,6 @@ def annotate_file( are_params_slots(params) input_msdf = parse_sssom_table(input) msdf = augment_metadata(input_msdf, params, replace_multivalued) - write_table(msdf, output) + if output is not None: + write_table(msdf, output) return msdf diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py index 0ba93c85..16fc4d17 100644 --- a/src/sssom/parsers.py +++ b/src/sssom/parsers.py @@ -3,7 +3,7 @@ import io import itertools as itt import json -import logging +import logging as _logging import re import typing from collections import Counter @@ -68,6 +68,8 @@ to_mapping_set_dataframe, ) +logging = _logging.getLogger(__name__) + # * ******************************************************* # Parsers (from file) @@ -418,7 +420,7 @@ def from_sssom_dataframe( mdict, bad_attrs = _get_mdict_ms_and_bad_attrs(row, bad_attrs) mlist.append(_prepare_mapping(Mapping(**mdict))) for k, v in bad_attrs.most_common(): - logging.warning(f"No attr for {k} [{v} instances]") + logging.debug(f"No attr for {k} [{v} instances]") # the autogenerated code's type annotations are _really_ messy. This is in fact okay, # so with a heavy heart we employ type:ignore ms.mappings = mlist # type:ignore @@ -925,7 +927,7 @@ def split_dataframe_by_prefix( & (df[OBJECT_ID].str.startswith(object_prefix + ":")) ] if 0 == len(df_subset): - logging.warning(f"No matches ({len(df_subset)} matches found)") + logging.debug(f"No matches ({len(df_subset)} matches found)") continue subconverter = msdf.converter.get_subconverter( [subject_prefix, object_prefix, relation_prefix] diff --git a/src/sssom/util.py b/src/sssom/util.py index 98bf4428..5406e8ff 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -2,7 +2,7 @@ import itertools as itt import json -import logging +import logging as _logging import os import re from collections import defaultdict @@ -63,6 +63,8 @@ from .sssom_document import MappingSetDocument from .typehints import MetadataType, PrefixMap, get_default_metadata +logging = _logging.getLogger(__name__) + #: The key that's used in the YAML section of an SSSOM file PREFIX_MAP_KEY = "curie_map" diff --git a/src/sssom/writers.py b/src/sssom/writers.py index d7292392..55f260ac 100644 --- a/src/sssom/writers.py +++ b/src/sssom/writers.py @@ -1,7 +1,7 @@ """Serialization functions for SSSOM.""" import json -import logging +import logging as _logging from pathlib import Path from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union @@ -28,6 +28,8 @@ sort_df_rows_columns, ) +logging = _logging.getLogger(__name__) + # noinspection PyProtectedMember RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" diff --git a/tests/test_annotate.py b/tests/test_annotate.py index 26eb17c5..7f03f63d 100644 --- a/tests/test_annotate.py +++ b/tests/test_annotate.py @@ -1,12 +1,9 @@ """Test for annotating MappingSetDataFrame metadata.""" -import sys import unittest from os.path import join from sssom.io import annotate_file - -# from sssom.io import filter_file from sssom.parsers import parse_sssom_table from tests.constants import data_dir @@ -26,7 +23,7 @@ def test_annotate(self): "mapping_set_id": ("http://w3id.org/my/mapping.sssom.tsv",), "mapping_set_version": ("2021-01-01",), } - annotated_msdf = annotate_file(input=self.input, output=sys.stdout, **kwargs) + annotated_msdf = annotate_file(input=self.input, **kwargs) validation_msdf = parse_sssom_table(self.validation_file) self.assertEqual(annotated_msdf.metadata, validation_msdf.metadata) @@ -38,7 +35,7 @@ def test_annotate_multivalued(self): kwargs = { "creator_id": ("orcid:0123",), } - annotated_msdf = annotate_file(input=self.input, output=sys.stdout, **kwargs) + annotated_msdf = annotate_file(input=self.input, **kwargs) self.assertTrue(len(annotated_msdf.metadata["creator_id"]), 3) @@ -46,11 +43,11 @@ def test_annotate_multivalued(self): kwargs = { "creator_id": ("orcid:1234",), } - annotated_msdf_2 = annotate_file(input=self.input, output=sys.stdout, **kwargs) + annotated_msdf_2 = annotate_file(input=self.input, **kwargs) self.assertTrue(len(annotated_msdf_2.metadata["creator_id"]), 2) def test_annotate_fail(self): """Pass invalid param to see if it fails.""" kwargs = {"abcd": ("x:%", "y:%")} with self.assertRaises(ValueError): - annotate_file(input=self.input, output=sys.stdout, **kwargs) + annotate_file(input=self.input, **kwargs) diff --git a/tests/test_cli.py b/tests/test_cli.py index 56ff1e17..fff4b60b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -86,11 +86,8 @@ def test_cli_multiple_input(self): def run_successful(self, result: Result, obj: Any) -> None: """Check the test result is successful.""" - self.assertEqual( - result.exit_code, - 0, - f"{obj} did not perform as expected: {result.exception}", - ) + if result.exit_code: + raise RuntimeError(f"{obj} failed") from result.exception def run_convert(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result: """Run the convert test.""" @@ -143,7 +140,7 @@ def run_split(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result: def run_ptable(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result: """Run the ptable test.""" - params = [test_case.filepath] + params = [test_case.filepath, "--output", test_case.get_out_file("ptable.tsv")] result = runner.invoke(ptable, params) self.run_successful(result, test_case) return result diff --git a/tests/test_filter.py b/tests/test_filter.py index 9f3924ea..ecf47a2f 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,13 +1,10 @@ """Test for filtering MappingSetDataFrame columns.""" -import sys import unittest from os.path import join from sssom.constants import PREDICATE_MODIFIER from sssom.io import filter_file - -# from sssom.io import filter_file from sssom.parsers import parse_sssom_table from tests.constants import data_dir @@ -25,7 +22,7 @@ def setUp(self) -> None: def test_filter(self): """Test filtering of rows.""" kwargs = {"subject_id": ("x:%", "y:%"), "object_id": ("y:%", "z:%", "a:%")} - filtered_msdf = filter_file(input=self.input, output=sys.stdout, **kwargs) + filtered_msdf = filter_file(input=self.input, **kwargs) validation_msdf = parse_sssom_table(self.validation_file) # Drop empty columns since read_sssom_table drops them by default. @@ -41,4 +38,4 @@ def test_filter_fail(self): """Pass invalid param to see if it fails.""" kwargs = {"subject_ids": ("x:%", "y:%"), "object_id": ("y:%", "z:%")} with self.assertRaises(ValueError): - filter_file(input=self.input, output=sys.stdout, **kwargs) + filter_file(input=self.input, **kwargs)