From c35753056ed6fa0447023169d031b84814bd9e4d Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:45:37 -0400 Subject: [PATCH 1/4] fix: Force jsonschema to use our validator --- libcove/lib/common.py | 17 +++++++++++++---- tests/lib/test_common.py | 6 ------ tests/lib/test_converters.py | 5 ----- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index dbec286..16fc5b5 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -720,11 +720,9 @@ def get_additional_codelist_values(schema_obj, json_data): path_string = "/".join(path_no_num) if path_string not in additional_codelist_values: - codelist_url = schema_obj.codelists + codelist codelist_amend_urls = [] if hasattr(schema_obj, "extended_codelist_urls"): - # Replace URL if this codelist is overridden by an extension. # Last one to be applied wins. if schema_obj.extended_codelist_urls.get(codelist): @@ -771,7 +769,6 @@ def get_additional_fields_info(json_data, schema_fields, context, fields_regex=F root_additional_fields = set() for field, field_info in fields_present.items(): - if field in schema_fields: continue if fields_regex and LANGUAGE_RE.search(field.split("/")[-1]): @@ -804,7 +801,6 @@ def get_counts_additional_fields( fields_regex=False, additional_fields_info=None, ): - if not additional_fields_info: schema_fields = schema_obj.get_pkg_schema_fields() additional_fields_info = get_additional_fields_info( @@ -850,6 +846,12 @@ def get_schema_validation_errors( schema_url=schema_obj.schema_host, ) + # Force jsonschema to use our validator. + # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + validator + ) + our_validator = validator( pkg_schema_obj, format_checker=format_checker, resolver=resolver ) @@ -1000,6 +1002,13 @@ def get_schema_validation_errors( validation_errors[ json.dumps(unique_validator_key, default=decimal_default) ].append(value) + + # Restore jsonschema's default validator, to not interfere with other software. + # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + jsonschema.validators.Draft4Validator + ) + return dict(validation_errors) diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index 6296933..fd4ae36 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -333,7 +333,6 @@ def test_get_schema_deprecated_paths(): def test_schema_dict_fields_generator_release_schema_deprecated_fields(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -362,7 +361,6 @@ def test_schema_dict_fields_generator_release_schema_deprecated_fields(): def test_schema_dict_fields_generator_schema_with_list_and_oneof(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -396,7 +394,6 @@ def test_schema_dict_fields_generator_schema_with_list_and_oneof(): def test_fields_present_generator_tenders_releases_2_releases(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -449,7 +446,6 @@ def test_fields_present_generator_tenders_releases_2_releases(): def test_fields_present_generator_data_root_is_list(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -501,7 +497,6 @@ def test_fields_present_generator_data_root_is_list(): def test_get_additional_fields_info(): - simple_data = { "non_additional_field": "a", "non_additional_list": [1, 2], @@ -1227,7 +1222,6 @@ def test_get_field_coverage_oc4ids(): ), ) def test_oneOfEnumSelectorField(data, count, errors): - with open(common_fixtures("schema_with_one_of_enum_selector_field.json")) as fp: schema = json.load(fp) diff --git a/tests/lib/test_converters.py b/tests/lib/test_converters.py index 11a8669..4660301 100644 --- a/tests/lib/test_converters.py +++ b/tests/lib/test_converters.py @@ -8,7 +8,6 @@ def test_convert_json_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -55,7 +54,6 @@ def test_convert_json_1(): def test_convert_activity_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -110,7 +108,6 @@ def test_convert_activity_xml_1(): def test_convert_org_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -166,7 +163,6 @@ def test_convert_org_xml_1(): def test_convert_json_root_is_list_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -214,7 +210,6 @@ def test_convert_json_root_is_list_1(): def test_convert_csv_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) From 38725a273f8bb625607a25bc87b3468883820725 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Fri, 7 Jul 2023 00:41:39 -0400 Subject: [PATCH 2/4] Upgrade to jsonschema 4.18.0. Drop Python 3.6 and 3.7 support. --- .github/workflows/test.yml | 2 +- libcove/lib/common.py | 36 +++++++++++++----------------------- setup.py | 3 ++- tests/lib/test_common.py | 11 ++++------- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 08a6b9b..b541903 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: # We also only use Linux servers, so don't test on Mac # Need to use an older Ubuntu so Python 3.6 is available os: [ubuntu-20.04] - python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10", "3.11" ] + python-version: [ 3.8, 3.9, "3.10", "3.11" ] jsonref-version: ["==0.3", ">1"] steps: - uses: actions/checkout@v2 diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 16fc5b5..cdf1f78 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -15,6 +15,7 @@ import jsonref import jsonschema.validators import requests +from referencing import Registry, Resource try: from functools import cached_property @@ -22,7 +23,7 @@ from cached_property import cached_property from flattentool import unflatten -from jsonschema import FormatChecker, RefResolver +from jsonschema import FormatChecker from jsonschema._utils import ensure_list, extras_msg, find_additional_properties, uniq from jsonschema.exceptions import UndefinedTypeCheck, ValidationError @@ -234,7 +235,9 @@ def oneOf_draft4(validator, oneOf, instance, schema): context=all_errors, ) - more_valid = [s for i, s in subschemas if validator.is_valid(instance, s)] + more_valid = [ + s for i, s in subschemas if validator.evolve(schema=s).is_valid(instance) + ] if more_valid: more_valid.append(first_valid) reprs = ", ".join(repr(schema) for schema in more_valid) @@ -846,6 +849,8 @@ def get_schema_validation_errors( schema_url=schema_obj.schema_host, ) + registry = Registry(retrieve=resolver.retrieve) + # Force jsonschema to use our validator. # https://github.com/python-jsonschema/jsonschema/issues/994 jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( @@ -853,7 +858,7 @@ def get_schema_validation_errors( ) our_validator = validator( - pkg_schema_obj, format_checker=format_checker, resolver=resolver + pkg_schema_obj, format_checker=format_checker, registry=registry ) for e in our_validator.iter_errors(json_data): message = e.message @@ -1177,7 +1182,7 @@ def get_fields_present(*args, **kwargs): } -class CustomRefResolver(RefResolver): +class CustomRefResolver: """This RefResolver is only for use with the jsonschema library""" def __init__(self, *args, **kw): @@ -1190,44 +1195,29 @@ def __init__(self, *args, **kw): # this is ignored when you supply a file self.schema_url = kw.pop("schema_url", "") self.config = kw.pop("config", "") - super().__init__(*args, **kw) - def resolve_remote(self, uri): + def retrieve(self, uri): schema_name = uri.split("/")[-1] if self.schema_file and self.file_schema_name == schema_name: uri = self.schema_file else: uri = urljoin(self.schema_url, schema_name) - document = self.store.get(uri) - - if document: - return document if uri.startswith("http"): # This branch of the if-statement in-lines `RefResolver.resolve_remote()`, but using `get_request()`. + # https://github.com/python-jsonschema/jsonschema/blob/dbc398245a583cb2366795dc529ae042d10c1577/jsonschema/validators.py#L1008-L1023 scheme = urlsplit(uri).scheme - - if scheme in self.handlers: - result = self.handlers[scheme](uri) - elif scheme in ["http", "https"]: - # Requests has support for detecting the correct encoding of - # json over http + if scheme in ("http", "https"): result = get_request(uri, config=self.config).json() else: - # Otherwise, pass off to urllib and assume utf-8 with urlopen(uri) as url: result = json.loads(url.read().decode("utf-8")) - - if self.cache_remote: - self.store[uri] = result - return result else: with open(uri) as schema_file: result = json.load(schema_file) add_is_codelist(result) - self.store[uri] = result - return result + return Resource.from_contents(result) def _get_schema_deprecated_paths( diff --git a/setup.py b/setup.py index eb0ccdb..2b27a68 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,8 @@ long_description="A data review library", install_requires=[ "jsonref", - "jsonschema>=3", + "jsonschema>=4.18", + "referencing", "requests", "cached-property;python_version<'3.8'", "flattentool>=0.11.0", diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index fd4ae36..2ab66cb 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -7,6 +7,7 @@ import jsonschema import pytest from freezegun import freeze_time +from referencing.exceptions import CannotDetermineSpecification from libcove.lib.common import ( SchemaJsonMixin, @@ -766,7 +767,7 @@ def get_pkg_schema_obj(self): assert "[Decimal('3.1')] is too short" in validation_error_json -def test_property_that_is_not_json_schema_doesnt_raise_exception(caplog, tmpdir): +def test_property_that_is_not_json_schema_doesnt_raise_exception(tmpdir): tmpdir.join("test.json").write( json.dumps({"properties": {"bad_property": "not_a_json_schema"}}) ) @@ -778,12 +779,8 @@ class DummySchemaObj: def get_pkg_schema_obj(self): return {"$ref": "test.json"} - validation_errors = get_schema_validation_errors({}, DummySchemaObj(), "", {}, {}) - assert validation_errors == {} - assert ( - "A 'properties' object contains a 'bad_property' value that is not a JSON Schema: 'not_a_json_schema'" - in caplog.text - ) + with pytest.raises(CannotDetermineSpecification): + get_schema_validation_errors({}, DummySchemaObj(), "", {}, {}) @pytest.mark.parametrize( From 2d7b84a9e0668da3f48eb99b5576c32109ea518e Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Sun, 9 Jul 2023 01:09:57 -0400 Subject: [PATCH 3/4] Allow subclasses to provide their own jsonschema registry --- libcove/lib/common.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 68f7ba5..4bd7e1c 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -833,24 +833,27 @@ def get_schema_validation_errors( if extra_checkers: format_checker.checkers.update(extra_checkers) - if getattr(schema_obj, "extended", None): - resolver = CustomRefResolver( - "", - pkg_schema_obj, - config=getattr(schema_obj, "config", None), - schema_url=schema_obj.schema_host, - schema_file=schema_obj.extended_schema_file, - file_schema_name=schema_obj.schema_name, - ) + if hasattr(schema_obj, "registry"): + registry = schema_obj.registry else: - resolver = CustomRefResolver( - "", - pkg_schema_obj, - config=getattr(schema_obj, "config", None), - schema_url=schema_obj.schema_host, - ) + if getattr(schema_obj, "extended", None): + resolver = CustomRefResolver( + "", + pkg_schema_obj, + config=getattr(schema_obj, "config", None), + schema_url=schema_obj.schema_host, + schema_file=schema_obj.extended_schema_file, + file_schema_name=schema_obj.schema_name, + ) + else: + resolver = CustomRefResolver( + "", + pkg_schema_obj, + config=getattr(schema_obj, "config", None), + schema_url=schema_obj.schema_host, + ) - registry = Registry(retrieve=resolver.retrieve) + registry = Registry(retrieve=resolver.retrieve) # Force jsonschema to use our validator. # https://github.com/python-jsonschema/jsonschema/issues/994 From 2603ca3d685c9b501a11930f2028ee46ebdb2f47 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:36:04 -0500 Subject: [PATCH 4/4] Fix merge to nest registry logic under correct branch --- libcove/lib/common.py | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 25bed96..3ee7353 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -814,28 +814,6 @@ def get_schema_validation_errors( if extra_checkers: format_checker.checkers.update(extra_checkers) - if hasattr(schema_obj, "registry"): - registry = schema_obj.registry - else: - if getattr(schema_obj, "extended", None): - resolver = CustomRefResolver( - "", - pkg_schema_obj, - config=getattr(schema_obj, "config", None), - schema_url=schema_obj.schema_host, - schema_file=schema_obj.extended_schema_file, - file_schema_name=schema_obj.schema_name, - ) - else: - resolver = CustomRefResolver( - "", - pkg_schema_obj, - config=getattr(schema_obj, "config", None), - schema_url=schema_obj.schema_host, - ) - - registry = Registry(retrieve=resolver.retrieve) - # Force jsonschema to use our validator. # https://github.com/python-jsonschema/jsonschema/issues/994 jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( @@ -845,6 +823,28 @@ def get_schema_validation_errors( if hasattr(schema_obj, "validator"): our_validator = schema_obj.validator(validator, format_checker) else: + if hasattr(schema_obj, "registry"): + registry = schema_obj.registry + else: + if getattr(schema_obj, "extended", None): + resolver = CustomRefResolver( + "", + pkg_schema_obj, + config=getattr(schema_obj, "config", None), + schema_url=schema_obj.schema_host, + schema_file=schema_obj.extended_schema_file, + file_schema_name=schema_obj.schema_name, + ) + else: + resolver = CustomRefResolver( + "", + pkg_schema_obj, + config=getattr(schema_obj, "config", None), + schema_url=schema_obj.schema_host, + ) + + registry = Registry(retrieve=resolver.retrieve) + our_validator = validator( pkg_schema_obj, format_checker=format_checker, registry=registry )