From 72b631f667fcd11f928d7623b52eb3976ca581c2 Mon Sep 17 00:00:00 2001
From: Benjamin Adams
Date: Thu, 19 May 2022 16:58:04 -0400
Subject: [PATCH 1/3] Black code modifications

---
 compliance_checker/acdd.py                    |   8 +-
 compliance_checker/cf/appendix_c.py           |  10 +-
 compliance_checker/cf/appendix_d.py           |  33 ++-
 compliance_checker/cf/cf_1_6.py               | 266 ++++++++++--------
 compliance_checker/cf/cf_1_7.py               |  27 +-
 compliance_checker/cf/cf_1_8.py               | 195 +++++++------
 compliance_checker/cf/cf_base.py              |   2 +
 compliance_checker/cf/util.py                 |   2 +
 compliance_checker/cfutil.py                  |  30 +-
 compliance_checker/ioos.py                    |  11 +-
 compliance_checker/runner.py                  |   6 +-
 compliance_checker/suite.py                   |  34 +--
 compliance_checker/tests/helpers.py           |  16 +-
 compliance_checker/tests/test_cf.py           | 217 +++++++-------
 .../tests/test_cf_integration.py              |  86 +++---
 compliance_checker/tests/test_ioos_profile.py |  10 +-
 compliance_checker/tests/test_suite.py        |  19 +-
 setup.py                                      |   3 +-
 18 files changed, 532 insertions(+), 443 deletions(-)

diff --git a/compliance_checker/acdd.py b/compliance_checker/acdd.py
index f19903d5..a27d9eb4 100644
--- a/compliance_checker/acdd.py
+++ b/compliance_checker/acdd.py
@@ -769,7 +769,7 @@ def check_metadata_link(self, ds):
 
         :param netCDF4.Dataset ds: An open netCDF dataset
         """
-        if not hasattr(ds, u"metadata_link"):
+        if not hasattr(ds, "metadata_link"):
             return
         msgs = []
         meta_link = getattr(ds, "metadata_link")
@@ -784,14 +784,14 @@ def check_id_has_no_blanks(self, ds):
 
         :param netCDF4.Dataset ds: An open netCDF dataset
         """
-        if not hasattr(ds, u"id"):
+        if not hasattr(ds, "id"):
             return
-        if " " in getattr(ds, u"id"):
+        if " " in getattr(ds, "id"):
             return Result(
                 BaseCheck.MEDIUM,
                 False,
                 "no_blanks_in_id",
-                msgs=[u"There should be no blanks in the id field"],
+                msgs=["There should be no blanks in the id field"],
             )
         else:
             return Result(BaseCheck.MEDIUM, True, "no_blanks_in_id", msgs=[])
diff --git a/compliance_checker/cf/appendix_c.py b/compliance_checker/cf/appendix_c.py
index f2eb88af..344e2626 100644
--- a/compliance_checker/cf/appendix_c.py
+++ b/compliance_checker/cf/appendix_c.py
@@ -4,7 +4,9 @@
 # the same units as the standard name canonical units, "1" is unitless for
 # observation counts, and None is used for status_flag, which expects units
 # not to be present
-valid_modifiers = {"detection_minimum": "u",
-                   "number_of_observations": "1",
-                   "standard_error": "u",
-                   "status_flag": None}
+valid_modifiers = {
+    "detection_minimum": "u",
+    "number_of_observations": "1",
+    "standard_error": "u",
+    "status_flag": None,
+}
diff --git a/compliance_checker/cf/appendix_d.py b/compliance_checker/cf/appendix_d.py
index 70c13e56..5e80ca55 100644
--- a/compliance_checker/cf/appendix_d.py
+++ b/compliance_checker/cf/appendix_d.py
@@ -27,21 +27,26 @@
 # vertical coordinates only have one computed_standard_name, but some have
 # multiple acceptable values.
 ocean_computed_standard_names = {
-    "altitude": {"zlev": "altitude",
-                 "eta": "sea_surface_height_above_geoid",
-                 "depth": "sea_floor_depth_below_geoid"},
-    "height_above_geopotential_datum":
-        {"zlev": "height_above_geopotential_datum",
-         "eta": "sea_surface_height_above_geopotential_datum",
-         "depth": "sea_floor_depth_below_geopotential_datum"},
-    "height_above_reference_ellipsoid":
-        {"zlev": "height_above_reference_ellipsoid",
-         "eta": "sea_surface_height_above_reference_ellipsoid",
-         "depth": "sea_floor_depth_below_reference_ellipsoid"},
-    "height_above_mean_sea_level":
-        {"zlev": "height_above_mean_sea_level",
-         "eta": "sea_surface_height_above_mean_sea_level",
-         "depth": "sea_floor_depth_below_mean_sea_level"}
+    "altitude": {
+        "zlev": "altitude",
+        "eta": "sea_surface_height_above_geoid",
+        "depth": "sea_floor_depth_below_geoid",
+    },
+    "height_above_geopotential_datum": {
+        "zlev": "height_above_geopotential_datum",
+        "eta": "sea_surface_height_above_geopotential_datum",
+        "depth": "sea_floor_depth_below_geopotential_datum",
+    },
+    "height_above_reference_ellipsoid": {
+        "zlev": "height_above_reference_ellipsoid",
+        "eta": "sea_surface_height_above_reference_ellipsoid",
+        "depth": "sea_floor_depth_below_reference_ellipsoid",
+    },
+    "height_above_mean_sea_level": {
+        "zlev": "height_above_mean_sea_level",
+        "eta": "sea_surface_height_above_mean_sea_level",
+        "depth": "sea_floor_depth_below_mean_sea_level",
+    },
 }
 
 dimless_vertical_coordinates_1_6 = {  # only for CF-1.6
diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py
index dbf3e422..56fe3747 100644
--- a/compliance_checker/cf/cf_1_6.py
+++ b/compliance_checker/cf/cf_1_6.py
@@ -15,17 +15,22 @@
 from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx
 from compliance_checker.cf import util
 from compliance_checker.cf.appendix_c import valid_modifiers
-from compliance_checker.cf.appendix_d import (dimless_vertical_coordinates_1_6,
-                                              no_missing_terms)
+from compliance_checker.cf.appendix_d import (
+    dimless_vertical_coordinates_1_6,
+    no_missing_terms,
+)
 from compliance_checker.cf.appendix_e import cell_methods16
-from compliance_checker.cf.appendix_f import (grid_mapping_attr_types16,
-                                              grid_mapping_dict16)
+from compliance_checker.cf.appendix_f import (
+    grid_mapping_attr_types16,
+    grid_mapping_dict16,
+)
 from compliance_checker.cf.cf_base import CFNCCheck, appendix_a_base
 import difflib
 
 logger = logging.getLogger(__name__)
 
+
 class CF1_6Check(CFNCCheck):
     """CF-1.6-specific implementation of CFBaseCheck; supports checking netCDF datasets.
@@ -56,8 +61,10 @@ def check_filename(self, ds):
         """Checks that the filename ends with .nc"""
         # IMPLEMENTS CONFORMANCE 2.1
         filename_suffix = TestCtx(BaseCheck.HIGH, self.section_titles["2.1"])
-        filename_suffix.assert_true(ds.filepath().endswith("nc"),
-                                    f'Dataset path {ds.filepath} must end with ".nc"')
+        filename_suffix.assert_true(
+            ds.filepath().endswith(".nc"),
+            f'Dataset path {ds.filepath()} must end with ".nc"',
+        )
         return filename_suffix.to_result()
 
     def check_data_types(self, ds):
@@ -158,13 +165,13 @@ def _check_add_offset_scale_factor_type(self, variable, attr_name):
             att.dtype == variable.dtype
         ) or (  # will short-circuit or if first condition is true
             isinstance(att, (np.float32, np.float64, float))
-            and variable.dtype in (np.byte, np.short, np.int16, np.int,
-                                   np.int32, int))
+            and variable.dtype
+            in (np.byte, np.short, np.int16, np.int, np.int32, int)
+        )
 
         if not val:
             msgs.append(error_msg)
 
-        return Result(BaseCheck.MEDIUM, val, self.section_titles["8.1"],
-                      msgs)
+        return Result(BaseCheck.MEDIUM, val, self.section_titles["8.1"], msgs)
 
     def check_add_offset_scale_factor_type(self, ds):
         """
@@ -186,13 +193,20 @@ def check_add_offset_scale_factor_type(self, ds):
         both = set(add_offset_vars).intersection(scale_factor_vars)
         both_msgs = []
         for both_var in sorted(both, key=lambda var: var.name):
-            if (both_var.scale_factor.dtype !=
-                    both_var.add_offset.dtype):
-                both_msgs.append("When both scale_factor and add_offset "
-                                 f"are supplied for variable {both_var.name}, "
-                                 "they must have the same type")
-        results.append(Result(BaseCheck.MEDIUM, not bool(both_msgs),
-                              self.section_titles["8.1"], both_msgs))
+            if both_var.scale_factor.dtype != both_var.add_offset.dtype:
+                both_msgs.append(
+                    "When both scale_factor and add_offset "
+                    f"are supplied for variable {both_var.name}, "
+                    "they must have the same type"
+                )
+        results.append(
+            Result(
+                BaseCheck.MEDIUM,
+                not bool(both_msgs),
+                self.section_titles["8.1"],
+                both_msgs,
+            )
+        )
 
         for _att_vars_tup in (
             ("add_offset", add_offset_vars),
@@ -579,18 +593,25 @@ def check_units(self, ds):
         modifier_variables = cfutil._find_standard_name_modifier_variables(ds)
         forecast_variables = cfutil.get_forecast_metadata_variables(ds)
 
-        dimless_vert = {var.name for var in
-                        ds.get_variables_by_attributes(standard_name=lambda s: s in self.appendix_d_parametric_coords)
-                        if not hasattr(var, "units")}
+        dimless_vert = {
+            var.name
+            for var in ds.get_variables_by_attributes(
+                standard_name=lambda s: s in self.appendix_d_parametric_coords
+            )
+            if not hasattr(var, "units")
+        }
         # check anything remaining that has units
-        #unit_containing =
-        unit_required_variables = (set(
-            coordinate_variables
-            + auxiliary_coordinates
-            + geophysical_variables
-            + forecast_variables
-            + modifier_variables)  # standard names with modifiers require proper units, *except* for flags, where they should not be present
-            - dimless_vert)
+        # unit_containing =
+        unit_required_variables = (
+            set(
+                coordinate_variables
+                + auxiliary_coordinates
+                + geophysical_variables
+                + forecast_variables
+                + modifier_variables
+            )  # standard names with modifiers require proper units, *except* for flags, where they should not be present
+            - dimless_vert
+        )
 
         for name in unit_required_variables:
             # For reduced horizontal grids, the compression index variable does
@@ -620,8 +641,7 @@ def check_units(self, ds):
             valid_units = self._check_valid_cf_units(ds, name)
             ret_val.append(valid_units)
 
-            units_attr_is_string = TestCtx(BaseCheck.MEDIUM,
-                                           self.section_titles["3.1"])
+            units_attr_is_string = TestCtx(BaseCheck.MEDIUM, self.section_titles["3.1"])
 
             # side effects, but better than teasing out the individual result
             if units is not None and units_attr_is_string.assert_true(
@@ -685,18 +705,19 @@ def _check_valid_cf_units(self, ds, variable_name):
 
         if unit_type == "u":
             try:
-                reference = (self._std_names[standard_name].
-                             canonical_units)
+                reference = self._std_names[standard_name].canonical_units
             # if standard name isn't found, there won't be an associated units
             # but a standard name error will be raised elsewhere
             except KeyError:
                 return valid_units.to_result()
         elif unit_type == "1":
-            reference = "1" 
+            reference = "1"
         elif unit_type is None:
-            valid_units.assert_true(units is None,
-                f"units attribute for variable {variable_name} must be unset "
-                "when status_flag standard name modifier is set")
+            valid_units.assert_true(
+                units is None,
+                f"units attribute for variable {variable_name} must be unset "
+                "when status_flag standard name modifier is set",
+            )
             return valid_units.to_result()
 
         # Is this even in the database? also, if there is no standard_name,
@@ -728,7 +749,9 @@ def _check_valid_cf_units(self, ds, variable_name):
         try:
             units_conv = Unit(units)
         except ValueError:
-            valid_units.messages.append(f'Unit string "{units}" is not recognized by UDUnits')
+            valid_units.messages.append(
+                f'Unit string "{units}" is not recognized by UDUnits'
+            )
             valid_units.out_of += 1
             return valid_units
         else:
@@ -739,10 +762,12 @@ def _check_valid_cf_units(self, ds, variable_name):
         # that use time relative to a reference point, despite canonical units
         # being expressed as "s"/seconds
         if standard_name not in {"time", "forecast_reference_time"}:
-            valid_units.assert_true(units_conv.is_convertible(Unit(reference)),
-                                    f'Units "{units}" for variable '
-                                    f"{variable_name} must be convertible to "
-                                    f'canonical units "{reference}"')
+            valid_units.assert_true(
+                units_conv.is_convertible(Unit(reference)),
+                f'Units "{units}" for variable '
+                f"{variable_name} must be convertible to "
+                f'canonical units "{reference}"',
+            )
 
         return valid_units.to_result()
@@ -816,9 +841,11 @@ def _check_valid_standard_units(self, ds, variable_name):
         if standard_name_modifier == "number_of_observations":
             valid_standard_units.out_of += 1
             if units != "1":
-                err_msg = (f"When variable {variable_name} has a "
-                           "standard name modifier of number_of_observations, "
-                           "the specified units must be 1")
+                err_msg = (
+                    f"When variable {variable_name} has a "
+                    "standard name modifier of number_of_observations, "
+                    "the specified units must be 1"
+                )
                 valid_standard_units.messages.append(err_msg)
             else:
                 valid_standard_units.score += 1
@@ -939,21 +966,19 @@ def check_standard_name(self, ds):
                     ),
                 )
                 valid_std_name.out_of += 1
-                if (standard_name not in self._std_names):
-                    err_msg = (
-                        "standard_name {} is not defined in Standard Name Table v{}."
-                        .format(
-                            standard_name or "undefined",
-                            self._std_names._version))
-                    close_matches = difflib.get_close_matches(standard_name,
-                                                              self._std_names)
+                if standard_name not in self._std_names:
+                    err_msg = "standard_name {} is not defined in Standard Name Table v{}.".format(
+                        standard_name or "undefined", self._std_names._version
+                    )
+                    close_matches = difflib.get_close_matches(
+                        standard_name, self._std_names
+                    )
                     if close_matches:
                         err_msg += f" Possible close match(es): {close_matches}"
                     valid_std_name.messages.append(err_msg)
                 else:
                     valid_std_name.score += 1
-
                 ret_val.append(valid_std_name.to_result())
 
             # 2) optional - if modifiers, should be in table
@@ -962,7 +987,9 @@ def check_standard_name(self, ds):
                 valid_modifier.assert_true(
                     standard_name_modifier in valid_modifiers,
                     'Standard name modifier "{}" for variable {} is not a valid modifier '
-                    "according to CF Appendix C".format(standard_name_modifier, name),
+                    "according to CF Appendix C".format(
+                        standard_name_modifier, name
+                    ),
                 )
                 ret_val.append(valid_modifier.to_result())
@@ -1098,8 +1125,7 @@ def check_flags(self, ds):
                 else:
                     allv = np.all(vals_arr & masks_arr == vals_arr)
 
-                allvr = Result(BaseCheck.MEDIUM, allv,
-                               self.section_titles["3.5"])
+                allvr = Result(BaseCheck.MEDIUM, allv, self.section_titles["3.5"])
                 if not allvr.value:
                     allvr.msgs = [
                         "flag masks and flag values for '{}' combined don't equal flag values".format(
@@ -1131,12 +1157,12 @@ def _check_flag_values(self, ds, name):
         valid_values = TestCtx(BaseCheck.HIGH, self.section_titles["3.5"])
 
         # IMPLEMENTATION CONFORMANCE 3.5 REQUIRED 2/8
-        valid_values.assert_true(hasattr(variable, "flag_meanings"),
+        valid_values.assert_true(
+            hasattr(variable, "flag_meanings"),
             f"Variable {variable.name} must have attribute flag_meanings "
-            "defined when flag_values attribute is present"
+            "defined when flag_values attribute is present",
         )
-
         # the flag values must be independent, no repeating values
         flag_set = np.unique(flag_values)
         valid_values.assert_true(
@@ -1158,7 +1184,8 @@ def _check_flag_values(self, ds, name):
         valid_values.assert_true(
             len(flag_meanings) == np.array(flag_values).size,
             f"{name}'s flag_meanings and flag_values should have the same "
-            "number of elements.")
+            "number of elements.",
+        )
 
         return valid_values.to_result()
@@ -1194,10 +1221,11 @@ def _check_flag_masks(self, ds, name):
             or np.issubdtype(variable.dtype, "b")
         )
 
-        valid_masks.assert_true(0 not in np.array(flag_masks),
-                                f"flag_masks for variable {variable.name} must "
-                                "not contain zero as an element")
-
+        valid_masks.assert_true(
+            0 not in np.array(flag_masks),
+            f"flag_masks for variable {variable.name} must "
+            "not contain zero as an element",
+        )
 
         valid_masks.assert_true(
             type_ok,
@@ -1211,7 +1239,7 @@ def _check_flag_masks(self, ds, name):
             # scalars from netCDF4 Python
             len(flag_meanings) == np.array(flag_masks).size,
             f"{name} flag_meanings and flag_masks should have the same "
-            "number of elements."
+            "number of elements.",
         )
 
         return valid_masks.to_result()
@@ -1597,8 +1625,7 @@ def check_dimensional_vertical_coordinate(
             ):
                 continue
 
-            valid_vertical_coord = TestCtx(BaseCheck.HIGH,
-                                           self.section_titles["4.3"])
+            valid_vertical_coord = TestCtx(BaseCheck.HIGH, self.section_titles["4.3"])
             valid_vertical_coord.assert_true(
                 isinstance(units, str) and units,
                 "§4.3.1 {}'s units must be defined for vertical coordinates, "
@@ -1801,7 +1828,8 @@ def check_calendar(self, ds):
             "366_day",
             "360_day",
             "julian",
-            "none"}
+            "none",
+        }
 
         ret_val = []
@@ -1820,8 +1848,9 @@ def check_calendar(self, ds):
             # passes if the calendar is valid, otherwise notify of invalid
             # calendar
             else:
-                result = Result(BaseCheck.LOW, True, self.section_titles["4.4"],
-                                reasoning)
+                result = Result(
+                    BaseCheck.LOW, True, self.section_titles["4.4"], reasoning
+                )
 
                 ret_val.append(result)
@@ -1834,14 +1863,16 @@ def _check_leap_time(self, time_variable):
         leap_time = TestCtx(BaseCheck.HIGH, self.section_titles["4.4"])
         leap_time.out_of = 1
         # IMPLEMENTATION CONFORMANCE 4.4.1 REQUIRED 2, 3 / 5
-        if (not hasattr(time_variable, "month_lengths") or not
-                (hasattr(time_variable.month_lengths, "dtype") and
-                 np.issubdtype(time_variable.month_lengths.dtype, np.integer) and
-                 time_variable.month_lengths.size == 12)):
+        if not hasattr(time_variable, "month_lengths") or not (
+            hasattr(time_variable.month_lengths, "dtype")
+            and np.issubdtype(time_variable.month_lengths.dtype, np.integer)
+            and time_variable.month_lengths.size == 12
+        ):
             leap_time.messages.append(
                 f"For nonstandard calendar on variable {time_variable.name}, "
                 "attribute month_lengths must be supplied as a 12-element "
-                "integer array")
+                "integer array"
+            )
             return leap_time.to_result()
         # If leap years are included, then attributes leap_month and
         # leap_year must be included.
@@ -1849,31 +1880,37 @@ def _check_leap_time(self, time_variable):
         # IMPLEMENTATION CONFORMANCE 4.4.1 REQUIRED 4,5/5
         if hasattr(time_variable, "leap_month"):
             leap_time.assert_true(
-                (np.isscalar(time_variable.leap_month) and
-                 hasattr(time_variable.leap_month, "dtype") and
-                 np.issubdtype(time_variable.leap_month.dtype, np.integer) and
-                 1 <= time_variable.leap_month <= 12),
-                "When attribute leap_month is supplied for variable "
-                f"{time_variable.name}, the value must be a scalar integer "
-                "between 1 and 12")
+                (
+                    np.isscalar(time_variable.leap_month)
+                    and hasattr(time_variable.leap_month, "dtype")
+                    and np.issubdtype(time_variable.leap_month.dtype, np.integer)
+                    and 1 <= time_variable.leap_month <= 12
+                ),
+                "When attribute leap_month is supplied for variable "
+                f"{time_variable.name}, the value must be a scalar integer "
+                "between 1 and 12",
+            )
             # IMPLEMENTATION CONFORMANCE 4.4.1 RECOMMENDED 1/2
             if not has_leap_year:
                 leap_time.out_of += 1
-                fail_message = (f"For time variable {time_variable.name}, "
-                                "attribute leap_year must be present if "
-                                "leap_month attribute is defined")
+                fail_message = (
+                    f"For time variable {time_variable.name}, "
+                    "attribute leap_year must be present if "
+                    "leap_month attribute is defined"
+                )
                 leap_time.messages.append(fail_message)
 
         # IMPLEMENTATION CONFORMANCE 4.4.1 REQUIRED 5/5
         if has_leap_year:
-            leap_time.assert_true(np.isscalar(time_variable.leap_year) and
-                                  hasattr(time_variable.leap_year, "dtype"),
-                                  "When attribute leap_year is supplied for variable "
-                                  f"{time_variable.name}, the value must be a scalar "
-                                  "integer")
+            leap_time.assert_true(
+                np.isscalar(time_variable.leap_year)
+                and hasattr(time_variable.leap_year, "dtype"),
+                "When attribute leap_year is supplied for variable "
+                f"{time_variable.name}, the value must be a scalar "
+                "integer",
+            )
         return leap_time.to_result()
 
-
 ###############################################################################
 # Chapter 5: Coordinate Systems
 ###############################################################################
@@ -2912,12 +2949,11 @@ def check_climatological_statistics(self, ds):
             if hasattr(clim_coord_var, "bounds"):
                 climatology_result.out_of += 1
                 climatology_ctx.messages.append(
-                        f"Variable {clim_coord_var.name} has a climatology "
-                        "attribute and cannot also have a bounds attribute."
-                        )
+                    f"Variable {clim_coord_var.name} has a climatology "
+                    "attribute and cannot also have a bounds attribute."
+                )
                 result = Result(
-                    BaseCheck.MEDIUM, False, (self.section_titles["7.4"]),
-                    reasoning
+                    BaseCheck.MEDIUM, False, (self.section_titles["7.4"]), reasoning
                 )
 
             # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 2/6
@@ -2932,33 +2968,32 @@ def check_climatological_statistics(self, ds):
             else:
                 # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 4/6
                 clim_var = ds.variables[clim_coord_var.climatology]
-                if clim_var.dtype is str or not np.issubdtype(clim_var,
-                                                              np.number):
+                if clim_var.dtype is str or not np.issubdtype(clim_var, np.number):
                     climatology_ctx.out_of += 1
                     climatology_ctx.messages.append(
                         f"Climatology variable {clim_var.name} is not a numeric type"
-                        )
+                    )
                 # IMPLEMENTATION CONFORMANCE REQUIRED 6/6
-                if (hasattr(clim_var, "_FillValue") or
-                        hasattr(clim_var, "missing_value")):
+                if hasattr(clim_var, "_FillValue") or hasattr(
+                    clim_var, "missing_value"
+                ):
                     climatology_ctx.out_of += 1
                     climatology_ctx.messages.append(
                         f"Climatology variable {clim_var.name} may not contain"
                         "attributes _FillValue or missing_value"
-                        )
+                    )
 
                 # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 5/6
                 for same_attr in ("units", "standard_name", "calendar"):
                     if hasattr(clim_var, same_attr):
-                        climatology_ctx.assert_true(getattr(clim_var, same_attr) ==
-                                                    getattr(clim_coord_var, same_attr,
-                                                            None),
-                            f"Attribute {same_attr} must have the same value in both "
-                            "variables {clim_var.name} and {clim_coord_var.name}")
+                        climatology_ctx.assert_true(
+                            getattr(clim_var, same_attr)
+                            == getattr(clim_coord_var, same_attr, None),
+                            f"Attribute {same_attr} must have the same value in both "
+                            f"variables {clim_var.name} and {clim_coord_var.name}",
+                        )
 
             ret_val.append(climatology_ctx.to_result())
 
-
-
         # check that coordinate bounds are in the proper order.
         # make sure last elements are boundary variable specific dimensions
         # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 3/6
@@ -2984,11 +3019,14 @@ def check_climatological_statistics(self, ds):
 
             # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 3/6 - dim size of 2 for
             # climatology-specific dimension
-            elif (ds.dimensions[
+            elif (
+                ds.dimensions[
                     ds.variables[clim_coord_var.climatology].dimensions[-1]
-                ].size != 2):
+                ].size
+                != 2
+            ):
                 reasoning.append(
-                    "Climatology dimension \"{}\" should only contain two elements".format(
+                    'Climatology dimension "{}" should only contain two elements'.format(
                         ds.variables[clim_coord_var.climatology].name
                     )
                 )
@@ -3124,7 +3162,9 @@ def check_packed_data(self, ds):
                     valid = False
                     # IMPLEMENTATION CONFORMANCE REQUIRED 3/3
                     # IMPLEMENTATION CONFORMANCE REQUIRED 3/3
-                    reasoning.append("Variable is not of type byte, short, or int as required for different type add_offset/scale_factor.")
+                    reasoning.append(
+                        "Variable is not of type byte, short, or int as required for different type add_offset/scale_factor."
+                    )
 
             result = Result(
                 BaseCheck.MEDIUM, valid, self.section_titles["8.1"], reasoning
diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py
index 28c5a381..ebf0486b 100644
--- a/compliance_checker/cf/cf_1_7.py
+++ b/compliance_checker/cf/cf_1_7.py
@@ -33,6 +33,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class CF1_7Check(CF1_6Check):
     """Implementation for CF v1.7.
     Inherits from CF1_6Check as most of the checks are the same."""
@@ -87,21 +88,21 @@ def check_external_variables(self, ds):
         :param netCDF4.Dataset ds: An open netCDF dataset
         :rtype: compliance_checker.base.Result
         """
-        external_vars_ctx = TestCtx(
-            BaseCheck.MEDIUM, self.section_titles["2.6.3"]
-        )
+        external_vars_ctx = TestCtx(BaseCheck.MEDIUM, self.section_titles["2.6.3"])
         # IMPLEMENTATION CONFORMANCE 2.6.3 REQUIRED 2/2
         try:
             external_var_names = set(ds.external_variables.strip().split())
 
-            bad_external_var_names = (external_var_names.intersection(ds.variables))
+            bad_external_var_names = external_var_names.intersection(ds.variables)
 
             if bad_external_var_names:
                 external_vars_ctx.out_of += 1
-                bad_msg = ("Global attribute external_variables should not "
-                           "have any variable names which are present in the dataset. "
-                           "Currently, the following names appear in both external_variables "
-                           f"and the dataset's variables: {bad_external_var_names}")
+                bad_msg = (
+                    "Global attribute external_variables should not "
+                    "have any variable names which are present in the dataset. "
+                    "Currently, the following names appear in both external_variables "
+                    f"and the dataset's variables: {bad_external_var_names}"
+                )
                 external_vars_ctx.messages.append(bad_msg)
 
         # string/global attributes are handled in Appendix A checks
@@ -110,9 +111,6 @@ def check_external_variables(self, ds):
 
         return external_vars_ctx.to_result()
 
-
-
-
     def check_actual_range(self, ds):
         """
         Check the actual_range attribute of variables. As stated in
@@ -794,8 +792,7 @@ def check_grid_mapping(self, ds):
                     )
                 elif len_vdatum_name_attrs == 1:
                     # should be one or zero attrs
-                    proj_db_path = os.path.join(pyproj.datadir.get_data_dir(),
-                                                "proj.db")
+                    proj_db_path = os.path.join(pyproj.datadir.get_data_dir(), "proj.db")
                     try:
                         with sqlite3.connect(proj_db_path) as conn:
                             v_datum_attr = next(iter(vert_datum_attrs))
@@ -830,7 +827,9 @@ def check_standard_name_deprecated_modifiers(self, ds):
         """
         deprecated_var_names = cfutil._find_standard_name_modifier_variables(ds, True)
         if deprecated_var_names:
-            warn(f"Deprecated standard_name modifiers found on variables {deprecated_var_names}")
+            warn(
+                f"Deprecated standard_name modifiers found on variables {deprecated_var_names}"
+            )
 
     def _process_v_datum_str(self, v_datum_str, conn):
         vdatum_query = """SELECT 1 FROM alias_name WHERE
diff --git a/compliance_checker/cf/cf_1_8.py b/compliance_checker/cf/cf_1_8.py
index bcfd12e4..f89ff43f 100644
--- a/compliance_checker/cf/cf_1_8.py
+++ b/compliance_checker/cf/cf_1_8.py
@@ -11,8 +11,13 @@
 from compliance_checker.cf.util import reference_attr_variables, string_from_var_type
 import itertools
 import warnings
-from shapely.geometry import (MultiPoint, LineString, MultiLineString, Polygon,
-                              MultiPolygon)
+from shapely.geometry import (
+    MultiPoint,
+    LineString,
+    MultiLineString,
+    Polygon,
+    MultiPolygon,
+)
 from compliance_checker.cf.util import reference_attr_variables
 
 """
@@ -40,11 +45,13 @@ class CF1_8Check(CF1_7Check):
 
     def __init__(self, options=None):
         super(CF1_8Check, self).__init__(options)
-        self.section_titles.update({"2.7":
-                                    "§2.7 Groups",
-                                    "6.1.2":
-                                    "§6.1.2 Taxon Names and Identifiers",
-                                    "7.5": "§7.5 Geometries"})
+        self.section_titles.update(
+            {
+                "2.7": "§2.7 Groups",
+                "6.1.2": "§6.1.2 Taxon Names and Identifiers",
+                "7.5": "§7.5 Geometries",
+            }
+        )
 
     def check_groups(self, ds: MemoizedDataset):
         """
@@ -120,7 +127,8 @@ def check_geometry(self, ds: Dataset):
         :returns list: List of error messages
         """
         vars_with_geometry = ds.get_variables_by_attributes(
-            geometry=lambda g: g is not None)
+            geometry=lambda g: g is not None
+        )
         results = []
         unique_geometry_var_names = {var.geometry for var in vars_with_geometry}
         if unique_geometry_var_names:
@@ -128,8 +136,9 @@ def check_geometry(self, ds: Dataset):
             geom_valid.out_of += 1
             for geometry_var_name in unique_geometry_var_names:
                 if geometry_var_name not in ds.variables:
-                    geom_valid.messages.append("Cannot find geometry variable "
-                                               f"named {geometry_var_name}")
+                    geom_valid.messages.append(
+                        "Cannot find geometry variable " f"named {geometry_var_name}"
+                    )
                     results.append(geom_valid.to_result())
                     continue
                 else:
@@ -140,15 +149,18 @@ def check_geometry(self, ds: Dataset):
             try:
                 node_coord_var_names = geometry_var.node_coordinates
             except AttributeError as e:
-                geom_valid.messages.append('Could not find required attribute '
-                                           '"node_coordinates" in geometry '
-                                           f'variable "{geometry_var_name}"')
+                geom_valid.messages.append(
+                    "Could not find required attribute "
+                    '"node_coordinates" in geometry '
+                    f'variable "{geometry_var_name}"'
+                )
                 results.append(geom_valid.to_result())
             if not isinstance(node_coord_var_names, str):
                 geom_valid.messages.append(
-                    'Attribute "node_coordinates" in geometry '
-                    f'variable "{geometry_var_name}" must be '
-                    'a string')
+                    'Attribute "node_coordinates" in geometry '
+                    f'variable "{geometry_var_name}" must be '
+                    "a string"
+                )
                 results.append(geom_valid.to_result())
                 continue
             split_coord_names = node_coord_var_names.strip().split(" ")
@@ -161,38 +173,42 @@ def check_geometry(self, ds: Dataset):
             # If any variables weren't found, we can't continue
             if not_found_node_vars:
                 geom_valid.messages.append(
-                    "The following referenced node coordinate"
-                    "variables for geometry variable"
-                    f'"{geometry_var_name}" were not found: '
-                    f'{not_found_node_vars}')
+                    "The following referenced node coordinate "
+                    "variables for geometry variable "
+                    f'"{geometry_var_name}" were not found: '
+                    f"{not_found_node_vars}"
+                )
                 results.append(geom_valid.to_result())
                 continue
 
             return error_msgs
 
-            node_count = reference_attr_variables(ds,
-                                                  getattr(geometry_var, "node_count", None))
+            node_count = reference_attr_variables(
+                ds, getattr(geometry_var, "node_count", None)
+            )
             # multipart lines and polygons only
-            part_node_count = reference_attr_variables(ds,
-                                                       getattr(geometry_var, "part_node_count", None))
+            part_node_count = reference_attr_variables(
+                ds, getattr(geometry_var, "part_node_count", None)
+            )
             # polygons with interior geometry only
-            interior_ring = reference_attr_variables(ds,
-                                                     getattr(geometry_var, "interior_ring", None))
+            interior_ring = reference_attr_variables(
+                ds, getattr(geometry_var, "interior_ring", None)
+            )
 
             if geometry_type == "point":
                 geometry = PointGeometry(node_coord_vars, node_count)
             elif geometry_type == "line":
-                geometry = LineGeometry(node_coord_vars, node_count,
-                                        part_node_count)
+                geometry = LineGeometry(node_coord_vars, node_count, part_node_count)
             elif geometry_type == "polygon":
-                geometry = PolygonGeometry(node_coord_vars, node_count,
-                                           part_node_count,
-                                           interior_ring)
+                geometry = PolygonGeometry(
+                    node_coord_vars, node_count, part_node_count, interior_ring
+                )
             else:
                 geom_valid.messages.append(
-                    f'For geometry variable "{geometry_var_name}'
-                    'the attribute "geometry_type" must exist'
-                    'and have one of the following values:'
-                    '"point", "line", "polygon"')
+                    f'For geometry variable "{geometry_var_name}" '
+                    'the attribute "geometry_type" must exist '
+                    "and have one of the following values: "
+                    '"point", "line", "polygon"'
+                )
results.append(geom_valid.to_result()) continue # check geometry @@ -237,10 +253,12 @@ def check_taxa(self, ds: Dataset): ret_val = [] # taxa identification variables taxa_name_variables = ds.get_variables_by_attributes( - standard_name="biological_taxon_name") + standard_name="biological_taxon_name" + ) taxa_lsid_variables = ds.get_variables_by_attributes( - standard_name="biological_taxon_identifier") - + standard_name="biological_taxon_identifier" + ) + def match_taxa_standard_names(standard_name_string): """ Match variables which are standard_names related to taxa, but @@ -483,10 +501,10 @@ def __init__(self, coord_vars, node_count): def _split_mulitpart_geometry(self): arr_extents_filt = self.part_node_count[self.part_node_count > 0] - splits = np.split(np.vstack(self.coord_vars).T, - arr_extents_filt.cumsum()[:-1]) + splits = np.split(np.vstack(self.coord_vars).T, arr_extents_filt.cumsum()[:-1]) return splits + class PointGeometry(GeometryStorage): """Class for validating Point/MultiPoint geometries""" @@ -499,19 +517,21 @@ def check_geometry(self): expected_node_count = self.node_count if all(len(cv.dimensions) != 0 for cv in self.coord_vars): - same_dim_group = itertools.groupby(self.coord_vars, - lambda x: x.dimensions) - same_dim = (next(same_dim_group, True) and - not next(same_dim_group, False)) + same_dim_group = itertools.groupby(self.coord_vars, lambda x: x.dimensions) + same_dim = next(same_dim_group, True) and not next(same_dim_group, False) if not same_dim: - self.errors.append("For a point geometry, coordinate " - "variables must be the same length as " - "node_count defined, or must be " - "length 1 if node_count is not set") + self.errors.append( + "For a point geometry, coordinate " + "variables must be the same length as " + "node_count defined, or must be " + "length 1 if node_count is not set" + ) return self.errors + class LineGeometry(GeometryStorage): """Class for validating Line/MultiLine geometries""" + def __init__(self, coord_vars, node_count, part_node_count): super().__init__(coord_vars, node_count) self.part_node_count = part_node_count @@ -520,40 +540,46 @@ def __init__(self, coord_vars, node_count, part_node_count): def check_geometry(self): geom_errors = [] - same_dim_group = itertools.groupby(self.coord_vars, - lambda x: x.dimensions) - same_dim = (next(same_dim_group, True) and - not next(same_dim_group, False)) + same_dim_group = itertools.groupby(self.coord_vars, lambda x: x.dimensions) + same_dim = next(same_dim_group, True) and not next(same_dim_group, False) if not same_dim: - raise IndexError("Coordinate variables must be the same length. " - "If node_count is specified, this value must " - "also sum to the length of the coordinate " - "variables.") + raise IndexError( + "Coordinate variables must be the same length. " + "If node_count is specified, this value must " + "also sum to the length of the coordinate " + "variables." + ) # if a multipart if self.node_count is not None: same_length = len(self.coord_vars[0]) == self.node_count[:].sum() if not same_length: - geom_errors.append("Coordinate variables must be the same " - "length. If node_count is specified, this " - "value must also sum to the length of the " - "coordinate variables.") + geom_errors.append( + "Coordinate variables must be the same " + "length. If node_count is specified, this " + "value must also sum to the length of the " + "coordinate variables." 
+ ) if self.part_node_count is not None: if not np.issubdtype(self.part_node_count.dtype, np.integer): - geom_errors.append("when part_node_count is specified, it must " - "be an array of integers") + geom_errors.append( + "when part_node_count is specified, it must " + "be an array of integers" + ) same_node_count = len(self.coord_vars[0]) == self.node_count[:].sum() if not same_node_count: - geom_errors.append("The sum of part_node_count must be equal " - "to the value of node_count") + geom_errors.append( + "The sum of part_node_count must be equal " + "to the value of node_count" + ) return geom_errors class PolygonGeometry(LineGeometry): """Class for validating Line/MultiLine geometries""" + # TODO/clarify: Should polygons be simple, i.e. non-self intersecting? # Presumably - def __init__(self, coord_vars, node_count, part_node_count, - interior_ring): + def __init__(self, coord_vars, node_count, part_node_count, interior_ring): super().__init__(coord_vars, node_count, part_node_count) self.part_node_count = part_node_count self.interior_ring = interior_ring @@ -578,7 +604,9 @@ def check_polygon_orientation(self, transposed_coords, interior=False): try: polygon = Polygon(transposed_coords.tolist()) except ValueError: - raise ValueError("Polygon contains too few points to perform orientation test") + raise ValueError( + "Polygon contains too few points to perform orientation test" + ) ccw = polygon.exterior.is_ccw return not ccw if interior else ccw @@ -590,8 +618,7 @@ def check_geometry(self): if messages: return messages if self.part_node_count is not None: - extents = np.concatenate([np.array([0]), - self.part_node_count[:].cumsum()]) + extents = np.concatenate([np.array([0]), self.part_node_count[:].cumsum()]) if self.interior_ring is not None: ring_orientation = self.interior_ring[:].astype(bool) else: @@ -599,8 +626,7 @@ def check_geometry(self): current_node_count = self.node_count[:].copy() node_indexer_len = len(self.part_node_count) else: - extents = np.concatenate([np.array([0]), - self.node_count[:].cumsum()]) + extents = np.concatenate([np.array([0]), self.node_count[:].cumsum()]) node_indexer_len = len(self.node_count) ring_orientation = np.zeros(node_indexer_len, dtype=bool) # TODO: is it necessary to check whether part_node_count "consumes" @@ -608,19 +634,22 @@ def check_geometry(self): # a node part of 9, follow by next 3 will consume a node part of # 3 after consuming for i in range(node_indexer_len): - extent_slice = slice(extents[i], extents[i+1]) - poly_sliced = np.vstack([cv[extent_slice] for cv in - self.coord_vars]).T - pass_orientation = (self.check_polygon_orientation( - poly_sliced, - ring_orientation[i])) + extent_slice = slice(extents[i], extents[i + 1]) + poly_sliced = np.vstack([cv[extent_slice] for cv in self.coord_vars]).T + pass_orientation = self.check_polygon_orientation( + poly_sliced, ring_orientation[i] + ) if not pass_orientation: - orient_fix = (("exterior", "counterclockwise") - if not ring_orientation[i] else - ("interior", "clockwise")) - message = (f"An {orient_fix[0]} polygon referred to by " - f"coordinates ({poly_sliced}) must have coordinates " - f"in {orient_fix[1]} order") + orient_fix = ( + ("exterior", "counterclockwise") + if not ring_orientation[i] + else ("interior", "clockwise") + ) + message = ( + f"An {orient_fix[0]} polygon referred to by " + f"coordinates ({poly_sliced}) must have coordinates " + f"in {orient_fix[1]} order" + ) messages.append(message) return messages diff --git a/compliance_checker/cf/cf_base.py 
b/compliance_checker/cf/cf_base.py index 9dafad2d..58628215 100644 --- a/compliance_checker/cf/cf_base.py +++ b/compliance_checker/cf/cf_base.py @@ -33,6 +33,7 @@ logger = logging.getLogger(__name__) + class CFBaseCheck(BaseCheck): """ CF Convention Checker Base @@ -1277,6 +1278,7 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): "valid_range": {"Type": "N", "attr_loc": {"D", "C"}, "cf_section": None}, } + class CFNCCheck(BaseNCCheck, CFBaseCheck): @classmethod def beliefs(cls): # @TODO diff --git a/compliance_checker/cf/util.py b/compliance_checker/cf/util.py index 6050dd67..e36966fe 100644 --- a/compliance_checker/cf/util.py +++ b/compliance_checker/cf/util.py @@ -235,6 +235,7 @@ def get_safe(dict_instance, keypath, default=None): except Exception: return default + class VariableReferenceError(Exception): """A variable to assign bad variable references to""" @@ -581,6 +582,7 @@ def is_vertical_coordinate(var_name, var): satisfied |= getattr(var, "positive", "").lower() in ("up", "down") return satisfied + def compare_unit_types(specified, reference): """ Compares two unit strings via UDUnits diff --git a/compliance_checker/cfutil.py b/compliance_checker/cfutil.py index 10191706..da5f8390 100644 --- a/compliance_checker/cfutil.py +++ b/compliance_checker/cfutil.py @@ -110,10 +110,11 @@ def is_dimensionless_standard_name(standard_name_table, standard_name): # standard_name must be string, so if it is not, it is *wrong* by default if not isinstance(standard_name, str): return False - found_standard_name = standard_name_table.find(".//entry[@id='{}']".format(standard_name)) + found_standard_name = standard_name_table.find( + ".//entry[@id='{}']".format(standard_name) + ) if found_standard_name is not None: - canonical_units = (Unit(found_standard_name.find("canonical_units"). 
- text)) + canonical_units = Unit(found_standard_name.find("canonical_units").text) return canonical_units.is_dimensionless() # if the standard name is not found, assume we need units for the time being else: @@ -374,14 +375,13 @@ def get_cell_boundary_variables(ds): boundary_variables.append(var.bounds) return boundary_variables + @lru_cache(128) def get_bounds_variables(ds): - contains_bounds = ds.get_variables_by_attributes(bounds= - lambda s: s in ds.variables) + contains_bounds = ds.get_variables_by_attributes(bounds=lambda s: s in ds.variables) return {ds.variables[parent_var.bounds] for parent_var in contains_bounds} - @lru_cache(128) def get_geophysical_variables(ds): """ @@ -392,8 +392,7 @@ def get_geophysical_variables(ds): """ parameters = [] for variable in ds.variables: - if (is_geophysical(ds, variable) and - variable not in get_bounds_variables(ds)): + if is_geophysical(ds, variable) and variable not in get_bounds_variables(ds): parameters.append(variable) return parameters @@ -742,11 +741,18 @@ def match_modifier_variables(standard_name_str): if not return_deprecated: matches = re.search(r"^\w+ +\w+", standard_name_str) else: - matches = re.search(r"^\w+ +(?:status_flag|number_of_observations)$", standard_name_str) + matches = re.search( + r"^\w+ +(?:status_flag|number_of_observations)$", standard_name_str + ) return bool(matches) - return [var.name for var in - ds.get_variables_by_attributes(standard_name= - match_modifier_variables)] + + return [ + var.name + for var in ds.get_variables_by_attributes( + standard_name=match_modifier_variables + ) + ] + def get_flag_variables(ds): """ diff --git a/compliance_checker/ioos.py b/compliance_checker/ioos.py index 6a215228..c00985ea 100644 --- a/compliance_checker/ioos.py +++ b/compliance_checker/ioos.py @@ -935,7 +935,7 @@ def check_cf_role_variables(self, ds): [ ( f"Invalid featureType '{feature_type_attr}'; please see the " - "IOOS 1.2 Profile and CF-1.7 Conformance documents for valid featureType" + "IOOS 1.2 Profile and CF-1.7 Conformance documents for valid featureType" ) ], ) @@ -959,9 +959,8 @@ def check_cf_role_variables(self, ds): elif feature_type == "point": return Result( - BaseCheck.MEDIUM, - True, - "CF DSG: featureType=trajectoryProfile") + BaseCheck.MEDIUM, True, "CF DSG: featureType=trajectoryProfile" + ) else: return Result( @@ -971,8 +970,8 @@ def check_cf_role_variables(self, ds): [ ( f"Invalid featureType '{feature_type_attr}'; " - "please see the IOOS 1.2 Profile and CF-1.7 " - "Conformance documents for valid featureType" + "please see the IOOS 1.2 Profile and CF-1.7 " + "Conformance documents for valid featureType" ) ], ) diff --git a/compliance_checker/runner.py b/compliance_checker/runner.py index 16f2cb15..9755318d 100644 --- a/compliance_checker/runner.py +++ b/compliance_checker/runner.py @@ -77,8 +77,7 @@ def run_checker( for loc in locs: # loop through each dataset and run specified checks ds = cs.load_dataset(loc) - score_groups = cs.run_all(ds, checker_names, include_checks, - skip_checks) + score_groups = cs.run_all(ds, checker_names, include_checks, skip_checks) for group in score_groups.values(): all_groups.append(group[0]) # TODO: consider wrapping in a proper context manager instead @@ -131,8 +130,7 @@ def run_checker( output_filename = "{}.json".format( os.path.splitext(output_filename)[0] ) - cls.json_output(cs, score_dict, output_filename, ds_loc, limit, - out_fmt) + cls.json_output(cs, score_dict, output_filename, ds_loc, limit, out_fmt) else: raise TypeError("Invalid format %s" % 
out_fmt) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index ad02436f..453f57b2 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -92,8 +92,7 @@ def _print_suites(self, verbose=0): :type verbose: int """ for checker in sorted(self.checkers.keys()): - version = getattr(self.checkers[checker], "_cc_checker_version", - "???") + version = getattr(self.checkers[checker], "_cc_checker_version", "???") if verbose > 0: print(" - {} (v{})".format(checker, version)) elif ":" in checker and not checker.endswith( @@ -109,8 +108,7 @@ def _print_checker(self, checker_obj): :type checker_obj: subclass of compliance_checker.base.BaseChecker """ - check_functions = self._get_checks(checker_obj, - defaultdict(lambda: None)) + check_functions = self._get_checks(checker_obj, defaultdict(lambda: None)) for c, _ in check_functions: print("- {}".format(c.__name__)) if c.__doc__ is not None: @@ -184,8 +182,7 @@ def _load_checkers(cls, checkers): print("Could not load", c, ":", e, file=sys.stderr) # find the latest version of versioned checkers and set that as the # default checker for compliance checker if no version is specified - ver_checkers = sorted([c.split(":", 1) for c in cls.checkers if ":" - in c]) + ver_checkers = sorted([c.split(":", 1) for c in cls.checkers if ":" in c]) for spec, versions in itertools.groupby(ver_checkers, itemgetter(0)): version_nums = [v[-1] for v in versions] try: @@ -219,12 +216,14 @@ def _get_checks(self, checkclass, include_checks, skip_checks): if include_checks: for fn_name, fn_obj in meths: if fn_name in include_checks: - returned_checks.append((fn_obj, skip_checks[fn_name])) + returned_checks.append((fn_obj, skip_checks[fn_name])) else: for fn_name, fn_obj in meths: - if (fn_name.startswith("check_") and - skip_checks[fn_name] != BaseCheck.HIGH): - returned_checks.append((fn_obj, skip_checks[fn_name])) + if ( + fn_name.startswith("check_") + and skip_checks[fn_name] != BaseCheck.HIGH + ): + returned_checks.append((fn_obj, skip_checks[fn_name])) return returned_checks @@ -332,8 +331,7 @@ def _process_skip_checks(cls, skip_checks): check_dict = defaultdict(lambda: None) # A is for "all", "M" is for medium, "L" is for low - check_lookup = {"A": BaseCheck.HIGH, "M": BaseCheck.MEDIUM, - "L": BaseCheck.LOW} + check_lookup = {"A": BaseCheck.HIGH, "M": BaseCheck.MEDIUM, "L": BaseCheck.LOW} for skip_check_spec in skip_checks: split_check_spec = skip_check_spec.split(":") @@ -372,8 +370,7 @@ def _process_skip_checks(cls, skip_checks): return check_dict def run(self, ds, skip_checks, *checker_names): - warnings.warn("suite.run is deprecated, use suite.run_all in calls " - "instead") + warnings.warn("suite.run is deprecated, use suite.run_all in calls " "instead") return self.run_all(ds, checker_names, skip_checks=skip_checks) def run_all(self, ds, checker_names, include_checks=None, skip_checks=None): @@ -686,8 +683,7 @@ def weight_sort(result): # create dict of the groups -> {level: [reasons]} result = { key: [v for v in valuesiter if v.value[0] != v.value[1]] - for key, valuesiter in itertools.groupby(groups_sorted, - key=weight_sort) + for key, valuesiter in itertools.groupby(groups_sorted, key=weight_sort) } priorities = self.checkers[check]._cc_display_headers @@ -732,8 +728,7 @@ def process_table(res, check): print("{:^{width}}".format(level_name, width=width)) print("-" * width) - data_issues = [process_table(res, check) for res in - result[level]] + data_issues = [process_table(res, check) for res in result[level]] 
has_printed = False for issue, reasons in data_issues: @@ -743,8 +738,7 @@ def process_table(res, check): print("") # join alphabetized reasons together reason_str = "\n".join( - "* {}".format(r) for r in sorted(reasons, - key=lambda x: x[0]) + "* {}".format(r) for r in sorted(reasons, key=lambda x: x[0]) ) proc_str = "{}\n{}".format(issue, reason_str) print(proc_str) diff --git a/compliance_checker/tests/helpers.py b/compliance_checker/tests/helpers.py index cfe3eb6d..a49e6b32 100644 --- a/compliance_checker/tests/helpers.py +++ b/compliance_checker/tests/helpers.py @@ -31,12 +31,14 @@ def __init__(self, filename=None, default_fill_value=None): super(MockTimeSeries, self).__init__(filename) self.createDimension("time", 500) for name, std_name, units in ( - ("time", "time", "seconds since 1970-01-01"), - ("lon", "longitude", "degrees_east"), - ("lat", "latitude", "degrees_north"), - ("depth", "depth", "m")): - var = self.createVariable(name, "d", ("time",), - fill_value=default_fill_value) + ("time", "time", "seconds since 1970-01-01"), + ("lon", "longitude", "degrees_east"), + ("lat", "latitude", "degrees_north"), + ("depth", "depth", "m"), + ): + var = self.createVariable( + name, "d", ("time",), fill_value=default_fill_value + ) var.standard_name = std_name var.units = units @@ -72,11 +74,9 @@ def __init__(self, copy_var=None): def __getitem__(self, idx): return self._arr[idx] - def __setitem__(self, idx, val): self._arr[idx] = val - def ncattrs(self): return [ att diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index c95c4e84..87779417 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -33,8 +33,11 @@ ) from compliance_checker.suite import CheckSuite from compliance_checker.tests import BaseTestCase -from compliance_checker.tests.helpers import (MockRaggedArrayRepr, - MockTimeSeries, MockVariable) +from compliance_checker.tests.helpers import ( + MockRaggedArrayRepr, + MockTimeSeries, + MockVariable, +) import requests_mock import json import re @@ -271,7 +274,7 @@ def test_naming_conventions(self): assert len(results) == 3 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 - assert all(r.name == u"§2.3 Naming Conventions" for r in results) + assert all(r.name == "§2.3 Naming Conventions" for r in results) # another non-compliant dataset dataset = self.load_dataset(STATIC_FILES["chap2"]) @@ -280,7 +283,7 @@ def test_naming_conventions(self): assert len(results) == 3 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 - assert all(r.name == u"§2.3 Naming Conventions" for r in results) + assert all(r.name == "§2.3 Naming Conventions" for r in results) def test_check_names_unique(self): """ @@ -300,7 +303,7 @@ def test_check_names_unique(self): assert result.value == (6, 7) assert ( result.msgs[0] - == u"Variables are not case sensitive. Duplicate variables named: not_unique" + == "Variables are not case sensitive. 
Duplicate variables named: not_unique" ) def test_check_dimension_names(self): @@ -316,7 +319,7 @@ def test_check_dimension_names(self): dataset = self.load_dataset(STATIC_FILES["chap2"]) result = self.cf.check_dimension_names(dataset) - assert result.msgs[0] == u"no_reason has two or more dimensions named time" + assert result.msgs[0] == "no_reason has two or more dimensions named time" def test_check_dimension_order(self): """ @@ -330,7 +333,7 @@ def test_check_dimension_order(self): result = self.cf.check_dimension_order(dataset) assert result.value == (5, 6) assert result.msgs[0] == ( - u"really_bad's spatio-temporal dimensions are not in the " + "really_bad's spatio-temporal dimensions are not in the " "recommended order T, Z, Y, X and/or further dimensions are not " "located left of T, Z, Y, X. The dimensions (and their guessed " "types) are latitude (Y), power (U) (with U: other/unknown; L: " @@ -350,7 +353,7 @@ def test_check_fill_value_outside_valid_range(self): dataset = self.load_dataset(STATIC_FILES["bad_data_type"]) result = self.cf.check_fill_value_outside_valid_range(dataset) assert result.msgs[0] == ( - u"salinity:_FillValue (1.0) should be outside the " + "salinity:_FillValue (1.0) should be outside the " "range specified by valid_min/valid_max (-10, 10)" ) @@ -358,7 +361,7 @@ def test_check_fill_value_outside_valid_range(self): result = self.cf.check_fill_value_outside_valid_range(dataset) assert result.value == (1, 2) assert result.msgs[0] == ( - u"wind_speed:_FillValue (12.0) should be outside the " + "wind_speed:_FillValue (12.0) should be outside the " "range specified by valid_min/valid_max (0.0, 20.0)" ) @@ -384,7 +387,7 @@ def test_check_conventions_are_cf_16(self): result = self.cf.check_conventions_version(dataset) self.assertFalse(result.value) assert result.msgs[0] == ( - u"§2.6.1 Conventions global attribute does not contain " '"CF-1.6"' + "§2.6.1 Conventions global attribute does not contain " '"CF-1.6"' ) def test_check_convention_globals(self): @@ -405,7 +408,7 @@ def test_check_convention_globals(self): assert result.value[0] != result.value[1] assert ( result.msgs[0] - == u"§2.6.2 global attribute title should exist and be a non-empty string" + == "§2.6.2 global attribute title should exist and be a non-empty string" ) def test_check_convention_possibly_var_attrs(self): @@ -433,7 +436,7 @@ def test_check_convention_possibly_var_attrs(self): assert result.value[0] != result.value[1] assert ( result.msgs[0] - == u"§2.6.2 references global attribute should be a non-empty string" + == "§2.6.2 references global attribute should be a non-empty string" ) # load bad_data_type.nc @@ -447,8 +450,7 @@ def test_check_convention_possibly_var_attrs(self): assert result.value[0] != result.value[1] assert ( - result.msgs[0] - == u"§2.6.2 salinity:institution should be a non-empty string" + result.msgs[0] == "§2.6.2 salinity:institution should be a non-empty string" ) def test_check_standard_name(self): @@ -467,12 +469,10 @@ def test_check_standard_name(self): results = self.cf.check_standard_name(dataset) score, out_of, messages = get_results(results) - - # 9 vars checked, 8 fail assert len(results) == 9 assert score < out_of - assert all(r.name == u"§3.3 Standard Name" for r in results) + assert all(r.name == "§3.3 Standard Name" for r in results) # check recommendations with a misspelled standard name dataset = MockTimeSeries() @@ -496,13 +496,14 @@ def test_check_standard_name(self): temperature.standard_name = "sea_water_temperature" temperature.ancillary_variables = 
"temperature_flag" - temperature_flag = dataset.createVariable("temperature_flag", "i2", - ("time",)) + temperature_flag = dataset.createVariable("temperature_flag", "i2", ("time",)) # bad modifier temperature_flag.standard_name = "sea_water_temperature status flag" _, _, messages = get_results(self.cf.check_standard_name(dataset)) - assert ('Standard name modifier "status flag" for variable temperature_flag is not a valid modifier according to CF Appendix C' - in messages) + assert ( + 'Standard name modifier "status flag" for variable temperature_flag is not a valid modifier according to CF Appendix C' + in messages + ) # proper name, units supplied temperature_flag.standard_name = "sea_water_temperature status_flag" temperature_flag.units = "1" @@ -512,8 +513,10 @@ def test_check_standard_name(self): # long_name or standard_name present del temperature.standard_name _, _, messages = get_results(self.cf.check_standard_name(dataset)) - assert ("Attribute long_name or/and standard_name is highly " - "recommended for variable temperature" in messages) + assert ( + "Attribute long_name or/and standard_name is highly " + "recommended for variable temperature" in messages + ) def test_cell_bounds(self): dataset = self.load_dataset(STATIC_FILES["grid-boundaries"]) @@ -563,7 +566,7 @@ def test_cell_measures(self): dataset = self.load_dataset(STATIC_FILES["bad_cell_measure2"]) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - message = u"Cell measure variable box_area referred to by PS is not present in dataset variables" + message = "Cell measure variable box_area referred to by PS is not present in dataset variables" assert message in messages def test_climatology_cell_methods(self): @@ -619,8 +622,10 @@ def test_climatology_cell_methods(self): temp.climatology = "clim_bounds" results = self.cf.check_climatological_statistics(bad_dim_ds) assert results[1].value[0] < results[1].value[1] - assert (results[1].msgs[0] == 'Climatology dimension "clim_bounds" ' - "should only contain two elements") + assert ( + results[1].msgs[0] == 'Climatology dimension "clim_bounds" ' + "should only contain two elements" + ) def test_check_ancillary_variables(self): """ @@ -630,15 +635,15 @@ def test_check_ancillary_variables(self): dataset = self.load_dataset(STATIC_FILES["rutgers"]) results = self.cf.check_ancillary_variables(dataset) result_dict = {result.name: result for result in results} - result = result_dict[u"§3.4 Ancillary Data"] + result = result_dict["§3.4 Ancillary Data"] assert result.value == (2, 2) dataset = self.load_dataset(STATIC_FILES["bad_reference"]) results = self.cf.check_ancillary_variables(dataset) result_dict = {result.name: result for result in results} - result = result_dict[u"§3.4 Ancillary Data"] + result = result_dict["§3.4 Ancillary Data"] assert result.value == (1, 2) - assert u"temp_qc is not a variable in this dataset" == result.msgs[0] + assert "temp_qc is not a variable in this dataset" == result.msgs[0] def test_download_standard_name_table(self): """ @@ -691,7 +696,9 @@ def test_check_flags(self): # only 4 variables in this dataset do not have perfect scores imperfect = [r.value for r in results if r.value[0] < r.value[1]] assert len(imperfect) == 4 - dataset.variables["conductivity_qc"] = MockVariable(dataset.variables["conductivity_qc"]) + dataset.variables["conductivity_qc"] = MockVariable( + dataset.variables["conductivity_qc"] + ) # Test with single element. Will fail, but should not throw exception. 
dataset.variables["conductivity_qc"].flag_values = np.array([1], dtype=np.int8) results = self.cf.check_flags(dataset) @@ -717,15 +724,19 @@ def test_check_flag_masks(self): flags_var.flag_masks = np.array([0, 1], dtype="i2") results = self.cf.check_flags(dataset) score, out_of, messages = get_results(results) - assert ("flag_masks for variable flags must not contain zero as an " - "element" in messages) + assert ( + "flag_masks for variable flags must not contain zero as an " + "element" in messages + ) # IMPLEMENTATION 3.5 REQUIRED 1/1 flags_var.flag_masks = np.array([1], dtype="i2") flags_var.flag_values = np.array([2], dtype="i2") results = self.cf.check_flags(dataset) score, out_of, messages = get_results(results) - assert ("flag masks and flag values for 'flags' combined don't equal " - "flag values" in messages) + assert ( + "flag masks and flag values for 'flags' combined don't equal " + "flag values" in messages + ) def test_check_bad_units(self): """Load a dataset with units that are expected to fail (bad_units.nc). @@ -749,12 +760,10 @@ def test_check_bad_units(self): results_list = list(chain(*(r.msgs for r in all_results if r.msgs))) # check the results only have '§3.1 Units' as the header - assert all(r.name == u"§3.1 Units" for r in all_results) + assert all(r.name == "§3.1 Units" for r in all_results) # check that all the expected variables have been hit - assert all( - any(s in msg for msg in results_list) for s in ["time", "lev"] - ) + assert all(any(s in msg for msg in results_list) for s in ["time", "lev"]) def test_latitude(self): """ @@ -773,7 +782,7 @@ def test_latitude(self): assert len(results) == 12 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 3 - assert (r.name == u"§4.1 Latitude Coordinate" for r in results) + assert (r.name == "§4.1 Latitude Coordinate" for r in results) # check with another ds -- all 6 vars checked pass dataset = self.load_dataset(STATIC_FILES["rotated_pole_grid"]) @@ -781,7 +790,7 @@ def test_latitude(self): scored, out_of, messages = get_results(results) assert len(results) == 6 assert scored == out_of - assert (r.name == u"§4.1 Latitude Coordinate" for r in results) + assert (r.name == "§4.1 Latitude Coordinate" for r in results) # hack to avoid writing to read-only file dataset.variables["rlat"] = MockVariable(dataset.variables["rlat"]) @@ -791,7 +800,7 @@ def test_latitude(self): rlat.units = "degrees_north" results = self.cf.check_latitude(dataset) scored, out_of, messages = get_results(results) - wrong_format = u"Grid latitude variable '{}' should use degree equivalent units without east or north components. Current units are {}" + wrong_format = "Grid latitude variable '{}' should use degree equivalent units without east or north components. 
Current units are {}" self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages) rlat.units = "radians" results = self.cf.check_latitude(dataset) @@ -815,7 +824,7 @@ def test_longitude(self): assert len(results) == 12 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 3 - assert all(r.name == u"§4.2 Longitude Coordinate" for r in results) + assert all(r.name == "§4.2 Longitude Coordinate" for r in results) # check different dataset # TODO can be improved for check_latitude too dataset = self.load_dataset(STATIC_FILES["rotated_pole_grid"]) @@ -830,7 +839,7 @@ def test_longitude(self): rlon.units = "degrees_east" results = self.cf.check_longitude(dataset) scored, out_of, messages = get_results(results) - wrong_format = u"Grid longitude variable '{}' should use degree equivalent units without east or north components. Current units are {}" + wrong_format = "Grid longitude variable '{}' should use degree equivalent units without east or north components. Current units are {}" self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages) rlon.units = "radians" results = self.cf.check_longitude(dataset) @@ -888,14 +897,14 @@ def test_vertical_dimension(self): dataset = self.load_dataset(STATIC_FILES["example-grid"]) results = self.cf.check_dimensional_vertical_coordinate(dataset) assert len(results) == 1 - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) # non-compliance -- one check fails dataset = self.load_dataset(STATIC_FILES["illegal-vertical"]) results = self.cf.check_dimensional_vertical_coordinate(dataset) scored, out_of, messages = get_results(results) assert len(results) == 1 - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) assert scored < out_of def test_appendix_d(self): @@ -1027,7 +1036,7 @@ def test_dimensionless_vertical(self): # all variables checked (2) pass assert len(results) == 2 assert scored == out_of - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) # Check negative compliance -- 3 out of 4 pass @@ -1037,7 +1046,7 @@ def test_dimensionless_vertical(self): assert len(results) == 4 assert scored <= out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) # test with an invalid formula_terms dataset.variables["lev2"] = MockVariable(dataset.variables["lev2"]) @@ -1052,7 +1061,7 @@ def test_dimensionless_vertical(self): assert len(results) == 4 assert scored <= out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) def test_is_time_variable(self): var1 = MockVariable() @@ -1105,7 +1114,7 @@ def test_check_time_coordinate(self): scored, out_of, messages = get_results(results) - assert u"time does not have correct time units" in messages + assert "time does not have correct time units" in messages assert (scored, out_of) == (1, 2) def test_check_calendar(self): @@ -1121,32 +1130,34 @@ def test_check_calendar(self): results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) # TEST CONFORMANCE 4.4.1 REQUIRED 2, 3 / 5 - bad_month_msg = 
("For nonstandard calendar on variable time, attribute " - "month_lengths must be supplied as a 12-element integer array") + bad_month_msg = ( + "For nonstandard calendar on variable time, attribute " + "month_lengths must be supplied as a 12-element integer array" + ) assert bad_month_msg in messages dataset = MockTimeSeries() time = dataset.variables["time"] dataset.variables["time"].calendar = "custom" - dataset.variables["time"].month_lengths = np.array([30.3], - dtype=np.double) + dataset.variables["time"].month_lengths = np.array([30.3], dtype=np.double) results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) assert bad_month_msg in messages - dataset.variables["time"].month_lengths = np.array([31, 29, 31, 30, 31, - 30, 31, 31, 30, 31, - 30, 31], - dtype=np.int) + dataset.variables["time"].month_lengths = np.array( + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], dtype=np.int + ) results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) assert bad_month_msg not in messages # TEST CONFORMANCE 4.4.1 REQUIRED 4,5/5 - leap_month_msg = ("When attribute leap_month is supplied for variable " - "time, the value must be a scalar integer between 1 " - "and 12") + leap_month_msg = ( + "When attribute leap_month is supplied for variable " + "time, the value must be a scalar integer between 1 " + "and 12" + ) dataset.variables["time"].leap_month = np.array([0], dtype=np.uint8) results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) @@ -1157,12 +1168,16 @@ def test_check_calendar(self): scored, out_of, messages = get_results(results) assert leap_month_msg not in messages # TEST CONFORMANCE 4.4.1 RECOMMENDED 1/2 - assert ("For time variable time, attribute leap_year must be present " - "if leap_month attribute is defined" in messages) + assert ( + "For time variable time, attribute leap_year must be present " + "if leap_month attribute is defined" in messages + ) # TEST CONFORMANCE 4.4.1 REQUIRED 5/5 - leap_year_msg = ("When attribute leap_year is supplied for variable " - "time, the value must be a scalar integer") + leap_year_msg = ( + "When attribute leap_year is supplied for variable " + "time, the value must be a scalar integer" + ) dataset.variables["time"].leap_year = ["2.18"] results = self.cf.check_calendar(dataset) @@ -1178,7 +1193,7 @@ def test_check_aux_coordinates(self): dataset = self.load_dataset(STATIC_FILES["illegal-aux-coords"]) results = self.cf.check_aux_coordinates(dataset) result_dict = {result.name: result for result in results} - result = result_dict[u"§5 Coordinate Systems"] + result = result_dict["§5 Coordinate Systems"] assert result.msgs == [] # shouldn't have any messages assert result.value == (4, 4) @@ -1189,7 +1204,7 @@ def test_check_grid_coordinates(self): result_dict = {result.name: result for result in results} result = result_dict[ - u"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" + "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" ] assert result.value == (2, 2) assert (scored, out_of) == (2, 2) @@ -1209,7 +1224,7 @@ def test_check_two_dimensional(self): assert scored < out_of assert all( r.name - == u"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" + == "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" for r in results ) @@ -1219,7 +1234,7 @@ def test_check_reduced_horizontal_grid(self): scored, out_of, messages = get_results(results) assert scored == 
out_of
         assert len(results) == 1
-        assert all(r.name == u"§5.3 Reduced Horizontal Grid" for r in results)
+        assert all(r.name == "§5.3 Reduced Horizontal Grid" for r in results)

         # load failing ds -- one variable has failing check
         dataset = self.load_dataset(STATIC_FILES["bad-rhgrid"])
@@ -1228,7 +1243,7 @@
         assert scored != out_of
         assert len(results) == 2
         assert len([r for r in results if r.value[0] < r.value[1]]) == 1
-        assert all(r.name == u"§5.3 Reduced Horizontal Grid" for r in results)
+        assert all(r.name == "§5.3 Reduced Horizontal Grid" for r in results)

     def test_check_grid_mapping(self):
         dataset = self.load_dataset(STATIC_FILES["mapping"])
@@ -1376,7 +1391,7 @@ def test_check_geographic_region(self):
         assert len(results) == 2
         assert scored < out_of
         assert (
-            u"6.1.1 'Neverland' specified by 'neverland' is not a valid region"
+            "6.1.1 'Neverland' specified by 'neverland' is not a valid region"
             in messages
         )

@@ -1473,8 +1488,10 @@ def test_check_standard_name_modifier_units(self):
         temp.ancillary_variables = "temp_flag"
         scored, out_of, messages = get_results(self.cf.check_units(dataset))
         assert scored != out_of
-        assert ("units attribute for variable temperature_flag must be unset "
-                "when status_flag modifier is set")
+        assert (
+            "units attribute for variable temperature_flag must be unset "
+            "when status_flag modifier is set" in messages
+        )

         del temp_flag.units
         scored, out_of, messages = get_results(self.cf.check_units(dataset))
@@ -1483,9 +1500,6 @@
         temp_counts = dataset.createVariable("temp_counts", "i1", ("time",))
         temp.ancillary_variables += " temp_counts"
-
-
-
     def test_check_duplicates(self):
         """
         Test to verify that the check identifies duplicate axes. 
Load the @@ -1499,7 +1513,7 @@ def test_check_duplicates(self): # only one check run here, so we can directly compare all the values assert scored != out_of - assert messages[0] == u"'temp' has duplicate axis X defined by [lon_rho, lon_u]" + assert messages[0] == "'temp' has duplicate axis X defined by [lon_rho, lon_u]" def test_check_multi_dimensional_coords(self): """ @@ -1513,7 +1527,7 @@ def test_check_multi_dimensional_coords(self): # 4 variables were checked in this ds, 2 of which passed assert len(results) == 4 assert len([r for r in results if r.value[0] < r.value[1]]) == 2 - assert all(r.name == u"§5 Coordinate Systems" for r in results) + assert all(r.name == "§5 Coordinate Systems" for r in results) def test_64bit(self): dataset = self.load_dataset(STATIC_FILES["ints64"]) @@ -1530,7 +1544,7 @@ def test_variable_feature_check(self): assert len(results) == 2 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 1 - assert all(r.name == u"§9.1 Features and feature types" for r in results) + assert all(r.name == "§9.1 Features and feature types" for r in results) # compliant dataset dataset = self.load_dataset(STATIC_FILES["trajectory-complete"]) @@ -1559,7 +1573,7 @@ def test_check_cell_methods(self): # check the results only have expected headers assert set([r.name for r in results]).issubset( - set([u"§7.1 Cell Boundaries", u"§7.3 Cell Methods"]) + set(["§7.1 Cell Boundaries", "§7.3 Cell Methods"]) ) # check that all the expected variables have been hit @@ -1591,7 +1605,7 @@ def test_check_cell_methods(self): scored, out_of, messages = get_results(results) self.assertTrue( - u'§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature' + '§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature' in messages ) @@ -1608,7 +1622,7 @@ def test_check_cell_methods(self): results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = get_results(results) self.assertTrue( - u'§7.3.3 Invalid cell_methods keyword "invalid_keyword:" for variable temperature. Must be one of [interval, comment]' + '§7.3.3 Invalid cell_methods keyword "invalid_keyword:" for variable temperature. Must be one of [interval, comment]' in messages ) @@ -1619,7 +1633,7 @@ def test_check_cell_methods(self): results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = get_results(results) assert ( - u"§7.3.3 Parenthetical content inside temperature:cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees" + "§7.3.3 Parenthetical content inside temperature:cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees" in messages ) @@ -1660,10 +1674,12 @@ def test_check_external_variables(self): dataset.createVariable("ext3", "i4", ()) result = self.cf.check_external_variables(dataset) assert result.value[0] < result.value[1] - assert ("Global attribute external_variables should not have any " - "variable names which are present in the dataset. Currently, " - "the following names appear in both external_variables " - "and the dataset's variables: {'ext3'}" in result.msgs) + assert ( + "Global attribute external_variables should not have any " + "variable names which are present in the dataset. 
Currently, " + "the following names appear in both external_variables " + "and the dataset's variables: {'ext3'}" in result.msgs + ) def test_check_actual_range(self): """Test the check_actual_range method works as expected""" @@ -1688,7 +1704,7 @@ def test_check_actual_range(self): score, out_of, messages = get_results(result) assert score < out_of assert len(messages) == 1 - assert messages[0] == u"actual_range of 'a' must be 2 elements" + assert messages[0] == "actual_range of 'a' must be 2 elements" dataset.close() dataset = MockTimeSeries() @@ -1878,7 +1894,7 @@ def test_cell_measures(self): dataset = self.load_dataset(STATIC_FILES["bad_cell_measure2"]) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - message = u"Cell measure variable box_area referred to by PS is not present in dataset variables" + message = "Cell measure variable box_area referred to by PS is not present in dataset variables" assert message in messages def test_variable_features(self): @@ -2152,8 +2168,7 @@ def test_check_dimensionless_vertical_coordinate_1_7(self): # this time, assign computed_standard_name ret_val = [] - dataset.variables["lev"].setncattr("computed_standard_name", - "air_pressure") + dataset.variables["lev"].setncattr("computed_standard_name", "air_pressure") # run the check self.cf._check_dimensionless_vertical_coordinate_1_7( @@ -2183,7 +2198,7 @@ def test_dimensionless_vertical(self): # all variables checked (2) pass assert len(results) == 3 assert scored == out_of - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) # make one variable's computed_standard_name incorrect, one should fail dataset.variables["lev"].computed_standard_name = "definitely_not_right" @@ -2192,7 +2207,7 @@ def test_dimensionless_vertical(self): assert len(results) == 3 assert scored < out_of - assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) + assert all(r.name == "§4.3 Vertical Coordinate" for r in results) def test_check_attr_type(self): """ @@ -2463,11 +2478,11 @@ def test_check_add_offset_scale_factor_type(self): self.assertFalse(r[1].msgs) # integer variable type (int8, int16, int32) compared against - #floating point add_offset/scale_factor + # floating point add_offset/scale_factor for var_bytes in ("1", "2", "4"): - coarse_temp = dataset.createVariable(f"coarse_temp_{var_bytes}", - f"i{var_bytes}", - dimensions=("time",)) + coarse_temp = dataset.createVariable( + f"coarse_temp_{var_bytes}", f"i{var_bytes}", dimensions=("time",) + ) coarse_temp.setncattr("scale_factor", np.float32(23.0)) coarse_temp.setncattr("add_offset", np.double(-2.1)) r = self.cf.check_add_offset_scale_factor_type(dataset) @@ -2475,10 +2490,12 @@ def test_check_add_offset_scale_factor_type(self): # are same type should be false self.assertFalse(r[0].value) # TEST CONFORMANCE 8.1 REQUIRED 1/3 - self.assertEqual(r[0].msgs[0], - "When both scale_factor and add_offset are supplied for " - f"variable coarse_temp_{var_bytes}, they must have the " - "same type") + self.assertEqual( + r[0].msgs[0], + "When both scale_factor and add_offset are supplied for " + f"variable coarse_temp_{var_bytes}, they must have the " + "same type", + ) # Individual checks for scale_factor/add_offset should be OK, # however self.assertTrue(r[-1].value) diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index e0b14457..cc1579c6 100644 --- 
a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -21,15 +21,15 @@ ( "sldmb_43093_agg", [ - u"attribute time:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", - u"attribute lat:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", - u"attribute lon:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", - u"§2.6.2 global attribute history should exist and be a non-empty string", - u"standard_name temperature is not defined in Standard Name Table v{}".format( + "attribute time:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", + "attribute lat:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", + "attribute lon:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", + "§2.6.2 global attribute history should exist and be a non-empty string", + "standard_name temperature is not defined in Standard Name Table v{}".format( std_names._version ), - u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lat, is not a variable in this dataset", - u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lon, is not a variable in this dataset", + "temperature's auxiliary coordinate specified by the coordinates attribute, precise_lat, is not a variable in this dataset", + "temperature's auxiliary coordinate specified by the coordinates attribute, precise_lon, is not a variable in this dataset", ], ), ( @@ -52,24 +52,24 @@ "standard_name visibility is not defined in Standard Name Table v{}".format( std_names._version ), - "Standard name modifier \"data_quality\" for variable visibility_qc is not a valid modifier according to CF Appendix C", + 'Standard name modifier "data_quality" for variable visibility_qc is not a valid modifier according to CF Appendix C', "standard_name wind_direction is not defined in Standard Name Table v{}".format( std_names._version ), - "Standard name modifier \"data_quality\" for variable wind_direction_qc is not a valid modifier according to CF Appendix C", + 'Standard name modifier "data_quality" for variable wind_direction_qc is not a valid modifier according to CF Appendix C', "standard_name wind_gust is not defined in Standard Name Table v{}".format( std_names._version ), - "Standard name modifier \"data_quality\" for variable wind_gust_qc is not a valid modifier according to CF Appendix C", - "Standard name modifier \"data_quality\" for variable air_temperature_qc is not a valid modifier according to CF Appendix C", + 'Standard name modifier "data_quality" for variable wind_gust_qc is not a valid modifier according to CF Appendix C', + 'Standard name modifier "data_quality" for variable air_temperature_qc is not a valid modifier according to CF Appendix C', "standard_name use_wind is not defined in Standard Name Table v{}".format( std_names._version ), "standard_name barometric_pressure is not defined in Standard Name Table v{}".format( std_names._version ), - "Standard name modifier \"data_quality\" for variable barometric_pressure_qc is not a valid modifier according to CF Appendix C", - "Standard name modifier \"data_quality\" for variable wind_speed_qc is not a valid modifier according to CF Appendix C", + 'Standard name modifier "data_quality" for variable 
barometric_pressure_qc is not a valid modifier according to CF Appendix C', + 'Standard name modifier "data_quality" for variable wind_speed_qc is not a valid modifier according to CF Appendix C', "standard_name barometric_pressure is not defined in Standard Name Table v{}".format( std_names._version ), @@ -81,23 +81,23 @@ ( "3mf07", [ - u"latitude:valid_min must be a numeric type not a string", - u"latitude:valid_max must be a numeric type not a string", - u"longitude:valid_min must be a numeric type not a string", - u"longitude:valid_max must be a numeric type not a string", - u"§2.6.2 references global attribute should be a non-empty string", - u"§2.6.2 comment global attribute should be a non-empty string", - u"dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable flag (profile)", - u"dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable haul (profile)", + "latitude:valid_min must be a numeric type not a string", + "latitude:valid_max must be a numeric type not a string", + "longitude:valid_min must be a numeric type not a string", + "longitude:valid_max must be a numeric type not a string", + "§2.6.2 references global attribute should be a non-empty string", + "§2.6.2 comment global attribute should be a non-empty string", + "dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable flag (profile)", + "dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable haul (profile)", ], ), ( "ooi_glider", [ - u"§2.6.2 comment global attribute should be a non-empty string", - u"Attribute long_name or/and standard_name is highly recommended for variable deployment", - u"latitude variable 'latitude' should define standard_name='latitude' or axis='Y'", - u"longitude variable 'longitude' should define standard_name='longitude' or axis='X'", + "§2.6.2 comment global attribute should be a non-empty string", + "Attribute long_name or/and standard_name is highly recommended for variable deployment", + "latitude variable 'latitude' should define standard_name='latitude' or axis='Y'", + "longitude variable 'longitude' should define standard_name='longitude' or axis='X'", ], ), ( @@ -115,7 +115,7 @@ ), ( "kibesillah", - [u"§2.6.2 global attribute title should exist and be a non-empty string"], + ["§2.6.2 global attribute title should exist and be a non-empty string"], ), ( "pr_inundation", @@ -140,14 +140,14 @@ ( "ww3", [ - u"§2.6.2 global attribute title should exist and be a non-empty string", - u"§2.6.2 global attribute history should exist and be a non-empty string", - u"§2.6.1 Conventions field is not present", - u"Attribute long_name or/and standard_name is highly recommended for variable time", - u"Attribute long_name or/and standard_name is highly recommended for variable lon", - u"Attribute long_name or/and standard_name is highly recommended for variable lat", - u"latitude variable 'lat' should define standard_name='latitude' or axis='Y'", - u"longitude variable 'lon' should define standard_name='longitude' or axis='X'", + "§2.6.2 global attribute title should exist and be a non-empty string", + "§2.6.2 global attribute history should exist and be a non-empty string", + "§2.6.1 Conventions field is not present", + "Attribute long_name or/and standard_name is highly recommended for variable time", + "Attribute long_name or/and standard_name is highly recommended for variable lon", + "Attribute long_name or/and standard_name is highly 
recommended for variable lat", + "latitude variable 'lat' should define standard_name='latitude' or axis='Y'", + "longitude variable 'lon' should define standard_name='longitude' or axis='X'", ], ), ( @@ -158,27 +158,27 @@ "standard_name cloud_cover is not defined in Standard Name Table v{}".format( std_names._version ), - u"standard_name dew_point is not defined in Standard Name Table v{}".format( + "standard_name dew_point is not defined in Standard Name Table v{}".format( std_names._version ), ( - u"GRID is not a valid CF featureType. It must be one of point, timeseries, " + "GRID is not a valid CF featureType. It must be one of point, timeseries, " "trajectory, profile, timeseriesprofile, trajectoryprofile" ), ( - u"global attribute _CoordSysBuilder should begin with a letter and " + "global attribute _CoordSysBuilder should begin with a letter and " "be composed of letters, digits, and underscores" ), - u'units for cl, "fraction" are not recognized by UDUNITS', + 'units for cl, "fraction" are not recognized by UDUNITS', ], ), ( "bad_cf_role", [ - u"§2.6.2 global attribute title should exist and be a non-empty string", - u"§2.6.2 global attribute history should exist and be a non-empty string", - u"§2.6.1 Conventions field is not present", - u"§9.5 The only acceptable values of cf_role for Discrete Geometry CF data sets are timeseries_id, profile_id, and trajectory_id", + "§2.6.2 global attribute title should exist and be a non-empty string", + "§2.6.2 global attribute history should exist and be a non-empty string", + "§2.6.1 Conventions field is not present", + "§9.5 The only acceptable values of cf_role for Discrete Geometry CF data sets are timeseries_id, profile_id, and trajectory_id", ], ), pytest.param( @@ -307,7 +307,7 @@ def test_fvcom(self, cs, loaded_dataset): # it's not clear to me what this is supposed to be doing -- this else clause is outside of the if else: raise AssertionError( - u'"dimensions for auxiliary coordinate variable siglay (node, siglay) ' + '"dimensions for auxiliary coordinate variable siglay (node, siglay) ' 'are not a subset of dimensions for variable u (siglay, nele, time)"' " not in messages" ) diff --git a/compliance_checker/tests/test_ioos_profile.py b/compliance_checker/tests/test_ioos_profile.py index e16acdac..5dff822b 100644 --- a/compliance_checker/tests/test_ioos_profile.py +++ b/compliance_checker/tests/test_ioos_profile.py @@ -12,8 +12,7 @@ NamingAuthorityValidator, ) from compliance_checker.tests import BaseTestCase -from compliance_checker.tests.helpers import (MockTimeSeries, MockVariable, - MockNetCDF) +from compliance_checker.tests.helpers import MockTimeSeries, MockVariable, MockNetCDF from compliance_checker.tests.resources import STATIC_FILES from compliance_checker.tests.test_cf import get_results @@ -666,7 +665,6 @@ def test_check_standard_name(self): scored, out_of, messages = get_results(results) self.assertLess(scored, out_of) - # have to recreate here or temperature gives KeyError despite appearing # deleted -- why? 
ds = MockTimeSeries() # time, lat, lon, depth @@ -809,8 +807,10 @@ def test_check_qartod_variables_flags(self): # QARTOD variable with flag meanings, without flag_meanings qr.setncattr("flag_values", np.array([0, 1, 2], dtype=np.byte)) results = self.ioos.check_qartod_variables_flags(ds) - self.assertIn("Variable depth_qc must have attribute flag_meanings defined when flag_values attribute is present", - results[0].msgs) + self.assertIn( + "Variable depth_qc must have attribute flag_meanings defined when flag_values attribute is present", + results[0].msgs, + ) self.assertNotEqual(results[0].value[0], results[0].value[1]) # should fail self.assertFalse(results[1].value) # still fail diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 701723ca..325c1f86 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -64,13 +64,11 @@ def test_suite(self): ds = self.cs.load_dataset(static_files["2dim"]) self.cs.run(ds, [], "acdd") - def test_suite_pathlib(self): path_obj = Path(static_files["2dim"]) ds = self.cs.load_dataset(path_obj) self.cs.run(ds, [], "acdd") - def test_unicode_formatting(self): ds = self.cs.load_dataset(static_files["bad_region"]) score_groups = self.cs.run(ds, [], "cf") @@ -83,8 +81,7 @@ def test_unicode_formatting(self): ) # This asserts that print is able to generate all of the unicode # output - self.cs.standard_output_generation(groups, limit, points, out_of, - checker) + self.cs.standard_output_generation(groups, limit, points, out_of, checker) def test_generate_dataset_netCDF4(self): """ @@ -135,21 +132,21 @@ def test_skip_check_level(self): msg_set = {msg for sg in score_groups["cf"][0] for msg in sg.msgs} expected_excluded_names = { - u"§3.5 flag_meanings for lat", - u"§3.5 flag_meanings for lon", - u"§3.5 lat is a valid flags variable", - u"§3.5 lat is a valid flags variable", - u"§3.5 lon is a valid flags variable", + "§3.5 flag_meanings for lat", + "§3.5 flag_meanings for lon", + "§3.5 lat is a valid flags variable", + "§3.5 lat is a valid flags variable", + "§3.5 lon is a valid flags variable", } self.assertTrue(len(expected_excluded_names & name_set) == 0) # should skip references - ref_msg = u"references global attribute should be a non-empty string" + ref_msg = "references global attribute should be a non-empty string" self.assertTrue(ref_msg not in msg_set) # check_standard_name is high priority, but we requested only low, # so the standard_name check should still exist - standard_name_hdr = u"§3.3 Standard Name" + standard_name_hdr = "§3.3 Standard Name" self.assertTrue(standard_name_hdr in name_set) def test_group_func(self): diff --git a/setup.py b/setup.py index c3ebbced..e58299fa 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ def pip_requirements(fname="requirements.txt"): return reqs - setup( name="compliance-checker", description="Checks Datasets and SOS endpoints for standards compliance", @@ -64,7 +63,7 @@ def pip_requirements(fname="requirements.txt"): "ioos_sos = compliance_checker.ioos:IOOSBaseSOSCheck", "ioos-0.1 = compliance_checker.ioos:IOOS0_1Check", "ioos-1.1 = compliance_checker.ioos:IOOS1_1Check", - "ioos-1.2 = compliance_checker.ioos:IOOS1_2Check" + "ioos-1.2 = compliance_checker.ioos:IOOS1_2Check", ], }, package_data={ From d21a88b4515b9045fa9e3a680cdafa219ecd8d49 Mon Sep 17 00:00:00 2001 From: Benjamin Adams Date: Thu, 19 May 2022 17:02:23 -0400 Subject: [PATCH 2/3] Run autoflake to remove unused code, imports Manual additions were made as well 
where code clarity/missing features were a priority. --- compliance_checker/base.py | 2 -- compliance_checker/cf/__init__.py | 3 ++- compliance_checker/cf/cf.py | 19 +++---------------- compliance_checker/cf/cf_1_6.py | 17 ++--------------- compliance_checker/cf/cf_1_7.py | 8 +------- compliance_checker/cf/cf_1_8.py | 4 ++-- compliance_checker/cf/cf_base.py | 14 -------------- compliance_checker/ioos.py | 8 +++----- compliance_checker/suite.py | 3 +-- compliance_checker/tests/conftest.py | 2 +- compliance_checker/tests/test_acdd.py | 4 ++-- compliance_checker/tests/test_cf.py | 4 ++-- .../tests/test_cf_integration.py | 1 - compliance_checker/tests/test_ioos_profile.py | 2 +- 14 files changed, 20 insertions(+), 71 deletions(-) diff --git a/compliance_checker/base.py b/compliance_checker/base.py index 8992b2ee..6bebcd8a 100644 --- a/compliance_checker/base.py +++ b/compliance_checker/base.py @@ -7,7 +7,6 @@ import itertools import pprint import re -import sys import warnings from collections import defaultdict @@ -159,7 +158,6 @@ def setup(self, ds): Automatically run when running a CheckSuite. Define this method in your Checker class. """ - pass def __init__(self, options=None): self._defined_results = defaultdict(lambda: defaultdict(dict)) diff --git a/compliance_checker/cf/__init__.py b/compliance_checker/cf/__init__.py index a7757b22..573f9508 100644 --- a/compliance_checker/cf/__init__.py +++ b/compliance_checker/cf/__init__.py @@ -2,7 +2,8 @@ dimless_vertical_coordinates_1_6, dimless_vertical_coordinates_1_7, ) -from compliance_checker.cf.cf import CF1_6Check, CF1_7Check, util +from compliance_checker.cf.cf import CF1_6Check, CF1_7Check +from compliance_checker.cf import util __all__ = [ diff --git a/compliance_checker/cf/cf.py b/compliance_checker/cf/cf.py index 83d7a9c7..531a2b67 100644 --- a/compliance_checker/cf/cf.py +++ b/compliance_checker/cf/cf.py @@ -1,19 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import logging -import os -import sqlite3 -import sys - -from collections import OrderedDict, defaultdict -from functools import wraps -from warnings import warn - -import numpy as np -import pyproj -import regex - -from cf_units import Unit from compliance_checker import cfutil from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx @@ -21,8 +7,9 @@ from compliance_checker.cf.appendix_d import ( dimless_vertical_coordinates_1_6, dimless_vertical_coordinates_1_7, - no_missing_terms, + no_missing_terms ) + from compliance_checker.cf.appendix_e import cell_methods16, cell_methods17 from compliance_checker.cf.appendix_f import ( ellipsoid_names17, @@ -31,7 +18,7 @@ grid_mapping_dict16, grid_mapping_dict17, horizontal_datum_names17, - prime_meridian_names17, + prime_meridian_names17 ) # Version specific checkers organized in other modules diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py index 56fe3747..4074d767 100644 --- a/compliance_checker/cf/cf_1_6.py +++ b/compliance_checker/cf/cf_1_6.py @@ -1,10 +1,6 @@ import logging -import os -import sys -from collections import OrderedDict, defaultdict -from functools import wraps -from warnings import warn +from collections import defaultdict import numpy as np import regex @@ -12,12 +8,11 @@ from cf_units import Unit from compliance_checker import cfutil -from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx +from compliance_checker.base import BaseCheck, Result, TestCtx from compliance_checker.cf import util from compliance_checker.cf.appendix_c import 
valid_modifiers from compliance_checker.cf.appendix_d import ( dimless_vertical_coordinates_1_6, - no_missing_terms, ) from compliance_checker.cf.appendix_e import cell_methods16 from compliance_checker.cf.appendix_f import ( @@ -189,7 +184,6 @@ def check_add_offset_scale_factor_type(self, ds): scale_factor=lambda x: x is not None ) - _attr_vars_tup = [] both = set(add_offset_vars).intersection(scale_factor_vars) both_msgs = [] for both_var in sorted(both, key=lambda var: var.name): @@ -826,13 +820,6 @@ def _check_valid_standard_units(self, ds, variable_name): standard_name, standard_name_modifier = self._split_standard_name(standard_name) - standard_entry = self._std_names.get(standard_name, None) - if standard_entry is not None: - canonical_units = standard_entry.canonical_units - else: - # Any unit comparisons with None returns False - canonical_units = None - # Other standard_name modifiers have the same units as the # unmodified standard name or are not checked for units. diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py index ebf0486b..014764c3 100644 --- a/compliance_checker/cf/cf_1_7.py +++ b/compliance_checker/cf/cf_1_7.py @@ -1,24 +1,18 @@ import logging import os import sqlite3 -import sys -from collections import OrderedDict, defaultdict -from functools import wraps from warnings import warn import numpy as np import pyproj import regex -from cf_units import Unit from compliance_checker import cfutil -from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx -from compliance_checker.cf import util +from compliance_checker.base import BaseCheck, Result, TestCtx from compliance_checker.cf.appendix_d import ( dimless_vertical_coordinates_1_7, - no_missing_terms, ) from compliance_checker.cf.appendix_e import cell_methods17 from compliance_checker.cf.appendix_f import ( diff --git a/compliance_checker/cf/cf_1_8.py b/compliance_checker/cf/cf_1_8.py index f89ff43f..f2241a56 100644 --- a/compliance_checker/cf/cf_1_8.py +++ b/compliance_checker/cf/cf_1_8.py @@ -1,8 +1,8 @@ -from compliance_checker.base import BaseCheck, TestCtx, Result +from compliance_checker.base import BaseCheck, TestCtx from compliance_checker import MemoizedDataset from compliance_checker.cf.cf_1_7 import CF1_7Check from netCDF4 import Dataset -from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx +from compliance_checker.base import BaseCheck, TestCtx import requests from lxml import etree from shapely.geometry import Polygon diff --git a/compliance_checker/cf/cf_base.py b/compliance_checker/cf/cf_base.py index 58628215..32fe81ec 100644 --- a/compliance_checker/cf/cf_base.py +++ b/compliance_checker/cf/cf_base.py @@ -10,26 +10,13 @@ import numpy as np import regex -from cf_units import Unit from compliance_checker import cfutil from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx from compliance_checker.cf import util from compliance_checker.cf.appendix_d import ( - dimless_vertical_coordinates_1_6, - dimless_vertical_coordinates_1_7, no_missing_terms, ) -from compliance_checker.cf.appendix_e import cell_methods16, cell_methods17 -from compliance_checker.cf.appendix_f import ( - ellipsoid_names17, - grid_mapping_attr_types16, - grid_mapping_attr_types17, - grid_mapping_dict16, - grid_mapping_dict17, - horizontal_datum_names17, - prime_meridian_names17, -) logger = logging.getLogger(__name__) @@ -1228,7 +1215,6 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): attributes from BaseNCCheck (like supported_ds) will not be passed 
to CFNCCheck.""" - pass appendix_a_base = { diff --git a/compliance_checker/ioos.py b/compliance_checker/ioos.py index c00985ea..1ddf93c0 100644 --- a/compliance_checker/ioos.py +++ b/compliance_checker/ioos.py @@ -20,7 +20,6 @@ BaseSOSGCCheck, Result, TestCtx, - attr_check, check_has, ) from compliance_checker.cf import util as cf_util # not to be confused with cfutil.py @@ -734,7 +733,7 @@ def check_contributor_role_and_vocabulary(self, ds): None if role_val else [role_msg.format(_role)], ) ) - except TypeError as e: + except TypeError: role_results.append( Result( BaseCheck.MEDIUM, @@ -767,7 +766,7 @@ def check_contributor_role_and_vocabulary(self, ds): None if vocb_val else [vocb_msg.format(_vocb)], ) ) - except TypeError as e: + except TypeError: vocb_results.append( Result( BaseCheck.MEDIUM, @@ -1442,7 +1441,6 @@ def _var_qualifies_for_gts_ingest(self, ds, var): bool """ - val = False # should have an ancillary variable with standard_name aggregate_quality_flag avar_val = False @@ -1504,7 +1502,7 @@ def check_gts_ingest_requirements(self, ds): """ # is dataset properly flagged for ingest? - glb_gts_attr = getattr(ds, "gts_ingest", None) + getattr(ds, "gts_ingest", None) # check variables all_passed_ingest_reqs = True # default diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 453f57b2..35cf82b2 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -29,8 +29,7 @@ from compliance_checker import MemoizedDataset, __version__, tempnc from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value -from compliance_checker.cf.cf_base import CFBaseCheck -from compliance_checker.protocols import cdl, erddap, netcdf, opendap +from compliance_checker.protocols import cdl, netcdf, opendap # Ensure output is encoded as Unicode when checker output is redirected or piped diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index 348e76d6..94e8e5c0 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -9,7 +9,7 @@ from netCDF4 import Dataset from pkg_resources import resource_filename -from compliance_checker.cf import CF1_6Check, CF1_7Check, util +from compliance_checker.cf import util from compliance_checker.suite import CheckSuite diff --git a/compliance_checker/tests/test_acdd.py b/compliance_checker/tests/test_acdd.py index f06ed781..09dce10c 100644 --- a/compliance_checker/tests/test_acdd.py +++ b/compliance_checker/tests/test_acdd.py @@ -5,8 +5,8 @@ from netCDF4 import Dataset from compliance_checker.acdd import ACDD1_1Check, ACDD1_3Check -from compliance_checker.tests import BaseTestCase, pytestBaseTest -from compliance_checker.tests.helpers import MockTimeSeries, MockVariable +from compliance_checker.tests import BaseTestCase +from compliance_checker.tests.helpers import MockTimeSeries from compliance_checker.tests.resources import STATIC_FILES diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 87779417..707d41fb 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1138,7 +1138,7 @@ def test_check_calendar(self): assert bad_month_msg in messages dataset = MockTimeSeries() - time = dataset.variables["time"] + dataset.variables["time"] dataset.variables["time"].calendar = "custom" dataset.variables["time"].month_lengths = np.array([30.3], dtype=np.double) results = self.cf.check_calendar(dataset) @@ -1497,7 +1497,7 @@ def 
test_check_standard_name_modifier_units(self): scored, out_of, messages = get_results(self.cf.check_units(dataset)) assert scored == out_of - temp_counts = dataset.createVariable("temp_counts", "i1", ("time",)) + dataset.createVariable("temp_counts", "i1", ("time",)) temp.ancillary_variables += " temp_counts" def test_check_duplicates(self): diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index cc1579c6..b6a01b9c 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -3,7 +3,6 @@ import pytest -from netCDF4 import Dataset from compliance_checker.cf import util diff --git a/compliance_checker/tests/test_ioos_profile.py b/compliance_checker/tests/test_ioos_profile.py index 5dff822b..b28ff159 100644 --- a/compliance_checker/tests/test_ioos_profile.py +++ b/compliance_checker/tests/test_ioos_profile.py @@ -12,7 +12,7 @@ NamingAuthorityValidator, ) from compliance_checker.tests import BaseTestCase -from compliance_checker.tests.helpers import MockTimeSeries, MockVariable, MockNetCDF +from compliance_checker.tests.helpers import MockNetCDF, MockTimeSeries from compliance_checker.tests.resources import STATIC_FILES from compliance_checker.tests.test_cf import get_results From 065cfec38ad9347d8577389de45d992ca37f14a2 Mon Sep 17 00:00:00 2001 From: Benjamin Adams Date: Thu, 19 May 2022 18:58:16 -0400 Subject: [PATCH 3/3] Multitudinous flake8 fixes --- compliance_checker/acdd.py | 2 +- compliance_checker/cf/cf.py | 22 +- compliance_checker/cf/cf_1_6.py | 5 +- compliance_checker/cf/cf_1_7.py | 7 +- compliance_checker/cf/cf_1_8.py | 25 +- compliance_checker/cf/cf_base.py | 11 +- compliance_checker/ioos.py | 9 +- compliance_checker/tests/test_acdd.py | 4 +- compliance_checker/tests/test_cf.py | 273 ------------------ compliance_checker/tests/test_cli.py | 1 + compliance_checker/tests/test_ioos_profile.py | 56 +--- compliance_checker/util.py | 3 +- 12 files changed, 31 insertions(+), 387 deletions(-) diff --git a/compliance_checker/acdd.py b/compliance_checker/acdd.py index a27d9eb4..7f490df5 100644 --- a/compliance_checker/acdd.py +++ b/compliance_checker/acdd.py @@ -829,7 +829,7 @@ def check_var_coverage_content_type(self, ds): } if ctype not in valid_ctypes: msgs.append( - 'coverage_content_type in "%s"' % (variable, sorted(valid_ctypes)) + 'coverage_content_type "%s" not in %s' % (variable, sorted(valid_ctypes)) ) results.append( Result( diff --git a/compliance_checker/cf/cf.py b/compliance_checker/cf/cf.py index 531a2b67..747e3b23 100644 --- a/compliance_checker/cf/cf.py +++ b/compliance_checker/cf/cf.py @@ -1,27 +1,27 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from compliance_checker import cfutil -from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx -from compliance_checker.cf import util -from compliance_checker.cf.appendix_d import ( +from compliance_checker import cfutil # noqa: F401 +from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx # noqa: F401 +from compliance_checker.cf import util # noqa: F401 +from compliance_checker.cf.appendix_d import ( # noqa: F401 dimless_vertical_coordinates_1_6, dimless_vertical_coordinates_1_7, - no_missing_terms + no_missing_terms, ) -from compliance_checker.cf.appendix_e import cell_methods16, cell_methods17 -from compliance_checker.cf.appendix_f import ( +from compliance_checker.cf.appendix_e import cell_methods16, cell_methods17 # noqa: F401 +from compliance_checker.cf.appendix_f 
import ( # noqa: F401 ellipsoid_names17, grid_mapping_attr_types16, grid_mapping_attr_types17, grid_mapping_dict16, grid_mapping_dict17, horizontal_datum_names17, - prime_meridian_names17 + prime_meridian_names17, ) # Version specific checkers organized in other modules -from compliance_checker.cf.cf_1_6 import CF1_6Check -from compliance_checker.cf.cf_1_7 import CF1_7Check -from compliance_checker.cf.cf_1_8 import CF1_8Check +from compliance_checker.cf.cf_1_6 import CF1_6Check # noqa: F401 +from compliance_checker.cf.cf_1_7 import CF1_7Check # noqa: F401 +from compliance_checker.cf.cf_1_8 import CF1_8Check # noqa: F401 diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py index 4074d767..fb87b1d6 100644 --- a/compliance_checker/cf/cf_1_6.py +++ b/compliance_checker/cf/cf_1_6.py @@ -818,6 +818,9 @@ def _check_valid_standard_units(self, ds, variable_name): self._std_names._root, standard_name ) + if std_name_units_dimensionless: + return valid_standard_units.to_result() + standard_name, standard_name_modifier = self._split_standard_name(standard_name) # Other standard_name modifiers have the same units as the @@ -2934,7 +2937,7 @@ def check_climatological_statistics(self, ds): for clim_coord_var in clim_time_coord_vars: climatology_ctx = TestCtx(BaseCheck.MEDIUM, self.section_titles["7.3"]) if hasattr(clim_coord_var, "bounds"): - climatology_result.out_of += 1 + climatology_ctx.out_of += 1 climatology_ctx.messages.append( f"Variable {clim_coord_var.name} has a climatology " "attribute and cannot also have a bounds attribute." diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py index 014764c3..c4728189 100644 --- a/compliance_checker/cf/cf_1_7.py +++ b/compliance_checker/cf/cf_1_7.py @@ -54,11 +54,6 @@ class CF1_7Check(CF1_6Check): "attr_loc": {"G"}, "cf_section": "2.6.3", }, - "actual_range": { - "Type": "N", - "attr_loc": {"D", "C"}, - "cf_section": "2.5.1", - }, "scale_factor": {"Type": "N", "attr_loc": {"D", "C"}, "cf_section": "8.1"}, } ) @@ -731,7 +726,7 @@ def _evaluate_towgs84(self, val): return (True, msg) def check_grid_mapping(self, ds): - __doc__ = super(CF1_7Check, self).check_grid_mapping.__doc__ + super(CF1_7Check, self).check_grid_mapping.__doc__ prev_return = super(CF1_7Check, self).check_grid_mapping(ds) grid_mapping_variables = cfutil.get_grid_mapping_variables(ds) for var_name in sorted(grid_mapping_variables): diff --git a/compliance_checker/cf/cf_1_8.py b/compliance_checker/cf/cf_1_8.py index f2241a56..f2a15548 100644 --- a/compliance_checker/cf/cf_1_8.py +++ b/compliance_checker/cf/cf_1_8.py @@ -2,7 +2,6 @@ from compliance_checker import MemoizedDataset from compliance_checker.cf.cf_1_7 import CF1_7Check from netCDF4 import Dataset -from compliance_checker.base import BaseCheck, TestCtx import requests from lxml import etree from shapely.geometry import Polygon @@ -11,14 +10,6 @@ from compliance_checker.cf.util import reference_attr_variables, string_from_var_type import itertools import warnings -from shapely.geometry import ( - MultiPoint, - LineString, - MultiLineString, - Polygon, - MultiPolygon, -) -from compliance_checker.cf.util import reference_attr_variables """ What's new in CF-1.8 @@ -145,10 +136,9 @@ def check_geometry(self, ds: Dataset): geometry_var = ds.variables[geometry_var_name] geometry_type = getattr(geometry_var, "geometry_type") - valid_geometry_types = {"point", "line", "polygon"} try: node_coord_var_names = geometry_var.node_coordinates - except AttributeError as e: + except AttributeError: 
geom_valid.messages.append( "Could not find required attribute " '"node_coordinates" in geometry ' @@ -180,7 +170,6 @@ def check_geometry(self, ds: Dataset): ) results.append(geom_valid.to_result()) continue - return error_msgs node_count = reference_attr_variables( ds, getattr(geometry_var, "node_count", None) @@ -251,13 +240,6 @@ def check_taxa(self, ds: Dataset): given for those taxa that do not have an identifier. """ ret_val = [] - # taxa identification variables - taxa_name_variables = ds.get_variables_by_attributes( - standard_name="biological_taxon_name" - ) - taxa_lsid_variables = ds.get_variables_by_attributes( - standard_name="biological_taxon_identifier" - ) def match_taxa_standard_names(standard_name_string): """ @@ -512,9 +494,9 @@ def check_geometry(self): super().check_geometry() # non-multipoint should have exactly one feature if self.node_count is None: - expected_node_count = 1 + pass else: - expected_node_count = self.node_count + self.node_count if all(len(cv.dimensions) != 0 for cv in self.coord_vars): same_dim_group = itertools.groupby(self.coord_vars, lambda x: x.dimensions) @@ -623,7 +605,6 @@ def check_geometry(self): ring_orientation = self.interior_ring[:].astype(bool) else: ring_orientation = np.zeros(len(self.part_count), dtype=bool) - current_node_count = self.node_count[:].copy() node_indexer_len = len(self.part_node_count) else: extents = np.concatenate([np.array([0]), self.node_count[:].cumsum()]) diff --git a/compliance_checker/cf/cf_base.py b/compliance_checker/cf/cf_base.py index 32fe81ec..895d2089 100644 --- a/compliance_checker/cf/cf_base.py +++ b/compliance_checker/cf/cf_base.py @@ -188,7 +188,7 @@ def check_grid_mapping(self, ds): defines_grid_mapping.assert_true( (isinstance(grid_mapping, str) and grid_mapping), "{}'s grid_mapping attribute must be a " - + "space-separated non-empty string".format(variable.name), + "space-separated non-empty string".format(variable.name), ) if isinstance(grid_mapping, str): # TODO (badams): refactor functionality to split functionality @@ -1216,7 +1216,6 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): CFNCCheck.""" - appendix_a_base = { "Conventions": {"Type": "S", "attr_loc": {"G"}, "cf_section": None}, "_FillValue": {"Type": "D", "attr_loc": {"D", "C"}, "cf_section": None}, @@ -1243,7 +1242,6 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): "formula_terms": {"Type": "S", "attr_loc": {"C"}, "cf_section": "4.3.2"}, "grid_mapping": {"Type": "S", "attr_loc": {"D"}, "cf_section": "5.6"}, "history": {"Type": "S", "attr_loc": {"G"}, "cf_section": None}, - #'instance_dimension': {'Type': 'N', 'attr_loc': {'D'}, 'cf_section': '9.3'}, "institution": {"Type": "S", "attr_loc": {"G", "D"}, "cf_section": "2.6.2"}, "leap_month": {"Type": "N", "attr_loc": {"C"}, "cf_section": "4.4.1"}, "leap_year": {"Type": "N", "attr_loc": {"C"}, "cf_section": "4.4.1"}, @@ -1252,7 +1250,6 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): "month_lengths": {"Type": "N", "attr_loc": {"C"}, "cf_section": "4.4.1"}, "positive": {"Type": "S", "attr_loc": {"C"}, "cf_section": None}, "references": {"Type": "S", "attr_loc": {"G", "D"}, "cf_section": "2.6.2"}, - #'sample_dimension': {'Type': 'N', 'attr_loc': {'D'}, 'cf_section': '9.3'}, "scale_factor": {"Type": "N", "attr_loc": {"D"}, "cf_section": "8.1"}, "source": {"Type": "S", "attr_loc": {"G", "D"}, "cf_section": "2.6.2"}, "standard_error_multiplier": {"Type": "N", "attr_loc": {"D"}, "cf_section": None}, @@ -1263,9 +1260,3 @@ class CFNCCheck(BaseNCCheck, CFBaseCheck): "valid_min": {"Type": "N", "attr_loc": 
{"D", "C"}, "cf_section": None}, "valid_range": {"Type": "N", "attr_loc": {"D", "C"}, "cf_section": None}, } - - -class CFNCCheck(BaseNCCheck, CFBaseCheck): - @classmethod - def beliefs(cls): # @TODO - return {} diff --git a/compliance_checker/ioos.py b/compliance_checker/ioos.py index 1ddf93c0..6c9a1654 100644 --- a/compliance_checker/ioos.py +++ b/compliance_checker/ioos.py @@ -553,7 +553,7 @@ def __init__(self): ("infoUrl", base.UrlValidator()), "license", ("naming_authority", NamingAuthorityValidator()), - #'platform', # checked in check_platform_global + #'platform', # checked in check_platform_global # noqa "platform_name", "publisher_country", ("publisher_email", base.EmailValidator()), @@ -579,11 +579,11 @@ def __init__(self): "creator_postalcode", "creator_state", # checked in check_creator_and_publisher_type - #'creator_type', + #'creator_type', # noqa "institution", "instrument", # checked in check_ioos_ingest - #'ioos_ingest', + #'ioos_ingest', # noqa "keywords", ("platform_id", IOOS1_2_PlatformIDValidator()), # alphanumeric only "publisher_address", @@ -593,7 +593,7 @@ def __init__(self): "publisher_postalcode", "publisher_state", # checked in check_creator_and_publisher_type - #'publisher_type', + #'publisher_type', # noqa "references", "instrument_vocabulary", ] @@ -1441,7 +1441,6 @@ def _var_qualifies_for_gts_ingest(self, ds, var): bool """ - # should have an ancillary variable with standard_name aggregate_quality_flag avar_val = False anc_vars = str(getattr(var, "ancillary_variables", "")).split(" ") diff --git a/compliance_checker/tests/test_acdd.py b/compliance_checker/tests/test_acdd.py index 09dce10c..4e9ea6e8 100644 --- a/compliance_checker/tests/test_acdd.py +++ b/compliance_checker/tests/test_acdd.py @@ -10,12 +10,12 @@ from compliance_checker.tests.resources import STATIC_FILES -def to_singleton_var(l): +def to_singleton_var(item): """ Get the first value of a list if this implements iterator protocol and is not a string """ - return [x[0] if hasattr(x, "__iter__") and not isinstance(x, str) else x for x in l] + return [x[0] if hasattr(x, "__iter__") and not isinstance(x, str) else x for x in item] def check_varset_nonintersect(group0, group1): diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 707d41fb..9fd18185 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -2776,279 +2776,6 @@ def test_taxonomy_skip_lsid(self): assert results[0].value[0] == results[0].value[1] -class TestCF1_8(BaseTestCase): - def setUp(self): - self.cf = CF1_8Check() - - def test_point_geometry_simple(self): - dataset = MockTimeSeries() - fake_data = dataset.createVariable("someData", "f8", ("time",)) - fake_data.geometry = "geometry" - x = dataset.createVariable("x", "f8", ()) - y = dataset.createVariable("y", "f8", ()) - geom_var = dataset.createVariable("geometry", "i4", ()) - geom_var.geometry_type = "point" - geom_var.node_coordinates = "x y" - x[:] = 1 - y[:] = 1 - self.cf.check_geometry(dataset) - - def test_point_geometry_multiple(self): - dataset = MockTimeSeries() - dataset.createDimension("point_count", 3) - fake_data = dataset.createVariable("someData", "f8", ("time",)) - fake_data.geometry = "geometry" - x = dataset.createVariable("x", "f8", ("point_count",)) - y = dataset.createVariable("y", "f8", ("point_count",)) - geom_var = dataset.createVariable("geometry", "i4", ()) - geom_var.geometry_type = "point" - geom_var.node_coordinates = "x y" - x[:] = np.array([10, 20, 30]) - y[:] = 
np.array([30, 35, 21]) - results = self.cf.check_geometry(dataset) - assert results[0].value[0] == results[0].value[1] - dataset.createDimension("point_count_2", 2) - # can't recreate y, even with del issued first - y2 = dataset.createVariable("y2", "f8", ("point_count_2",)) - geom_var.node_coordinates = "x y2" - y2[:] = np.array([30, 35]) - results = self.cf.check_geometry(dataset) - assert results[0].value[0] < results[0].value[1] - - def test_line_geometry(self): - dataset = self.load_dataset(STATIC_FILES["line_geometry"]) - self.cf.check_geometry(dataset) - - def test_polygon_geometry(self): - dataset = self.load_dataset(STATIC_FILES["polygon_geometry"]) - self.cf.check_geometry(dataset) - dataset.variables["interior_ring"] = MockVariable( - dataset.variables["interior_ring"] - ) - # Flip sign indicator for interior rings. Should cause failure - flip_ring_bits = (dataset.variables["interior_ring"][:] == 0).astype(int) - dataset.variables["interior_ring"][:] = flip_ring_bits - results = self.cf.check_geometry(dataset) - # There should be messages regarding improper polygon order - assert results[0].value[0] < results[0].value[1] - assert results[0].msgs - - def test_bad_lsid(self): - """ - Tests malformed and nonexistent LSIDs - """ - dataset = MockTimeSeries() - # TODO: handle scalar dimension - dataset.createDimension("taxon", 1) - abundance = dataset.createVariable("abundance", "f8", ("time",)) - abundance.standard_name = ( - "number_concentration_of_biological_taxon_in_sea_water" - ) - abundance.units = "m-3" - abundance.coordinates = "taxon_name taxon_lsid" - taxon_name = dataset.createVariable("taxon_name", str, ("taxon",)) - taxon_name.standard_name = "biological_taxon_name" - taxon_lsid = dataset.createVariable("taxon_lsid", str, ("taxon",)) - taxon_lsid.standard_name = "biological_taxon_lsid" - taxon_name[0] = "Esox lucius" - taxon_lsid[0] = "urn:lsid:itis.gov:itis_tsn:99999999999" - with requests_mock.Mocker() as m: - # bad ID - taxon_lsid[0] = "99999999999" - m.get( - "http://www.lsid.info/urn:lsid:marinespecies.org:taxname:99999999999", - status_code=400, - text="400 Bad Request</head><body><h1>Bad Request</h1><p>Unknown LSID</p></body></html>", - ) - results = self.cf.check_taxa(dataset) - assert len(results) == 1 - messages = results[0].msgs - assert results[0].value[0] < results[0].value[1] - assert len(messages) == 1 - taxon_lsid[ - 0 - ] = "http://www.lsid.info/urn:lsid:marinespecies.org:taxname:99999999999" - results = self.cf.check_taxa(dataset) - assert messages[0].startswith( - "Taxon id must match one of the following forms:" - ) - assert results[0].value[0] < results[0].value[1] - - def test_taxonomy_data_worms_valid(self): - """ - Tests taxonomy data with a mocked pyworms call - """ - with requests_mock.Mocker() as m: - # assume LSID lookups for WoRMS return valid HTTP status code - m.get( - re.compile( - r"^http://www.lsid.info/urn:lsid:marinespecies.org:taxname:\d+$" - ) - ) - response_1 = json.dumps( - { - "AphiaID": 104464, - "url": "http://www.marinespecies.org/aphia.php?p=taxdetails&id=104464", - "scientificname": "Calanus finmarchicus", - "authority": "(Gunnerus, 1770)", - "status": "accepted", - "unacceptreason": None, - "taxonRankID": 220, - "rank": "Species", - "valid_AphiaID": 104464, - "valid_name": "Calanus finmarchicus", - "valid_authority": "(Gunnerus, 1770)", - "parentNameUsageID": 104152, - "kingdom": "Animalia", - "phylum": "Arthropoda", - "class": "Hexanauplia", - "order": "Calanoida", - "family": "Calanidae", - "genus": "Calanus", - 
"citation": "Walter, T.C.; Boxshall, G. (2021). World of Copepods Database. Calanus finmarchicus (Gunnerus, 1770). Accessed through: World Register of Marine Species at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=104464 on 2021-11-11", - "lsid": "urn:lsid:marinespecies.org:taxname:104464", - "isMarine": 1, - "isBrackish": 0, - "isFreshwater": 0, - "isTerrestrial": 0, - "isExtinct": None, - "match_type": "exact", - "modified": "2020-10-06T15:25:25.040Z", - } - ) - m.get( - "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/104464", - text=response_1, - ) - response_2 = json.dumps( - { - "AphiaID": 104466, - "url": "http://www.marinespecies.org/aphia.php?p=taxdetails&id=104466", - "scientificname": "Calanus helgolandicus", - "authority": "(Claus, 1863)", - "status": "accepted", - "unacceptreason": None, - "taxonRankID": 220, - "rank": "Species", - "valid_AphiaID": 104466, - "valid_name": "Calanus helgolandicus", - "valid_authority": "(Claus, 1863)", - "parentNameUsageID": 104152, - "kingdom": "Animalia", - "phylum": "Arthropoda", - "class": "Hexanauplia", - "order": "Calanoida", - "family": "Calanidae", - "genus": "Calanus", - "citation": "Walter, T.C.; Boxshall, G. (2021). World of Copepods Database. Calanus helgolandicus (Claus, 1863). Accessed through: World Register of Marine Species at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=104466 on 2021-11-11", - "lsid": "urn:lsid:marinespecies.org:taxname:104466", - "isMarine": 1, - "isBrackish": 0, - "isFreshwater": 0, - "isTerrestrial": 0, - "isExtinct": None, - "match_type": "exact", - "modified": "2004-12-21T15:54:05Z", - } - ) - m.get( - "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/104466", - text=response_2, - ) - dataset = self.load_dataset(STATIC_FILES["taxonomy_example"]) - - results = self.cf.check_taxa(dataset) - assert len(results) == 1 - assert results[0].value[0] == results[0].value[1] - - def test_taxonomy_data_itis_valid(self): - """ - Tests taxonomy data with a mocked ITIS call - """ - dataset = MockTimeSeries() - # TODO: handle scalar dimension - dataset.createDimension("taxon", 1) - abundance = dataset.createVariable("abundance", "f8", ("time",)) - abundance.standard_name = ( - "number_concentration_of_biological_taxon_in_sea_water" - ) - abundance.units = "m-3" - abundance.coordinates = "taxon_name taxon_lsid" - taxon_name = dataset.createVariable("taxon_name", str, ("taxon",)) - taxon_name.standard_name = "biological_taxon_name" - taxon_lsid = dataset.createVariable("taxon_lsid", str, ("taxon",)) - taxon_lsid.standard_name = "biological_taxon_lsid" - taxon_name[0] = "Esox lucius" - taxon_lsid[0] = "urn:lsid:itis.gov:itis_tsn:162139" - - with requests_mock.Mocker() as m: - m.get(re.compile(r"^http://www.lsid.info/urn:lsid:itis.gov:itis_tsn:\d+$")) - response = r"""{"acceptedNameList":{"acceptedNames":[null],"class":"gov.usgs.itis.itis_service.data.SvcAcceptedNameList","tsn":"162139"},"class":"gov.usgs.itis.itis_service.data.SvcFullRecord","commentList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonCommentList","comments":[null],"tsn":"162139"},"commonNameList":{"class":"gov.usgs.itis.itis_service.data.SvcCommonNameList","commonNames":[{"class":"gov.usgs.itis.itis_service.data.SvcCommonName","commonName":"northern pike","language":"English","tsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcCommonName","commonName":"grand 
brochet","language":"French","tsn":"162139"}],"tsn":"162139"},"completenessRating":{"class":"gov.usgs.itis.itis_service.data.SvcGlobalSpeciesCompleteness","completeness":"","rankId":220,"tsn":"162139"},"coreMetadata":{"class":"gov.usgs.itis.itis_service.data.SvcCoreMetadata","credRating":"TWG standards met","rankId":220,"taxonCoverage":"","taxonCurrency":"","taxonUsageRating":"valid","tsn":"162139","unacceptReason":""},"credibilityRating":{"class":"gov.usgs.itis.itis_service.data.SvcCredibilityData","credRating":"TWG standards met","tsn":"162139"},"currencyRating":{"class":"gov.usgs.itis.itis_service.data.SvcCurrencyData","rankId":220,"taxonCurrency":"","tsn":"162139"},"dateData":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonDateData","initialTimeStamp":"1996-06-13 14:51:08.0","tsn":"162139","updateDate":"2004-01-22"},"expertList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonExpertList","experts":[{"class":"gov.usgs.itis.itis_service.data.SvcTaxonExpert","comment":"Research Curator of Fishes, North Carolina State Museum of Natural Sciences, Research Laboratory, 4301 Reedy Creek Rd., Raleigh, NC, 27607, USA","expert":"Wayne C. Starnes","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"}],"updateDate":"2004-02-23"}],"tsn":"162139"},"geographicDivisionList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonGeoDivisionList","geoDivisions":[{"class":"gov.usgs.itis.itis_service.data.SvcTaxonGeoDivision","geographicValue":"North America","updateDate":"1998-09-14"}],"tsn":"162139"},"hierarchyUp":{"author":null,"class":"gov.usgs.itis.itis_service.data.SvcHierarchyRecord","parentName":"Esox","parentTsn":"162138","rankName":"Species","taxonName":"Esox lucius","tsn":"162139"},"jurisdictionalOriginList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonJurisdictionalOriginList","jurisdictionalOrigins":[{"class":"gov.usgs.itis.itis_service.data.SvcTaxonJurisdictionalOrigin","jurisdictionValue":"Alaska","origin":"Native","updateDate":"2004-01-22"},{"class":"gov.usgs.itis.itis_service.data.SvcTaxonJurisdictionalOrigin","jurisdictionValue":"Canada","origin":"Native","updateDate":"2004-01-22"},{"class":"gov.usgs.itis.itis_service.data.SvcTaxonJurisdictionalOrigin","jurisdictionValue":"Continental US","origin":"Native & Introduced","updateDate":"2004-01-22"}],"tsn":"162139"},"kingdom":{"class":"gov.usgs.itis.itis_service.data.SvcKingdomInfo","kingdomId":"5","kingdomName":"Animalia ","tsn":"162139"},"otherSourceList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonOtherSourceList","otherSources":[{"acquisitionDate":"2003-03-17","class":"gov.usgs.itis.itis_service.data.SvcTaxonOtherSource","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"}],"source":"Catalog of Fishes, 17-Mar-2003","sourceComment":"http://www.calacademy.org/research/ichthyology/catalog/","sourceType":"website","updateDate":"2004-02-11","version":"13-Mar-03"},{"acquisitionDate":"1996-07-29","class":"gov.usgs.itis.itis_service.data.SvcTaxonOtherSource","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"}],"source":"NODC Taxonomic 
Code","sourceComment":"","sourceType":"database","updateDate":"2010-01-14","version":"8.0"},{"acquisitionDate":"2003-05-06","class":"gov.usgs.itis.itis_service.data.SvcTaxonOtherSource","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"northern pike","refLanguage":"English","referredTsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"grand brochet","refLanguage":"French","referredTsn":"162139"}],"source":"<a href=\"http://www.menv.gouv.qc.ca/biodiversite/centre.htm\">CDP","sourceComment":"","sourceType":"database","updateDate":"2003-05-08","version":"1999"}],"tsn":"162139"},"parentTSN":{"class":"gov.usgs.itis.itis_service.data.SvcParentTsn","parentTsn":"162138","tsn":"162139"},"publicationList":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonPublicationList","publications":[{"actualPubDate":"2004-07-01","class":"gov.usgs.itis.itis_service.data.SvcTaxonPublication","isbn":"1-888569-61-1","issn":"0097-0638","listedPubDate":"2004-01-01","pages":"ix + 386","pubComment":"Full author list: Nelson, Joseph S., Edwin J. Crossman, H�ctor Espinosa-P�rez, Lloyd T. Findley, Carter R. Gilbert, Robert N. Lea, and James D. Williams","pubName":"American Fisheries Society Special Publication, no. 29","pubPlace":"Bethesda, Maryland, USA","publisher":"American Fisheries Society","referenceAuthor":"Nelson, Joseph S., Edwin J. Crossman, H. Espinosa-P�rez, L. T. Findley, C. R. Gilbert, et al., eds.","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"grand brochet","refLanguage":"French","referredTsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"northern pike","refLanguage":"English","referredTsn":"162139"}],"title":"Common and scientific names of fishes from the United States, Canada, and Mexico, Sixth Edition","updateDate":"2021-10-27"},{"actualPubDate":"2003-12-31","class":"gov.usgs.itis.itis_service.data.SvcTaxonPublication","isbn":"","issn":"","listedPubDate":"2003-12-31","pages":"","pubComment":"As-yet (2003) unpublished manuscript from 1998","pubName":"Checklist of Vertebrates of the United States, the U.S. Territories, and Canada","pubPlace":"","publisher":"","referenceAuthor":"Banks, R. C., R. W. McDiarmid, A. L. Gardner, and W. C. Starnes","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"}],"title":"","updateDate":"2021-08-26"},{"actualPubDate":"1980-01-01","class":"gov.usgs.itis.itis_service.data.SvcTaxonPublication","isbn":"","issn":"0097-0638","listedPubDate":"1980-01-01","pages":"174","pubComment":"","pubName":"American Fisheries Society Special Publication, no. 12","pubPlace":"Bethesda, Maryland, USA","publisher":"American Fisheries Society","referenceAuthor":"Robins, Richard C., Reeve M. Bailey, Carl E. Bond, James R. Brooker, Ernest A. 
Lachner, et al.","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"},{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"northern pike","refLanguage":"English","referredTsn":"162139"}],"title":"A List of Common and Scientific Names of Fishes from the United States and Canada, Fourth Edition","updateDate":"2021-10-27"},{"actualPubDate":"1991-01-01","class":"gov.usgs.itis.itis_service.data.SvcTaxonPublication","isbn":"0-913235-70-9","issn":"0097-0638","listedPubDate":"1991-01-01","pages":"183","pubComment":"","pubName":"American Fisheries Society Special Publication, no. 20","pubPlace":"Bethesda, Maryland, USA","publisher":"American Fisheries Society","referenceAuthor":"Robins, Richard C., Reeve M. Bailey, Carl E. Bond, James R. Brooker, Ernest A. Lachner, et al.","referenceFor":[{"class":"gov.usgs.itis.itis_service.data.SvcReferenceForElement","name":"Esox lucius","refLanguage":null,"referredTsn":"162139"}],"title":"Common and Scientific Names of Fishes from the United States and Canada, Fifth Edition","updateDate":"2021-10-27"}],"tsn":"162139"},"scientificName":{"author":"Linnaeus, 1758","class":"gov.usgs.itis.itis_service.data.SvcScientificName","combinedName":"Esox lucius","kingdom":null,"tsn":"162139","unitInd1":null,"unitInd2":null,"unitInd3":null,"unitInd4":null,"unitName1":"Esox ","unitName2":"lucius","unitName3":null,"unitName4":null},"synonymList":{"class":"gov.usgs.itis.itis_service.data.SvcSynonymNameList","synonyms":[null],"tsn":"162139"},"taxRank":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonRankInfo","kingdomId":"5","kingdomName":"Animalia ","rankId":"220","rankName":"Species ","tsn":"162139"},"taxonAuthor":{"authorship":"Linnaeus, 1758","class":"gov.usgs.itis.itis_service.data.SvcTaxonAuthorship","tsn":"162139","updateDate":"2004-04-09"},"tsn":"162139","unacceptReason":{"class":"gov.usgs.itis.itis_service.data.SvcUnacceptData","tsn":"162139","unacceptReason":null},"usage":{"class":"gov.usgs.itis.itis_service.data.SvcTaxonUsageData","taxonUsageRating":"valid","tsn":"162139"}}""" - m.get( - "https://www.itis.gov/ITISWebService/jsonservice/getFullRecordFromTSN?tsn=162139", - text=response, - ) - - results = self.cf.check_taxa(dataset) - - assert len(results) == 1 - assert results[0].value[0] == results[0].value[1] - - # try non-matching name - taxon_name[0] = "Morone saxitilis" - results = self.cf.check_taxa(dataset) - result = results[0] - assert result.msgs == [ - "Supplied taxon name and ITIS scientific name do not match. " - "Supplied taxon name is 'Morone saxitilis', ITIS scientific name " - "for TSN 162139 is 'Esox lucius.'" - ] - - def test_taxonomy_skip_lsid(self): - """ - Tests that nodata/unset LSID values are skipped for validation - """ - dataset = MockTimeSeries() - # TODO: handle scalar dimension - dataset.createDimension("taxon", 1) - abundance = dataset.createVariable("abundance", "f8", ("time",)) - abundance.standard_name = ( - "number_concentration_of_biological_taxon_in_sea_water" - ) - abundance.units = "m-3" - abundance.coordinates = "taxon_name taxon_lsid" - taxon_name = dataset.createVariable("taxon_name", str, ("taxon",)) - taxon_name.standard_name = "biological_taxon_name" - taxon_lsid = dataset.createVariable("taxon_lsid", str, ("taxon",)) - taxon_lsid.standard_name = "biological_taxon_lsid" - # This would fail if checked against an LSID or even for binomial - # nomenclature, obviously. 
- taxon_name[0] = "No check" - results = self.cf.check_taxa(dataset) - assert len(results[0].msgs) == 0 - assert results[0].value[0] == results[0].value[1] - - dataset = MockTimeSeries() - # TODO: handle scalar dimension? - dataset.createDimension("string80", 80) - dataset.createDimension("taxon", 1) - abundance = dataset.createVariable("abundance", "f8", ("time",)) - abundance.standard_name = ( - "number_concentration_of_biological_taxon_in_sea_water" - ) - abundance.units = "m-3" - abundance.coordinates = "taxon_name taxon_lsid" - taxon_name = dataset.createVariable("taxon_name", "S1", ("taxon", "string80")) - taxon_name.standard_name = "biological_taxon_name" - taxon_lsid = dataset.createVariable("taxon_lsid", "S1", ("taxon", "string80")) - taxon_lsid.standard_name = "biological_taxon_lsid" - fake_str = "No check" - taxon_name[0] = stringtoarr(fake_str, 80) - results = self.cf.check_taxa(dataset) - assert len(results[0].msgs) == 0 - assert results[0].value[0] == results[0].value[1] - - class TestCFUtil(BaseTestCase): """ Class to test the cfutil module. diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 7b572162..c12c3c1c 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -86,6 +86,7 @@ def test_list_checks(self): # since SimpleNamespace is Python 3.3+ only CheckSuite.checkers.clear() # need to mock setuptools entrypoints here in order to load in checkers + def checker_1(): return Namespace(name="checker_1") diff --git a/compliance_checker/tests/test_ioos_profile.py b/compliance_checker/tests/test_ioos_profile.py index b28ff159..2ecad34e 100644 --- a/compliance_checker/tests/test_ioos_profile.py +++ b/compliance_checker/tests/test_ioos_profile.py @@ -415,60 +415,6 @@ def test_check_geospatial_vars_have_attrs(self): scored, out_of, messages = get_results(results) self.assertEqual(scored, out_of) - def test_check_contributor_role_and_vocabulary(self): - ds = MockTimeSeries() # time, lat, lon, depth - - # no contributor_role or vocab, fail both - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertFalse(all(r.value for r in results)) - - # bad contributor_role and vocab - ds.setncattr("contributor_role", "bad") - ds.setncattr("contributor_role_vocabulary", "bad") - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertFalse(all(r.value for r in results)) - - # good role, bad vocab - ds.setncattr("contributor_role", "contributor") - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertTrue(results[0].value) - self.assertEqual(results[0].msgs, []) - self.assertFalse(results[1].value) - - # bad role, good vocab - ds.setncattr("contributor_role", "bad") - ds.setncattr( - "contributor_role_vocabulary", - "http://vocab.nerc.ac.uk/collection/G04/current/", - ) - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertFalse(results[0].value) - self.assertTrue(results[1].value) - self.assertEqual(results[1].msgs, []) - - # good role, good vocab - ds.setncattr("contributor_role", "contributor") - ds.setncattr( - "contributor_role_vocabulary", - "http://vocab.nerc.ac.uk/collection/G04/current/", - ) - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertTrue(results[0].value) - self.assertEqual(results[0].msgs, []) - self.assertTrue(results[1].value) - self.assertEqual(results[1].msgs, []) - - ds.setncattr("contributor_role", "resourceProvider") - ds.setncattr( - "contributor_role_vocabulary", - 
"https://www.ngdc.noaa.gov/wiki/index.php?title=ISO_19115_and_19115-2_CodeList_Dictionaries#CI_RoleCode", - ) - results = self.ioos.check_contributor_role_and_vocabulary(ds) - self.assertTrue(results[0].value) - self.assertEqual(results[0].msgs, []) - self.assertTrue(results[1].value) - self.assertEqual(results[1].msgs, []) - def test_check_creator_and_publisher_type(self): """ Checks the creator_type and publisher_type global attributes with @@ -999,7 +945,7 @@ def test_check_contributor_role_and_vocabulary(self): def test_check_feattype_timeseries_cf_role(self): - ### featureType: timeseries and timeseries - msingle station require same tests ### + # featureType: timeseries and timeseries - msingle station require same tests # for ftype in ("timeseries", "timeseries - single station", "timeseries - multiple station"): ftype = "timeseries" diff --git a/compliance_checker/util.py b/compliance_checker/util.py index 71279650..a9bb16ee 100644 --- a/compliance_checker/util.py +++ b/compliance_checker/util.py @@ -43,5 +43,6 @@ def kvp_convert(input_coll): return input_coll else: return OrderedDict( - (l, None) if not isinstance(l, tuple) else (l[0], l[1]) for l in input_coll + (thing, None) if not isinstance(thing, tuple) else + (thing[0], thing[1]) for thing in input_coll )