Skip to content

Commit

Permalink
Merge pull request #624 from daltonkell/acdd_reformat_rebase
Browse files Browse the repository at this point in the history
Goal: collapse ACDD checks into compact groups
  • Loading branch information
Bob Fratantonio authored Feb 6, 2019
2 parents e14d827 + fa166ef commit e32ef92
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 150 deletions.
87 changes: 48 additions & 39 deletions compliance_checker/acdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,11 @@ def __init__(self):
# the method isn't executed repeatedly.
self._applicable_variables = None

# to be used to format variable Result groups headers
self._var_header = "variable \"{}\" missing the following attributes:"

# set up attributes according to version
@check_has(BaseCheck.HIGH)
@check_has(BaseCheck.HIGH, gname="Global Attributes")
def check_high(self, ds):
'''
Performs a check on each highly recommended attributes' existence in the dataset
Expand All @@ -91,7 +94,7 @@ def check_high(self, ds):
'''
return self.high_rec_atts

@check_has(BaseCheck.MEDIUM)
@check_has(BaseCheck.MEDIUM, gname="Global Attributes")
def check_recommended(self, ds):
'''
Performs a check on each recommended attributes' existence in the dataset
Expand All @@ -100,7 +103,7 @@ def check_recommended(self, ds):
'''
return self.rec_atts

@check_has(BaseCheck.LOW)
@check_has(BaseCheck.LOW, gname="Global Attributes")
def check_suggested(self, ds):
'''
Performs a check on each suggested attributes' existence in the dataset
Expand All @@ -120,17 +123,19 @@ def get_applicable_variables(self, ds):
if self._applicable_variables is None:
self.applicable_variables = cfutil.get_geophysical_variables(ds)
varname = cfutil.get_time_variable(ds)
if varname:
# avoid duplicates by checking if already present
if varname and (varname not in self.applicable_variables):
self.applicable_variables.append(varname)
varname = cfutil.get_lon_variable(ds)
if varname:
if varname and (varname not in self.applicable_variables):
self.applicable_variables.append(varname)
varname = cfutil.get_lat_variable(ds)
if varname:
if varname and (varname not in self.applicable_variables):
self.applicable_variables.append(varname)
varname = cfutil.get_z_variable(ds)
if varname:
if varname and (varname not in self.applicable_variables):
self.applicable_variables.append(varname)

return self.applicable_variables

def check_var_long_name(self, ds):
Expand All @@ -149,8 +154,8 @@ def check_var_long_name(self, ds):
long_name = getattr(ds.variables[variable], 'long_name', None)
check = long_name is not None
if not check:
msgs.append("Var %s missing attribute long_name" % variable)
results.append(Result(BaseCheck.HIGH, check, "variable {}".format(variable), msgs))
msgs.append("long_name")
results.append(Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs))

return results

Expand All @@ -166,8 +171,8 @@ def check_var_standard_name(self, ds):
std_name = getattr(ds.variables[variable], 'standard_name', None)
check = std_name is not None
if not check:
msgs.append("Var %s missing attribute standard_name" % variable)
results.append(Result(BaseCheck.HIGH, check, "variable {}".format(variable), msgs))
msgs.append("standard_name")
results.append(Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs))

return results

Expand All @@ -188,14 +193,16 @@ def check_var_units(self, ds):
continue
# Check if we have no units
if not unit_check:
msgs.append("Var %s missing attribute units" % variable)
results.append(Result(BaseCheck.HIGH, unit_check, "variable {}".format(variable), msgs))
msgs.append("units")
results.append(Result(BaseCheck.HIGH, unit_check, self._var_header.format(variable), msgs))

return results

def check_acknowledgment(self, ds):
'''
Check if acknowledgment/acknowledgment attribute is present.
Check if acknowledgment/acknowledgment attribute is present. Because
acknowledgement has its own check, we are keeping it out of the Global
Attributes (even though it is a Global Attr).
:param netCDF4.Dataset ds: An open netCDF dataset
'''
Expand All @@ -204,9 +211,10 @@ def check_acknowledgment(self, ds):
if hasattr(ds, 'acknowledgment') or hasattr(ds, 'acknowledgement'):
check = True
else:
messages.append("Attr acknowledgement not present")
messages.append("acknowledgment/acknowledgement not present")

return Result(BaseCheck.MEDIUM, check, 'acknowledgment/acknowledgement', msgs=messages)
# name="Global Attributes" so gets grouped with Global Attributes
return Result(BaseCheck.MEDIUM, check, "Global Attributes", msgs=messages)

def check_lat_extents(self, ds):
'''
Expand Down Expand Up @@ -346,20 +354,20 @@ def verify_geospatial_bounds(self, ds):
check = var is not None
if not check:
return ratable_result(False,
'geospatial_bounds',
["Attr geospatial_bounds not present"])
"Global Attributes", # grouped with Globals
["geospatial_bounds not present"])

try:
# TODO: verify that WKT is valid given CRS (defaults to EPSG:4326
# in ACDD.
from_wkt(ds.geospatial_bounds)
except AttributeError:
return ratable_result(False,
'geospatial_bounds',
"Global Attributes", # grouped with Globals
['Could not parse WKT, possible bad value for WKT'])
# parsed OK
else:
return ratable_result(True, 'geospatial_bounds', tuple())
return ratable_result(True, "Global Attributes", tuple())

def _check_total_z_extents(self, ds, z_variable):
'''
Expand Down Expand Up @@ -516,19 +524,19 @@ def verify_convention_version(self, ds):
"""
Verify that the version in the Conventions field is correct
"""
for convention in ds.Conventions.replace(' ', '').split(','):
if convention == 'ACDD-' + self._cc_spec_version:
return ratable_result((2, 2),
'Conventions',
[])
# Conventions attribute is present, but does not include
# proper ACDD version
messages = [
"Global Attribute 'Conventions' does not contain 'ACDD-{}'".format(self._cc_spec_version)
]
return ratable_result((1, 2),
'Conventions',
messages)
try:
for convention in getattr(ds, "Conventions", '').replace(' ', '').split(','):
if convention == 'ACDD-' + self._cc_spec_version:
return ratable_result((2, 2), None, []) # name=None so grouped with Globals

# if no/wrong ACDD convention, return appropriate result
# Result will have name "Global Attributes" to group with globals
m = ["Conventions does not contain 'ACDD-{}'".format(self._cc_spec_version)]
return ratable_result((1, 2), "Global Attributes", m)
except AttributeError: # NetCDF attribute not found
m = ["No Conventions attribute present; must contain ACDD-{}".format(self._cc_spec_version)]
# Result will have name "Global Attributes" to group with globals
return ratable_result((0, 2), "Global Attributes", m)


class ACDDNCCheck(BaseNCCheck, ACDDBaseCheck):
Expand Down Expand Up @@ -567,7 +575,8 @@ def __init__(self):
('Conventions', self.verify_convention_version)
])

self.rec_atts.extend(['geospatial_vertical_positive',
self.rec_atts.extend([
'geospatial_vertical_positive',
'geospatial_bounds_crs',
'geospatial_bounds_vertical_crs',
'publisher_name', # publisher,dataCenter
Expand Down Expand Up @@ -689,11 +698,9 @@ def check_var_coverage_content_type(self, ds):
'coverage_content_type', None)
check = ctype is not None
if not check:
msgs.append("Var %s missing attribute coverage_content_type" %
variable)
msgs.append("coverage_content")
results.append(Result(BaseCheck.HIGH, check,
"variable {}".format(variable),
msgs))
self._var_header.format(variable), msgs))
continue

# ISO 19115-1 codes
Expand All @@ -708,7 +715,9 @@ def check_var_coverage_content_type(self, ds):
'coordinate'
}
if ctype not in valid_ctypes:
msgs.append("Var %s does not have a coverage_content_type in %s"
msgs.append("coverage_content_type in \"%s\""
% (variable, sorted(valid_ctypes)))
results.append(Result(BaseCheck.HIGH, check, # append to list
self._var_header.format(variable), msgs))

return results
69 changes: 53 additions & 16 deletions compliance_checker/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,20 @@ def xpath_check(tree, xpath):
return len(xpath(tree)) > 0


def attr_check(l, ds, priority, ret_val):
def attr_check(l, ds, priority, ret_val, gname=None):
"""
Handles attribute checks for simple presence of an attribute, presence of
one of several attributes, and passing a validation function. Returns a
status along with an error message in the event of a failure. Mutates
ret_val parameter
:param tuple(str, func) or str l: the attribute being checked
:param netCDF4 dataset ds : dataset being checked
:param int priority : priority level of check
:param list ret_val : result to be returned
:param str or None gname : group name assigned to a group of attribute Results
"""

msgs = []
if isinstance(l, tuple):
name, other = l
Expand All @@ -246,18 +253,32 @@ def attr_check(l, ds, priority, ret_val):
# check instead
res = std_check_in(ds, name, other)
if res == 0:
msgs.append("Attr %s not present" % name)
msgs.append("%s not present" % name)
elif res == 1:
msgs.append("Attr %s present, but not in expected value list (%s)" % (name, other))

ret_val.append(Result(priority, (res, 2), name, msgs))
msgs.append("%s present, but not in expected value list (%s)" % (name, other))

ret_val.append(
Result(
priority,
(res, 2),
gname if gname else name, # groups Globals if supplied
msgs
)
)
# if we have an XPath expression, call it on the document
elif type(other) is etree.XPath:
# TODO: store tree instead of creating it each time?
res = xpath_check(ds._root, other)
if not res:
msgs = ["XPath for {} not found".format(name)]
ret_val.append(Result(priority, res, name, msgs))
ret_val.append(
Result(
priority,
res,
gname if gname else name,
msgs
)
)
# if the attribute is a function, call it
# right now only supports single attribute
# important note: current magic approach uses all functions
Expand All @@ -270,10 +291,18 @@ def attr_check(l, ds, priority, ret_val):
# to check whether the attribute is present every time
# and instead focuses on the core functionality of the
# test
res = std_check(ds, name)

res = other(ds) # call the method on the dataset
if not res:
msgs = ["Attr %s not present" % name]
ret_val.append(Result(priority, res, name, msgs))
msgs = ["%s not present" % name]
ret_val.append(
Result(
priority,
res,
gname if gname else name,
msgs
)
)
else:
ret_val.append(other(ds)(priority))
# unsupported second type in second
Expand All @@ -283,25 +312,33 @@ def attr_check(l, ds, priority, ret_val):
else:
res = std_check(ds, l)
if not res:
msgs = ["Attr %s not present" % l]
msgs = ["%s not present" % l]
else:
try:
# see if this attribute is a string, try stripping
# whitespace, and return an error if empty
att_strip = getattr(ds, l).strip()
if not att_strip:
res = False
msgs = ["Attr %s is empty or completely whitespace" % l]
msgs = ["%s is empty or completely whitespace" % l]
# if not a string/has no strip method we should be OK
except AttributeError:
pass

ret_val.append(Result(priority, res, l, msgs))
# gname arg allows the global attrs to be grouped together
ret_val.append(Result(
priority,
value=res,
name=gname if gname else l,
msgs=msgs
))

return ret_val


def check_has(priority=BaseCheck.HIGH):
def check_has(priority=BaseCheck.HIGH, gname=None):
"""Decorator to wrap a function to check if a dataset has given attributes.
:param function func: function to wrap"""

def _inner(func):
def _dec(s, ds):
Expand All @@ -312,7 +349,7 @@ def _dec(s, ds):
# effects on `ret_val`
for l in list_vars:
# function mutates ret_val
attr_check(l, ds, priority, ret_val)
attr_check(l, ds, priority, ret_val, gname)
return ret_val

return wraps(func)(_dec)
Expand All @@ -325,12 +362,12 @@ def fix_return_value(v, method_name, method=None, checker=None):
Transforms scalar return values into Result.
"""
# remove common check prefix
method_name = (method_name or method.__func__.__name__).replace("check_",
"")
method_name = (method_name or method.__func__.__name__).replace("check_","")
if v is None or not isinstance(v, Result):
v = Result(value=v, name=method_name)

v.name = v.name or method_name

v.checker = checker
v.check_method = method

Expand Down
4 changes: 2 additions & 2 deletions compliance_checker/cf/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ def check_units(self, ds):
# side effects, but better than teasing out the individual result
if units_attr_is_string.assert_true(
isinstance(units, basestring),
"units ({}) attribute of '{}' must be a string".format(units, variable.name)
"units ({}) attribute of '{}' must be a string compatible with UDUNITS".format(units, variable.name)
):
valid_udunits = self._check_valid_udunits(ds, name)
ret_val.append(valid_udunits)
Expand Down Expand Up @@ -1032,7 +1032,7 @@ def _check_valid_udunits(self, ds, variable_name):
should_be_dimensionless = (variable.dtype.char == 'S' or
std_name_units_dimensionless)

valid_udunits = TestCtx(BaseCheck.LOW, self.section_titles["3.1"])
valid_udunits = TestCtx(BaseCheck.HIGH, self.section_titles["3.1"])
are_udunits = (units is not None and util.units_known(units))
valid_udunits.assert_true(should_be_dimensionless or are_udunits,
'units for {}, "{}" are not recognized by UDUNITS'.format(variable_name, units))
Expand Down
2 changes: 2 additions & 0 deletions compliance_checker/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def run_checker(cls, ds_loc, checker_names, verbose, criteria,
else:
score_dict[loc] = score_groups

# define a score limit to truncate the ouput to the strictness level
# specified by the user
if criteria == 'normal':
limit = 2
elif criteria == 'strict':
Expand Down
Loading

0 comments on commit e32ef92

Please sign in to comment.