Toolkit 1.1 update
asovern-mitre committed Apr 8, 2021
1 parent bca77f7 commit 352ba36
Showing 17 changed files with 624 additions and 252 deletions.
7 changes: 7 additions & 0 deletions .gitignore
@@ -2,3 +2,10 @@ notes
playpen
__pycache__
venv
data
deprecated
legal
unittest
build
backup_of_data
backup_of_unittest
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,20 @@
# Changelog

## [1.0] - 2021-03-13
### Added
- Initial release of the Validation Toolkit

## [1.1] - 2021-04-05
### Fixed
- Fixed missing line numbers
- Fixed missing reports
- Allow a data type to be a subclass of the data type in a property constraint
- Handle conflicting namespaces in ontology and data
- Reject pickle files whose version does not match the toolkit version
- Fixed regex to match @type statements in JSON-LD files

### Changed
- Toolkit document

### Added
- Changelog file
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# UCO-Utility-Pre-0.7.0-Validator

## Description - (Alpha Release)
## Description - (Beta Release Version 1.1)

The UCO/CASE Validation Toolkit provides the capability to validate JSON-LD data files against a turtle-file based ontology such as the Unified Cyber Ontology (UCO) and Cyber-Investigation Analysis Standard Expression (CASE).
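
For orientation, a minimal usage sketch of the Python entry point this commit updates in src/casedata.py; it is not part of the commit itself. The import assumes src/ is on the Python path, and the data file name is illustrative.

# Minimal sketch, assuming src/ is importable; 'investigation.jsonld' is a placeholder.
from casedata import get_casedata

casedata = get_casedata('investigation.jsonld', verbose=False)

# The returned object carries the parsed rdflib graph plus per-node source
# line numbers and, new in 1.1, the namespace bindings found in the data.
print(len(casedata.graph), 'triples')
print(len(casedata.line_numbers), 'nodes with recorded line numbers')
print(casedata.bindings[:3])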

Binary file removed UCO-CASE Validation Toolkit.pdf
Binary file not shown.
Binary file added UCO-CASE_Validation_Toolkit.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.txt
@@ -2,3 +2,4 @@

ontospy
rdflib
lxml
43 changes: 25 additions & 18 deletions src/casedata.py
@@ -15,11 +15,12 @@
import tempfile
from ontospy import Ontospy
import serializer
import precondition
from precondition import precondition, postcondition
from context import Context

VERSION = '1.0' # Appears in the metadata when serialized
VERSION = '1.1' # Appears in the metadata when serialized

def get_casedata(path, output_filepath=None, verbose=True, **kwargs):
def get_casedata(path, output_filepath=None, verbose=False, **kwargs):
'''
If path is a serialized casedata file, deserialize it and return it.
If path is a file containing valid json-ld, ingest it.
@@ -47,8 +48,14 @@ def get_casedata(path, output_filepath=None, verbose=True, **kwargs):

# If path is a serialized casedata file, deserialize it and return casedata
try:
identifier, _metadata, casedata.__dict__ = serializer.deserialize(path)
identifier, metadata, casedata.__dict__ = serializer.deserialize(path)
if identifier == serializer.CASEDATA:
if metadata['version'] != VERSION:
print('{} was serialized with a different version of the toolkit. Use this command to reserialize:'.format(path))
print()
print(' serialize {}'.format(metadata['path']))
print()
raise Exception('{} was serialized with a different version of the toolkit.'.format(path))
return casedata
except serializer.DeserializeError:
pass
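
As a hedged illustration (not part of the diff): with the check above, loading a casedata file serialized by a different toolkit version prints a reserialization hint and then raises a plain Exception. The file name below is a placeholder.

# Sketch only; 'old_casedata.pkl' is illustrative.
from casedata import get_casedata

try:
    casedata = get_casedata('old_casedata.pkl')
except Exception as error:
    print(error)  # '... was serialized with a different version of the toolkit.'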
@@ -78,6 +85,7 @@ def __init__(self):
self.jsonld_filepath = None # Path to json-ld file
self.graph = None # rdflib.Graph of json-ld data
self.line_numbers = {} # {node:line_number} where node is a URIRef or a BNode
self.bindings = [] # [(prefix, uri)] from ontospy.namespaces


def serialize(self, output_filepath, comment):
@@ -113,17 +121,18 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):
Return: dictionary
{
'jsonld_filepath':filepath, # Full path to json-ld file
'graph':rdflib_graph_obj, # Json-ld data decomposed to an rdflib.Graph of triples
'line_numbers':line_numbers_dict # {node:line_number}, where node is a URIRef or a BNode
'jsonld_filepath':filepath, # Full path to json-ld file
'graph':rdflib_graph_obj, # Json-ld data decomposed to an rdflib.Graph of triples
'line_numbers':line_numbers_dict # {node:line_number}, where node is a URIRef or a BNode
'bindings':[(qualifier, uri_string)] # List of binding tuples, e.g. ('core', 'http://unifiedcyberontology.org/core')
}
'''
# Read the jsonld file (could raise exception)
with open(jsonld_filepath, 'r') as infile:
text = infile.read()

# Precondition it
preconditioned_text = precondition.precondition(text)
preconditioned_text = precondition(text)

# If specified, save text in output_filepath
if output_filepath:
@@ -142,11 +151,15 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):
temp_filepath = os.path.join(tempdirname, 'preconditioned.json')
with open(temp_filepath, 'w') as outfile:
outfile.write(preconditioned_text)
graph = Ontospy(
ontospy = Ontospy(
uri_or_path=temp_filepath,
rdf_format='jsonld',
verbose=verbose,
**kwargs).rdflib_graph
**kwargs)

graph = ontospy.rdflib_graph
context = Context().populate(ontospy.namespaces)


# If ontospy cannot read the file, it prints an error message
# and returns an object with a zero-length graph
@@ -156,18 +169,12 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):

# Build a new graph by removing the embedded line numbers from ontospy's graph
# and remember the line numbers in a mapping {Node:line_number}
graph, line_numbers_dict = precondition.postcondition(graph, json.loads(preconditioned_text)['@context'])

#for s,p,o in graph.triples((None, None, None)):
# print(repr(s))
# print(repr(p))
# print(repr(o))
# print()
#pprint.pprint(line_numbers_dict)
graph, line_numbers_dict = postcondition(graph, context)

# Construct and return results
return {
'jsonld_filepath':jsonld_filepath,
'graph':graph,
'bindings':context.bindings,
'line_numbers':line_numbers_dict
}
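
For reference, a small sketch of how the dictionary assembled above might be consumed. _read_jsonld_file is an internal helper, so calling it directly is purely illustrative, and the file name is a placeholder.

# Illustrative only.
from casedata import _read_jsonld_file

result = _read_jsonld_file('example.jsonld', output_filepath=None, verbose=False)

# Triples parsed from the JSON-LD data, with the embedded line numbers stripped out.
for subject, predicate, obj in result['graph']:
    line = result['line_numbers'].get(subject)  # source line, if recorded
    print(subject, predicate, obj, 'line', line)

# Namespace bindings recovered from the data, e.g. ('core', 'http://unifiedcyberontology.org/core').
for prefix, uri in result['bindings']:
    print(prefix, uri)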
23 changes: 13 additions & 10 deletions src/class_constraints.py
@@ -10,8 +10,8 @@
from rdflib.namespace import OWL, RDF
from property_constraints import PropertyConstraints
from message import OntologyError, UnsupportedFeature
from message import pretty_uri
from triples import get_spo_dict
from context import Context



@@ -135,12 +135,6 @@ def __init__(self, onto_class_uri):
self.onto_class_uri = onto_class_uri
self.property_constraints_dict = {} # {property_uri:PropertyConstraints}

def __str__(self):
if self.property_constraints_dict:
return '\n'.join([str(property_constraints) for property_constraints in self.property_constraints_dict.values()])
else:
return '<Empty>'

def set_property_constraints(self, property_uri, property_constraints):
'''
Attach a Constraint object to the specified property_uri.
@@ -190,20 +184,29 @@ def get_required_properties(self):
return required_properties


def describe(self):
def describe(self, context=None):
'''
Assemble and return a plain-text description of the class and
property constraints in this object.
'''
lines = []
if context is None:
context = Context()
for property_constraints in self.property_constraints_dict.values():
lines.append(property_constraints.describe())
for required_property in self.get_required_properties():
lines.append('Class {}: Property {} is required'.format(
pretty_uri(self.onto_class_uri), required_property))
context.format(self.onto_class_uri), required_property))
for forbidden_property in self.get_forbidden_properties():
lines.append('Class {}: Property {} is forbidden'.format(
pretty_uri(self.onto_class_uri), forbidden_property))
context.format(self.onto_class_uri), forbidden_property))
if not lines:
lines.append('Empty')
return '\n'.join(lines)

def __str__(self):
if self.property_constraints_dict:
return '\n'.join([str(property_constraints) for property_constraints in self.property_constraints_dict.values()])
else:
return '<Empty>'
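
A minimal sketch of the reworked describe() path (this assumes the constraint class in this file is named ClassConstraints and that src/ is importable; the URI is illustrative):

from rdflib import URIRef
from class_constraints import ClassConstraints
from context import Context

constraints = ClassConstraints(URIRef('https://example.org/ontology#Thing'))

# With no property constraints attached, both renderings report emptiness;
# describe() builds a default Context when no context argument is supplied.
print(constraints.describe())                   # 'Empty'
print(constraints.describe(context=Context()))  # 'Empty'
print(constraints)                              # '<Empty>'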
