Toolkit 1.1 update
asovern-mitre committed Apr 8, 2021
1 parent bca77f7 commit 352ba36
Showing 17 changed files with 624 additions and 252 deletions.
7 changes: 7 additions & 0 deletions .gitignore
@@ -2,3 +2,10 @@ notes
playpen
__pycache__
venv
data
deprecated
legal
unittest
build
backup_of_data
backup_of_unittest
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,20 @@
# Changelog

## [1.0] - 2021-03-13
### Added
- Initial release of the Validation Toolkit

## [1.1] - 2021-04-05
### Fixed
- Fixed missing line numbers
- Fixed missing reports
- Allow a data type to be a subclass of the data type in a property constraint
- Handle conflicting namespaces in ontology and data
- Reject pickle files whose version does not match the toolkit version
- Fixed regex to match @type statements in JSON-LD files

### Changed
- Toolkit document

### Added
- Changelog file
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# UCO-Utility-Pre-0.7.0-Validator

## Description - (Alpha Release)
## Description - (Beta Release Version 1.1)

The UCO/CASE Validation Toolkit provides the capability to validate JSON-LD data files against a turtle-file based ontology such as the Unified Cyber Ontology (UCO) and Cyber-Investigation Analysis Standard Expression (CASE).
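
For orientation, a minimal usage sketch of the Python entry point this commit updates in src/casedata.py; it is not part of the commit itself. The import assumes src/ is on the Python path, and the data file name is illustrative.

# Minimal sketch, assuming src/ is importable; 'investigation.jsonld' is a placeholder.
from casedata import get_casedata

casedata = get_casedata('investigation.jsonld', verbose=False)

# The returned object carries the parsed rdflib graph plus per-node source
# line numbers and, new in 1.1, the namespace bindings found in the data.
print(len(casedata.graph), 'triples')
print(len(casedata.line_numbers), 'nodes with recorded line numbers')
print(casedata.bindings[:3])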

Binary file removed UCO-CASE Validation Toolkit.pdf
Binary file not shown.
Binary file added UCO-CASE_Validation_Toolkit.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.txt
@@ -2,3 +2,4 @@

ontospy
rdflib
lxml
43 changes: 25 additions & 18 deletions src/casedata.py
@@ -15,11 +15,12 @@
import tempfile
from ontospy import Ontospy
import serializer
import precondition
from precondition import precondition, postcondition
from context import Context

VERSION = '1.0' # Appears in the metadata when serialized
VERSION = '1.1' # Appears in the metadata when serialized

def get_casedata(path, output_filepath=None, verbose=True, **kwargs):
def get_casedata(path, output_filepath=None, verbose=False, **kwargs):
'''
If path is a serialized casedata file, deserialize it and return it.
If path is a file containing valid json-ld, ingest it.
@@ -47,8 +48,14 @@ def get_casedata(path, output_filepath=None, verbose=True, **kwargs):

# If path is a serialized casedata file, deserialize it and return casedata
try:
identifier, _metadata, casedata.__dict__ = serializer.deserialize(path)
identifier, metadata, casedata.__dict__ = serializer.deserialize(path)
if identifier == serializer.CASEDATA:
if metadata['version'] != VERSION:
print('{} was serialized with a different version of the toolkit. Use this command to reserialize:'.format(path))
print()
print(' serialize {}'.format(metadata['path']))
print()
raise Exception('{} was serialized with a different version of the toolkit.'.format(path))
return casedata
except serializer.DeserializeError:
pass
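
As a hedged illustration (not part of the diff): with the check above, loading a casedata file serialized by a different toolkit version prints a reserialization hint and then raises a plain Exception. The file name below is a placeholder.

# Sketch only; 'old_casedata.pkl' is illustrative.
from casedata import get_casedata

try:
    casedata = get_casedata('old_casedata.pkl')
except Exception as error:
    print(error)  # '... was serialized with a different version of the toolkit.'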
@@ -78,6 +85,7 @@ def __init__(self):
self.jsonld_filepath = None # Path to json-ld file
self.graph = None # rdflib.Graph of json-ld data
self.line_numbers = {} # {node:line_number} where node is a URIRef or a BNode
self.bindings = [] # [(prefix, uri)] from ontospy.namespaces


def serialize(self, output_filepath, comment):
@@ -113,17 +121,18 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):
Return: dictionary
{
'jsonld_filepath':filepath, # Full path to json-ld file
'graph':rdflib_graph_obj, # Json-ld data decomposed to an rdflib.Graph of triples
'line_numbers':line_numbers_dict # {node:line_number}, where node is a URIRef or a BNode
'jsonld_filepath':filepath, # Full path to json-ld file
'graph':rdflib_graph_obj, # Json-ld data decomposed to an rdflib.Graph of triples
'line_numbers':line_numbers_dict # {node:line_number}, where node is a URIRef or a BNode
'bindings':[(qualifier, uri_string)] # List of binding tuples, e.g. ('core', 'http://unifiedcyberontology.org/core')
}
'''
# Read the jsonld file (could raise exception)
with open(jsonld_filepath, 'r') as infile:
text = infile.read()

# Precondition it
preconditioned_text = precondition.precondition(text)
preconditioned_text = precondition(text)

# If specified, save text in output_filepath
if output_filepath:
@@ -142,11 +151,15 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):
temp_filepath = os.path.join(tempdirname, 'preconditioned.json')
with open(temp_filepath, 'w') as outfile:
outfile.write(preconditioned_text)
graph = Ontospy(
ontospy = Ontospy(
uri_or_path=temp_filepath,
rdf_format='jsonld',
verbose=verbose,
**kwargs).rdflib_graph
**kwargs)

graph = ontospy.rdflib_graph
context = Context().populate(ontospy.namespaces)


# If ontospy cannot read the file, it prints an error message
# and returns an object with a zero-length graph
@@ -156,18 +169,12 @@ def _read_jsonld_file(jsonld_filepath, output_filepath, verbose, **kwargs):

# Build a new graph by removing the embedded line numbers from ontospy's graph
# and remember the line numbers in a mapping {Node:line_number}
graph, line_numbers_dict = precondition.postcondition(graph, json.loads(preconditioned_text)['@context'])

#for s,p,o in graph.triples((None, None, None)):
# print(repr(s))
# print(repr(p))
# print(repr(o))
# print()
#pprint.pprint(line_numbers_dict)
graph, line_numbers_dict = postcondition(graph, context)

# Construct and return results
return {
'jsonld_filepath':jsonld_filepath,
'graph':graph,
'bindings':context.bindings,
'line_numbers':line_numbers_dict
}
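
For reference, a small sketch of how the dictionary assembled above might be consumed. _read_jsonld_file is an internal helper, so calling it directly is purely illustrative, and the file name is a placeholder.

# Illustrative only.
from casedata import _read_jsonld_file

result = _read_jsonld_file('example.jsonld', output_filepath=None, verbose=False)

# Triples parsed from the JSON-LD data, with the embedded line numbers stripped out.
for subject, predicate, obj in result['graph']:
    line = result['line_numbers'].get(subject)  # source line, if recorded
    print(subject, predicate, obj, 'line', line)

# Namespace bindings recovered from the data, e.g. ('core', 'http://unifiedcyberontology.org/core').
for prefix, uri in result['bindings']:
    print(prefix, uri)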
23 changes: 13 additions & 10 deletions src/class_constraints.py
@@ -10,8 +10,8 @@
from rdflib.namespace import OWL, RDF
from property_constraints import PropertyConstraints
from message import OntologyError, UnsupportedFeature
from message import pretty_uri
from triples import get_spo_dict
from context import Context



@@ -135,12 +135,6 @@ def __init__(self, onto_class_uri):
self.onto_class_uri = onto_class_uri
self.property_constraints_dict = {} # {property_uri:PropertyConstraints}

def __str__(self):
if self.property_constraints_dict:
return '\n'.join([str(property_constraints) for property_constraints in self.property_constraints_dict.values()])
else:
return '<Empty>'

def set_property_constraints(self, property_uri, property_constraints):
'''
Attach a Constraint object to the specified property_uri.
@@ -190,20 +184,29 @@ def get_required_properties(self):
return required_properties


def describe(self):
def describe(self, context=None):
'''
Assemble and return a plain-text description of the class and
property constraints in this object.
'''
lines = []
if context is None:
context = Context()
for property_constraints in self.property_constraints_dict.values():
lines.append(property_constraints.describe())
for required_property in self.get_required_properties():
lines.append('Class {}: Property {} is required'.format(
pretty_uri(self.onto_class_uri), required_property))
context.format(self.onto_class_uri), required_property))
for forbidden_property in self.get_forbidden_properties():
lines.append('Class {}: Property {} is forbidden'.format(
pretty_uri(self.onto_class_uri), forbidden_property))
context.format(self.onto_class_uri), forbidden_property))
if not lines:
lines.append('Empty')
return '\n'.join(lines)

def __str__(self):
if self.property_constraints_dict:
return '\n'.join([str(property_constraints) for property_constraints in self.property_constraints_dict.values()])
else:
return '<Empty>'
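
A minimal sketch of the reworked describe() path (this assumes the constraint class in this file is named ClassConstraints and that src/ is importable; the URI is illustrative):

from rdflib import URIRef
from class_constraints import ClassConstraints
from context import Context

constraints = ClassConstraints(URIRef('https://example.org/ontology#Thing'))

# With no property constraints attached, both renderings report emptiness;
# describe() builds a default Context when no context argument is supplied.
print(constraints.describe())                   # 'Empty'
print(constraints.describe(context=Context()))  # 'Empty'
print(constraints)                              # '<Empty>'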
