diff --git a/.gitignore b/.gitignore index 2ca8682..385ecc9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,9 @@ _site/ .sass-cache/ .jekyll-cache/ .jekyll-metadata +__pycache__ +build +*egg-info +.coverage +coverage.xml +dist diff --git a/MANIFEST.in b/MANIFEST.in index 9ee7a21..2b60f0b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ graft docs graft src +include *.shex include *.rst include LICENSE include requirements.txt diff --git a/setup.cfg b/setup.cfg index 7bc0931..444cf5c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,13 @@ install_requires= cwlprov==0.1.1 networkx==2.8 prov>=1.5.1 - rocrate~=0.7 + rocrate~=0.7 + rdflib~=6.2 + PyShEx~=0.8.1 + arcp~=0.2.1 + rdflib_shim>=1.0.3 + rocrateValidator @ git+https://github.com/ResearchObject/ro-crate-validator-py.git@v0.2.16-RC4 + importlib_resources~=5.12 [options.entry_points] console_scripts= diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 5567a2f..395e292 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -14,10 +14,10 @@ import sys from pathlib import Path - import click from . import __version__ +from .validator import CrateValidator from .convert import ProvCrateBuilder from .report import dump_crate_actions @@ -56,8 +56,7 @@ def cli(): help="path to a README file (should be README.md in Markdown format)", ) def convert(root, output, license, workflow_name, readme): - """\ - Convert a CWLProv RO bundle into a Workflow Run RO-Crate. + """Convert a CWLProv RO bundle into a Workflow Run RO-Crate. 
@cli.command()
@click.option("-s", "--skip-ro-crate-check", is_flag=True, help="Skip general RO-Crate validation")
@click.option("-p", "--process-run", is_flag=True, help="Validate against the Process Run Crate profile")
@click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile")
@click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile")
@click.option("-W", "--workflow", is_flag=True, help="Validate against the Workflow RO-Crate profile")
@click.option(
    "-b",
    "--bioschemas",
    is_flag=True,
    help="Validate against Bioschemas profiles (ComputationalWorkflow FormalParameter)",
)
@click.option("-d", "--debug", is_flag=True, help="Enable debug output")
@click.argument(
    "crate",
    metavar="CRATE",
    type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path),
)
def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, provenance_run, bioschemas, debug):
    """Validate a Process/Workflow/Provenance Run Crate (experimental)

    CRATE: RO-Crate Root directory

    Unless forced (e.g. --workflow-run), the validation will use
    the crate's profile(s) as indicated with conformsTo.
    """
    validator = CrateValidator(crate)
    if debug:
        validator.debug = True
    if not skip_ro_crate_check:
        validator.ro_crate_check()
        # TODO: Check output

    # Make sure Metadata File is readable and described
    if not validator.metadata_file_check():
        # click discards a command's return value in standalone mode, so the
        # failure has to be signalled through the process exit status.
        sys.exit(2)

    # Only auto-detect when *no* profile was forced on the command line.
    # Every flag must take part in this test, including --provenance-run.
    forced = any((workflow, process_run, workflow_run, provenance_run, bioschemas))
    if not forced:
        # Detect profile from conformsTo
        (workflow, process_run, workflow_run, provenance_run, bioschemas) = validator._detect_profiles()

    if not any((workflow, process_run, workflow_run, provenance_run, bioschemas)):
        print(
            "Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)",
            file=sys.stderr,
        )
        sys.exit(1)

    # (selected?, banner, check) in the order the checks are reported.
    checks = (
        (workflow, "Validating against Workflow profile", validator.workflow_check),
        (bioschemas, "Validating against Bioschemas ComputationalWorkflow profile", validator.computationalworkflow_check),
        (process_run, "Validating against Process Run profile", validator.process_run_check),
        (workflow_run, "Validating against Workflow Run profile", validator.workflow_run_check),
        (provenance_run, "Validating against Provenance Run profile", validator.provenance_run_check),
    )
    for selected, banner, check in checks:
        if selected:
            print(banner)
            check()


# NOTE(review): the command defined right after this one must carry its own
# ``@cli.command()`` decorator; the one that used to precede it now belongs
# to ``validate``.
# Main workflow of the crate, checked against the MUST ("minimum") properties
# of the Bioschemas ComputationalWorkflow profile. Recommended and optional
# properties are constrained only when present.
p:MainWorkflow extra rdf:type {
  ## Marginality: Minimum

  a [bioschemas:ComputationalWorkflow];
  # NOTE(review): the value set is empty as seen here and would match nothing;
  # it is expected to list the ComputationalWorkflow profile IRI -- confirm.
  dct:conformsTo [];
  s:creator @p:PersonOrOrg+;
  # NOTE: We won't be picky about xsd type even if profile insists Date or DateTime
  s:dateCreated LITERAL;

  # NOTE: interpret profile liberally, in/out can be 0 if no input/output, otherwise required
  s:input @p:FormalParameter*;
  s:output @p:FormalParameter*;

  # schema.org spells the property "license" and the type "CreativeWork"
  ( s:license LITERAL |
    s:license {
      # Require contextual entity in RO-Crate
      a [s:CreativeWork];
      s:name LITERAL;
    }
  )+;
  s:name LITERAL;

  ( s:programmingLanguage LITERAL |
    s:programmingLanguage {
      # Require contextual entity in RO-Crate
      a [s:ComputerLanguage];
      s:name LITERAL;
    }
  );

  s:sdPublisher @p:PersonOrOrg;
  ( # Depends on context
    s:url IRI |
    s:url xsd:string;
  );

  s:version LITERAL;

  ## Marginality: Recommended

  ( s:citation LITERAL |
    s:citation {
      # FIXME: Won't detect subtypes like s:ScholarlyArticle without rdfs inferencing
      a [s:CreativeWork];
#     s:name xsd:string
    };
  )? // sh:severity sh:Info // sh:message "It is RECOMMENDED to include a citation";

  s:contributor @p:PersonOrOrg*;

  ( s:creativeWorkStatus LITERAL |
    s:creativeWorkStatus IRI;
  )?;

  # Recommended only, so it must not be mandatory
  ( s:documentation {
      a [s:CreativeWork];
    } |
    s:documentation IRI;
  )?;

  s:funding {
    a [s:Grant];
  }*;

  s:hasPart {
    # Extended types to include tools
    ( a [s:CreativeWork] |
      a [s:SoftwareApplication] |
      a [s:SoftwareSourceCode]
    );
  }*;

  (
    s:isBasedOn { a [s:CreativeWork] } |
    s:isBasedOn { a [s:Product] } |
    s:isBasedOn IRI
  )?;

  s:keywords LITERAL?;

  s:maintainer @p:PersonOrOrg*;

  s:producer @p:PersonOrOrg*;

  s:publisher @p:PersonOrOrg*;

  s:runtimePlatform LITERAL?;
  (
    s:softwareRequirements LITERAL |
    s:softwareRequirements IRI
  )*;

  s:targetProduct {
    a [s:SoftwareApplication];
  }*;

  ## Marginality: Optional

  s:alternateName LITERAL*;
  s:conditionsOfAccess LITERAL?;
  # FIXME: ISO8601 regex pattern without forcing xsd:dateTime/xsd:date/xsd:timestamp etc. declaration
  s:dateModified LITERAL?;
  s:datePublished LITERAL?;

  (
    s:encodingFormat LITERAL |
    s:encodingFormat IRI
  )?;

  # Three alternatives. ';' binds tighter than '|', so writing the last two
  # as "xsd:string; IRI" would demand both forms at once.
  $p:identifier (
    s:identifier {
      a [s:PropertyValue];
    } |
    s:identifier xsd:string |
    s:identifier IRI;
  )*;

  (
    s:image {
      a [s:ImageObject];
    } |
    s:image IRI;
  )?;
}

# A person or organization given as an explicit contextual entity.
p:PersonOrOrg extra rdf:type {
  # As we're in RO-Crate, the contextual entity is required
  ( a [s:Person] |
    a [s:Organization]
  );
  s:name LITERAL;
}

# An input/output parameter following the Bioschemas FormalParameter profile.
p:FormalParameter {
  a [bioschemas:FormalParameter];
  # NOTE(review): empty value set as seen here; expected to list the
  # FormalParameter profile IRI -- confirm.
  dct:conformsTo [];
  s:name LITERAL;
  s:additionalType IRI*;
  s:description LITERAL?;
  (
    s:encodingFormat LITERAL |
    s:encodingFormat IRI;
  )?;
  (
    s:defaultValue LITERAL |
    s:defaultValue IRI;
  )?;
  #&identifier*;
  # Same three-way alternation as the workflow's identifier group
  (
    s:identifier {
      a [s:PropertyValue];
    } |
    s:identifier xsd:string |
    s:identifier IRI;
  )*;
  s:valueRequired xsd:boolean?;
}
# An action (process run) mentioned by the crate root: what was run
# (instrument), by whom (agent), on what (object) and what it produced (result).
p:Process {
  ( a [s:CreateAction] |
    a [s:ActivateAction] |
    a [s:UpdateAction]
  );
  s:name xsd:string;
  s:description xsd:string;
  s:endTime xsd:string;
  s:startTime xsd:string?;
  s:instrument @p:SoftwareApplication;
  s:agent @p:Agent;
  # Inputs are optional
  (
    s:object @p:File |
    s:object @p:Dataset |
    s:object @p:Collection |
    s:object @p:PropertyValue
  )?;
  # Outputs: every alternative constrains s:result (a PropertyValue output
  # is a result, not an object)
  (
    s:result @p:File |
    s:result @p:Dataset |
    s:result @p:PropertyValue
  );
  # TODO: Do we permit literals here?
  s:actionStatus [s:CompletedActionStatus "CompletedActionStatus" s:FailedActionStatus "FailedActionStatus"]?;
  ( # Error, if present, can be string literal or entity
    s:error LITERAL |
    s:error NONLITERAL
  )?;
}
# FIXME: Need a skeleton Root node for ^s:hasPart back-references
# to avoid recursion?

## NOTE: EXTRA should be enabled to allow extension of the profile,
# but may cause unnecessary recursion and confusing errors
# when something deeper does not match.

# RO-Crate root data entity: described by the metadata file descriptor,
# declares the profile(s) it conforms to and mentions at least one process.
p:Root EXTRA s:mentions {
  ^s:about @p:MetadataFile;
  # NOTE(review): empty value set as seen here matches nothing; expected to
  # list the Process Run Crate profile IRI -- confirm.
  dct:conformsTo [];
  dct:conformsTo IRI*;
  s:mentions @p:Process+;
}

# https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor
p:MetadataFile EXTRA dct:conformsTo {
  a [s:CreativeWork];
  dct:conformsTo @p:ROCrateSpec;
}

# https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#establishing-absolute-uri-for-ro-crate-root
# Any version of the RO-Crate specification; dots are escaped so that only
# the literal host name w3id.org matches.
p:ROCrateSpec IRI
  /^https:\/\/w3id\.org\/ro\/crate\//
  {
}
# A workflow step: links to the tool(s) it exemplifies and carries its
# position in the workflow.
p:HowToStep {
  a [s:HowToStep];
  s:workExample @p:SoftwareApplication+;
  # Position is either a non-negative integer or a string of digits.
  # ShEx patterns are unanchored (XPath fn:matches), so anchor explicitly;
  # otherwise any string merely containing a digit run would be accepted.
  ( s:position xsd:integer MinInclusive 0 |
    s:position xsd:string /^[1-9][0-9]*$/
  );
}
# RO-Crate root data entity of a Workflow RO-Crate: must point at the main
# workflow and declare a license.
p:Root {
  # Fails as WorkflowHub also adds ro-crate-preview.html with s:about
  #^s:about @p:MetadataFile;
  s:mainEntity @p:MainWorkflow;
  # schema.org spells the property "license"
  (
    s:license IRI |
    s:license xsd:string
  )
}
# The workflow that was run: a ComputationalWorkflow data entity contained
# in the crate, optionally declaring its formal input/output parameters.
p:MainWorkflow extra a {
  a [bioschemas:ComputationalWorkflow];
  # Must be data entity in crate
  ( ^s:hasPart @p:Dataset |
    ^s:hasPart @p:Root;
  );
  # '@' makes these shape references; a bare p:FormalParameter would be read
  # as a literal datatype and never match a parameter entity.
  bioschemas:input @p:FormalParameter*;
  bioschemas:output @p:FormalParameter*;
}
class ValidationError(TypeError):
    """Base class for errors raised while validating an RO-Crate."""


class RootEntityError(ValidationError):
    """Can't find the root data entity"""


class CrateValidator:
    """Validate an RO-Crate directory against RO-Crate and Run Crate profiles.

    The crate is loaded lazily and cached, both as a ``ROCrate`` object
    (:attr:`crate`) and as an RDF graph (:attr:`graph`) parsed relative to a
    base IRI obtained from ``arcp_random()``.

    Set :attr:`debug` to ``True`` for verbose progress output on stdout.
    """

    def __init__(self, root):
        """Create a validator for the crate directory *root* (``str`` or ``Path``)."""
        self.root = Path(root)
        self.base = URIRef(arcp_random())
        self.debug = False
        # Caches; ``None`` means "not computed yet"
        self._crate = None
        self._graph = None
        self._rootIRI = None

    @staticmethod
    def _as_list(value):
        """Normalize a JSON-LD property value (missing, single or list) to a list."""
        if value is None:
            return []
        return value if isinstance(value, list) else [value]

    @staticmethod
    def _profile_id(profile):
        """Return the identifier of a ``conformsTo`` value.

        Values may be entity objects exposing ``.id`` or plain strings.
        """
        return str(getattr(profile, "id", profile))

    @property
    def crate(self):
        """The crate as a ``ROCrate`` object, loaded on first access."""
        if self._crate is None:
            self._crate = ROCrate(self.root)
        return self._crate

    def _detect_profiles(self):
        """Auto-detect profiles based on ``conformsTo``.

        Return a tuple ``(workflow, process_run, workflow_run,
        provenance_run, bioschemas)`` holding the matching ``conformsTo``
        entry for each profile. Profiles not detected are represented as
        ``None`` in the tuple.

        The first four are looked up on the root data entity; ``bioschemas``
        (ComputationalWorkflow) is looked up on the crate's main entity.
        """
        workflow = process_run = workflow_run = provenance_run = bioschemas = None

        # Check profiles on root data set
        for p in self._as_list(self.crate.root_dataset.get("conformsTo", [])):
            pid = self._profile_id(p)
            if self.debug:
                print("Detected profile {}".format(pid))
            if pid.startswith("https://w3id.org/workflowhub/workflow-ro-crate/"):
                # FIXME: Should we also detect this on legacy metadata file entity?
                # (Note: wfrun/workflow profile require above on Dataset)
                workflow = p
            if pid.startswith("https://w3id.org/ro/wfrun/process/"):
                process_run = p
            if pid.startswith("https://w3id.org/ro/wfrun/workflow/"):
                workflow_run = p
            if pid.startswith("https://w3id.org/ro/wfrun/provenance/"):
                provenance_run = p

        main_entity = self.crate.mainEntity
        if main_entity:
            # Detect ComputationalWorkflow profile on main entity
            for p in self._as_list(main_entity.get("conformsTo", [])):
                if self._profile_id(p).startswith("https://bioschemas.org/profiles/ComputationalWorkflow/"):
                    bioschemas = p

        return (workflow, process_run, workflow_run, provenance_run, bioschemas)

    def ro_crate_check(self):
        """Run the general RO-Crate checks and print one line per check.

        Returns ``True`` if every check passed and the metadata file
        descriptor could be resolved, ``False`` otherwise.
        """
        print("Validating RO-Crate {}".format(self.root))
        v = rocrateValidator.validate.validate(self.root)
        ok = True
        # FIXME: Avoid extracted code below from v.validator()
        # to get programmatic access to results
        for method in v.functions:
            result = method(v.tar_file, v.extension)
            if result.code == 0:
                print(" OK {}".format(result.NAME))
            else:
                ok = False
                print("ERROR {}:\n {}".format(result.NAME, result.message))
        return bool(self.metadata_file_check()) and ok

    def metadata_file_check(self):
        """Check that the metadata file is readable and describes a root entity.

        Returns the root entity IRI as a string on success; prints the
        problem to stderr and returns ``False`` otherwise.
        """
        try:
            return str(self.rootIRI)
        except IOError as e:
            print(str(e), file=sys.stderr)
            return False
        except RootEntityError:
            print(
                "Can't find Metadata File Descriptor, see https://www.researchobject.org/ro-crate/1.1/root-data-entity.html",
                file=sys.stderr,
            )
            return False

    @property
    def metadataFile(self) -> Path:
        """Path of ``ro-crate-metadata.json``; raises ``IOError`` if it is not a file."""
        p = self.root / "ro-crate-metadata.json"
        if not p.is_file():
            raise IOError("Can't find RO-Crate Metadata file {}".format(p))
        return p

    @property
    def graph(self):
        """The crate metadata parsed as an RDF graph (cached)."""
        # Compare against None: a graph without triples is falsy and would
        # otherwise be re-parsed on every access.
        if self._graph is not None:
            return self._graph
        g = Graph()  # assuming default graph
        metadataIRI = str(self.base) + "ro-crate-metadata.json"
        if self.debug:
            print("Parsing RDF " + metadataIRI)
        g.parse(self.metadataFile, format="application/ld+json", base=metadataIRI)
        self._graph = g
        if self.debug:
            print("Parsed {} triples, {} subjects".format(len(g), len(set(g.subjects()))))
        return self._graph

    @property
    def rootIRI(self):
        """Identify IRI of the RO-Crate Root entity (cached).

        Raises ``RootEntityError`` when no metadata file descriptor pointing
        at a root entity can be found.
        """
        if self._rootIRI is not None:
            return self._rootIRI
        # Algorithm for finding root data entity
        # https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#finding-the-root-data-entity
        for metadataFile, _, conformsTo in self.graph.triples((None, DCTERMS.conformsTo, None)):
            if not conformsTo.startswith("https://w3id.org/ro/crate/"):
                continue
            if self.debug:
                print("Checking Metadata File Descriptor {}".format(metadataFile))
                print("RO-Crate specification {}".format(conformsTo))
            # The first ``about`` target of a matching descriptor wins
            for r in set(self.graph.objects(metadataFile, SCHEMA.about)):
                self._rootIRI = r
                if self.debug:
                    print("Identified RO-Crate root: {}".format(r))
                return self._rootIRI
        # Still here? Bad luck..
        raise RootEntityError("Can't find RO-Crate root")

    def _load_shex(self, profile):
        """Return the text of the bundled ShEx schema file named *profile*."""
        return files('runcrate.shex').joinpath(profile).read_text()

    def _validate_shex(self, profile):
        """Evaluate the root entity against a bundled ShEx schema.

        Prints the verdict (and the reason on failure) and returns the
        result of the evaluation.
        """
        shex = self._load_shex(profile)
        rslt, reason = evaluate(self.graph, shex, self.rootIRI)
        if rslt:
            print("CONFORMS")
        else:
            print("DOES NOT CONFORM")
            if reason:
                print(reason)
        return rslt

    # Each check returns the verdict so callers can act on it.

    def process_run_check(self):
        """Validate against the Process Run Crate profile."""
        return self._validate_shex("process-crate-0.1.shex")

    def computationalworkflow_check(self):
        """Validate against the Bioschemas ComputationalWorkflow profile."""
        return self._validate_shex("bioschemas-computationalworkflow-1.0.shex")

    def workflow_check(self):
        """Validate against the Workflow RO-Crate profile."""
        return self._validate_shex("workflow-crate-1.0.shex")

    def workflow_run_check(self):
        """Validate against the Workflow Run Crate profile."""
        # TODO: Finish
        return self._validate_shex("workflow-run-crate-0.1.shex")

    def provenance_run_check(self):
        """Validate against the Provenance Run Crate profile."""
        # TODO: Implement
        return self._validate_shex("provenance-crate-0.1.shex")
"#SepiaConversion_1", + "@type": "CreateAction", + "name": "Convert dog image to sepia", + "description": "convert -sepia-tone 80% test_data/sample/pics/2017-06-11\\ 12.56.14.jpg test_data/sample/pics/sepia_fence.jpg", + "endTime": "2018-09-19T17:01:07+10:00", + "instrument": {"@id": "https://www.imagemagick.org/"}, + "object": {"@id": "pics/2017-06-11%2012.56.14.jpg"}, + "result": {"@id": "pics/sepia_fence.jpg"}, + "agent": {"@id": "https://orcid.org/0000-0001-9842-9718"} + }, + { + "@id": "pics/2017-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2017-06-11 12.56.14.jpg (input)" + }, + { + "@id": "pics/sepia_fence.jpg", + "@type": "File", + "description": "The converted picture, now sepia-colored", + "encodingFormat": "image/jpeg", + "name": "sepia_fence (output)" + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes" + } +] +} \ No newline at end of file diff --git a/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl b/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl new file mode 100644 index 0000000..037feba --- /dev/null +++ b/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl @@ -0,0 +1,35 @@ + . + "convert -sepia-tone 80% test_data/sample/pics/2017-06-11\\ 12.56.14.jpg test_data/sample/pics/sepia_fence.jpg" . + "2018-09-19T17:01:07+10:00" . + . + "Convert dog image to sepia" . + "CompletedActionStatus" . + . + . + . + . + . + . + . + "My Pictures" . + . + "Original image" . + "image/jpeg" . + "2017-06-11 12.56.14.jpg (input)" . + . + "The converted picture, now sepia-colored" . + "image/jpeg" . + "sepia_fence (output)" . + . + . + . + . + "Stian Soiland-Reyes" . + . + "Process Run Crate" . + "0.1" . + . + "ImageMagick" . + "6.9.7-4" . + "https://www.imagemagick.org/" . + .