From bcef3011e35536859bff9e7ce70288fb35ad8399 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:22:18 +0000 Subject: [PATCH 01/28] a preliminary ShEx profile for Process Run Crate --- shex/process-must.shex | 123 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 shex/process-must.shex diff --git a/shex/process-must.shex b/shex/process-must.shex new file mode 100644 index 0000000..0bbc696 --- /dev/null +++ b/shex/process-must.shex @@ -0,0 +1,123 @@ +PREFIX p: +PREFIX xsd: +PREFIX s: +PREFIX foaf: +PREFIX dct: +PREFIX bioschemas: + +start = @p:Root + +p:Process { + ( a [s:CreateAction] | + a [s:ActivateAction] | + a [s:UpdateAction] + ); + s:name xsd:string; + s:description xsd:string; + s:endTime xsd:string; + s:startTime xsd:string?; + s:instrument @p:SoftwareApplication; + s:agent @p:Agent; + ( + s:object @p:File | + s:object @p:Dataset | + s:object @p:Collection | + s:object @p:PropertyValue + )?; + ( + s:result @p:File | + s:result @p:Dataset | + s:object @p:PropertyValue + ); + # TODO: Do we permit literals here? + s:actionStatus [s:CompletedActionStatus "CompletedActionStatus" s:FailedActionStatus "FailedActionStatus"]?; + ( # Error, if present, can be string literal or entity + s:error LITERAL | + s:error NonLiteral + )?; +} + + +p:Agent { + a [s:Person]; + s:name xsd:string; +} + +p:SoftwareApplication EXTRA a { + ( a [s:SoftwareApplication] | + a [s:SoftwareSourceCode ] | + a [bioschema:ComputationalWorkflow] + ); + s:name xsd:string; + ( s:url xsd:string | + s:url IRI + ); + ( s:version xsd:string | + s:softwareVersion xsd:string + ); +} + +p:PropertyValue { + a [s:PropertyValue ]; + s:name xsd:string; + s:value Literal; +} + +p:File EXTRA a { + a [s:MediaObject]; + # Either part of another dataset or the root + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); + s:alternateName xsd:string?; + ( + s:encodingFormat xsd:string | + s:encodingFormat IRI; + ); + s:description xsd:string?; +} + + +# A multi-file object +p:Collection { + a [s:Collection]; + # FIXME: Do collections need a name? + s:name xsd:string?; + ^s:mentions @p:Root; + ( + s:mainEntity @p:Dataset | + s:mainEntity @p:File + )?; + ( s:hasPart @p:Dataset | + s:hasPart @p:File; + ); +} + +# An aggregated (potentially nested) dataset +p:Dataset { + a [s:Dataset]; + s:name xsd:string; + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); +} + +# FIXME: Need a skeleton Root node for ^s:hasPart back-references +# to avoid recursion? + +p:Root { # EXTRA s:mentions dct:conformsTo s:hasPart s:about { + ^s:about @p:MetadataFile; + dct:conformsTo []; + s:mentions @p:Process+; + s:hasPart @p:File+; +} + +p:MetadataFile { + a [s:CreativeWork]; + dct:conformsTo @p:ROCrateSpec; +} + +p:ROCrateSpec IRI + /^https:\/\/w3id.org\/ro\/crate\// + { +} \ No newline at end of file From 1e7a79a501b50b0f27b2ca9b16a63ff373884cca Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:23:49 +0000 Subject: [PATCH 02/28] Example from https://w3id.org/ro/wfrun/process/0.1 --- .../process-ex1/ro-crate-metadata.json | 63 +++++++++++++++++++ .../process-ex1/ro-crate-metadata.ttl | 35 +++++++++++ 2 files changed, 98 insertions(+) create mode 100644 tests/data/profile-examples/process-ex1/ro-crate-metadata.json create mode 100644 tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl diff --git a/tests/data/profile-examples/process-ex1/ro-crate-metadata.json b/tests/data/profile-examples/process-ex1/ro-crate-metadata.json new file mode 100644 index 0000000..3417be1 --- /dev/null +++ b/tests/data/profile-examples/process-ex1/ro-crate-metadata.json @@ -0,0 +1,63 @@ +{ "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "conformsTo": {"@id": "https://w3id.org/ro/wfrun/process/0.1"}, + "hasPart": [ + {"@id": "pics/2017-06-11%2012.56.14.jpg"}, + {"@id": "pics/sepia_fence.jpg"} + ], + "mentions": {"@id": "#SepiaConversion_1"}, + "name": "My Pictures" + }, + { "@id": "https://w3id.org/ro/wfrun/process/0.1", + "@type": "CreativeWork", + "name": "Process Run Crate", + "version": "0.1" + }, + { + "@id": "https://www.imagemagick.org/", + "@type": "SoftwareApplication", + "url": "https://www.imagemagick.org/", + "name": "ImageMagick", + "softwareVersion": "6.9.7-4" + }, + { + "@id": "#SepiaConversion_1", + "@type": "CreateAction", + "name": "Convert dog image to sepia", + "description": "convert -sepia-tone 80% test_data/sample/pics/2017-06-11\\ 12.56.14.jpg test_data/sample/pics/sepia_fence.jpg", + "endTime": "2018-09-19T17:01:07+10:00", + "instrument": {"@id": "https://www.imagemagick.org/"}, + "object": {"@id": "pics/2017-06-11%2012.56.14.jpg"}, + "result": {"@id": "pics/sepia_fence.jpg"}, + "agent": {"@id": "https://orcid.org/0000-0001-9842-9718"} + }, + { + "@id": "pics/2017-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2017-06-11 12.56.14.jpg (input)" + }, + { + "@id": "pics/sepia_fence.jpg", + "@type": "File", + "description": "The converted picture, now sepia-colored", + "encodingFormat": "image/jpeg", + "name": "sepia_fence (output)" + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes" + } +] +} \ No newline at end of file diff --git a/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl b/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl new file mode 100644 index 0000000..037feba --- /dev/null +++ b/tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl @@ -0,0 +1,35 @@ + . + "convert -sepia-tone 80% test_data/sample/pics/2017-06-11\\ 12.56.14.jpg test_data/sample/pics/sepia_fence.jpg" . + "2018-09-19T17:01:07+10:00" . + . + "Convert dog image to sepia" . + "CompletedActionStatus" . + . + . + . + . + . + . + . + "My Pictures" . + . + "Original image" . + "image/jpeg" . + "2017-06-11 12.56.14.jpg (input)" . + . + "The converted picture, now sepia-colored" . + "image/jpeg" . + "sepia_fence (output)" . + . + . + . + . + "Stian Soiland-Reyes" . + . + "Process Run Crate" . + "0.1" . + . + "ImageMagick" . + "6.9.7-4" . + "https://www.imagemagick.org/" . + . From 495a6ac8171512b8e170ad0b1d99e16b91881a6d Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:24:16 +0000 Subject: [PATCH 03/28] PySheX dependencies --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 26bd386..634a890 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,9 @@ install_requires= networkx==2.8 prov>=1.5.1 rocrate~=0.7 + # Note: PySheX may fail with 6.0.1 or later.. + rdflib=ls=6.0.0 + PyShEx~=0.7.6 [options.entry_points] console_scripts= From 089adc23ecdc466703a46f744e8efc1468438e34 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:26:13 +0000 Subject: [PATCH 04/28] test command --- shex/process-must.shex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shex/process-must.shex b/shex/process-must.shex index 0bbc696..ffecd04 100644 --- a/shex/process-must.shex +++ b/shex/process-must.shex @@ -1,3 +1,6 @@ +## Test with +# shexeval -fn "arcp://uuid,1482c1de-4383-4e9e-8cca-ddd922bdf8ac/" tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl shex/process-must.shex + PREFIX p: PREFIX xsd: PREFIX s: From 59fe1d513194d2eb475c11d49f74cfe277e9cf8a Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:27:18 +0000 Subject: [PATCH 05/28] process-crate-0.1 --- shex/{process-must.shex => process-crate-0.1.shex} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename shex/{process-must.shex => process-crate-0.1.shex} (100%) diff --git a/shex/process-must.shex b/shex/process-crate-0.1.shex similarity index 100% rename from shex/process-must.shex rename to shex/process-crate-0.1.shex From 820cf1b61f2b6a6180c61dc343988e039e4d20cd Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:52:04 +0000 Subject: [PATCH 06/28] correct command line --- shex/process-crate-0.1.shex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shex/process-crate-0.1.shex b/shex/process-crate-0.1.shex index ffecd04..2eecd16 100644 --- a/shex/process-crate-0.1.shex +++ b/shex/process-crate-0.1.shex @@ -1,5 +1,5 @@ ## Test with -# shexeval -fn "arcp://uuid,1482c1de-4383-4e9e-8cca-ddd922bdf8ac/" tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl shex/process-must.shex +# shexeval -fn "arcp://uuid,1482c1de-4383-4e9e-8cca-ddd922bdf8ac/" tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl shex/process-crate-0.1.shex PREFIX p: PREFIX xsd: From 0a443cf7ef36772f27b624c02383e35a09c1fa39 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 15:58:00 +0000 Subject: [PATCH 07/28] newer PyShEx --- setup.cfg | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 634a890..47113eb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,10 +36,9 @@ install_requires= cwlprov==0.1.1 networkx==2.8 prov>=1.5.1 - rocrate~=0.7 - # Note: PySheX may fail with 6.0.1 or later.. - rdflib=ls=6.0.0 - PyShEx~=0.7.6 + rocrate~=0.7 + rdflib~=6.2 + PyShEx~=0.8.1 [options.entry_points] console_scripts= From 4a0004e579201451a843b315f70523b7a13dd7d0 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 17:04:57 +0000 Subject: [PATCH 08/28] basic RO-Crate validation --- setup.cfg | 3 ++ src/runcrate/cli.py | 43 ++++++++++++++++++++++++-- src/runcrate/validator.py | 64 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 src/runcrate/validator.py diff --git a/setup.cfg b/setup.cfg index 47113eb..06b9bfd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,6 +39,9 @@ install_requires= rocrate~=0.7 rdflib~=6.2 PyShEx~=0.8.1 + arcp~=0.2.1 + rocrateValidator==0.2.15 + pytest # FIXME: strangely needed by rocrateValidator [options.entry_points] console_scripts= diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 7fa8119..1ab18a6 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys from pathlib import Path - import click from . import ProvCrateBuilder +from .validator import CrateValidator @click.group() @@ -53,7 +54,8 @@ def cli(): help="path to a README file (should be README.md in Markdown format)", ) def convert(root, output, license, workflow_name, readme): - """\ + """Convert workflow run outputs to Workflow Run Crate + RO_DIR: top-level directory of the CWLProv RO """ if not output: @@ -66,5 +68,42 @@ def convert(root, output, license, workflow_name, readme): crate.write(output) +@cli.command() +@click.option("-s", "--skip-ro-crate-check", is_flag=True, help="Skip general RO-Crate validation") +@click.option("-p", "--process-run", is_flag=True, help="Validate against the Process Run Crate profile") +@click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile") +@click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile") +@click.argument( + "crate", + metavar="CRATE", + type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path), +) + +def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_run): + """Validate a Process/Workflow/Provenance Run Crate + + CRATE: RO-Crate Root directory + + Unless forced (e.g. --workflow-run), the validation will use + the crate's profile(s) as indicated with conformsTo. + """ + validator = CrateValidator(crate) + if not skip_ro_crate_check: + validator.ro_crate_check() + if not process_run and not workflow_run and not process_run: + # Detect profile from conformsTo + (process_run,workflow_run,provenance_run) = validator._detect_profiles() + + print("Validating {}".format(crate)) + if process_run: + print(validator.process_run_check()) + elif workflow_run: + print(validator.workflow_run_check()) + elif provenance_run: + print(validator.provenance_run_check()) + else: + print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) + return -1 + if __name__ == '__main__': cli() diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py new file mode 100644 index 0000000..4730e67 --- /dev/null +++ b/src/runcrate/validator.py @@ -0,0 +1,64 @@ +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pyshex.evaluate import evaluate +from rdflib import Graph, Namespace +from arcp import arcp_random +from rocrate.rocrate import ROCrate +import rocrateValidator.validate + + +class CrateValidator(): + def __init__(self, root): + self.root = root + self.base = arcp_random() + self.crate = ROCrate(root) + + def _detect_profiles(self): + """Auto-detect Run Crate profile based on conformsTo + + Return a tuple (process_run, workflow_run, provenance_run) with + the corresponding detected profiles URIs. Profiles not detected are + represented as ``None`` in the tuple. + """ + profiles = self.crate.root_dataset.get("conformsTo", []) + if not isinstance(profiles, list): + profiles = [profiles] + + process_run, workflow_run, provenance_run = (None,)*3 + for p in profiles: + if p.id.startswith("https://w3id.org/ro/wfrun/process/"): + process_run = p + if p.id.startswith("https://w3id.org/ro/wfrun/workflow/"): + workflow_run = p + if p.id.startswith("https://w3id.org/ro/wfrun/provenance/"): + provenance_run = p + # TODO: Also check for Workflow profile stand-alone + return (process_run, workflow_run, provenance_run) + + def ro_crate_check(self): + v = rocrateValidator.validate.validate(self.root) + # FIXME upstream: This does an ugly JSON print instead of returning + # the validation result + v.validator() + + def process_run_check(self): + pass + + def workflow_run_check(self): + pass + + def provenance_run_check(self): + pass + From d8d99038988f114c056e3cdd341b1d8c6f4e76ee Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 17:05:16 +0000 Subject: [PATCH 09/28] Avoid EXTRA --- shex/process-crate-0.1.shex | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/shex/process-crate-0.1.shex b/shex/process-crate-0.1.shex index 2eecd16..98ba49c 100644 --- a/shex/process-crate-0.1.shex +++ b/shex/process-crate-0.1.shex @@ -108,18 +108,24 @@ p:Dataset { # FIXME: Need a skeleton Root node for ^s:hasPart back-references # to avoid recursion? -p:Root { # EXTRA s:mentions dct:conformsTo s:hasPart s:about { +## NOTE: EXTRA should be enabled to allow extension of the profile, +# but may cause unneccessary recursions and confusing errors +# when something deeper does not match. + +p:Root { # EXTRA s:mentions dct:conformsTo s:hasPart s:about { ^s:about @p:MetadataFile; dct:conformsTo []; s:mentions @p:Process+; s:hasPart @p:File+; } +# https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor p:MetadataFile { a [s:CreativeWork]; dct:conformsTo @p:ROCrateSpec; } +# https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#establishing-absolute-uri-for-ro-crate-root p:ROCrateSpec IRI /^https:\/\/w3id.org\/ro\/crate\// { From 8fe5b379a5a60d5b2f86484cee4ffc164fed78b7 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 21:18:55 +0000 Subject: [PATCH 10/28] more validation of RDF --- src/runcrate/cli.py | 19 ++++++- src/runcrate/validator.py | 112 +++++++++++++++++++++++++++++++++++--- 2 files changed, 119 insertions(+), 12 deletions(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 1ab18a6..e7b3ccc 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -13,6 +13,7 @@ # limitations under the License. import sys + from pathlib import Path import click @@ -73,14 +74,16 @@ def convert(root, output, license, workflow_name, readme): @click.option("-p", "--process-run", is_flag=True, help="Validate against the Process Run Crate profile") @click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile") @click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile") +@click.option("-d", "--debug", is_flag=True, help="Enable debug output") + @click.argument( "crate", metavar="CRATE", type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path), ) -def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_run): - """Validate a Process/Workflow/Provenance Run Crate +def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_run, debug): + """Validate a Process/Workflow/Provenance Run Crate (experimental) CRATE: RO-Crate Root directory @@ -88,18 +91,28 @@ def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_r the crate's profile(s) as indicated with conformsTo. """ validator = CrateValidator(crate) + if debug: + validator.debug = True if not skip_ro_crate_check: validator.ro_crate_check() + # TODO: Check output + + # Make sure Metadata File is readable and described + if not validator.metadata_file_check(): + return -2 + if not process_run and not workflow_run and not process_run: # Detect profile from conformsTo (process_run,workflow_run,provenance_run) = validator._detect_profiles() - print("Validating {}".format(crate)) if process_run: + print("Validating against Process Run profile") print(validator.process_run_check()) elif workflow_run: + print("Validating against Workflow Run profile") print(validator.workflow_run_check()) elif provenance_run: + print("Validating against Provenance Run profile") print(validator.provenance_run_check()) else: print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py index 4730e67..87cc18b 100644 --- a/src/runcrate/validator.py +++ b/src/runcrate/validator.py @@ -12,18 +12,43 @@ # See the License for the specific language governing permissions and # limitations under the License. +from pathlib import Path from pyshex.evaluate import evaluate -from rdflib import Graph, Namespace +from rdflib import Graph, Dataset, ConjunctiveGraph, Namespace, URIRef, DCTERMS from arcp import arcp_random from rocrate.rocrate import ROCrate import rocrateValidator.validate - +from importlib_resources import files +# FIXME: For Python 3.10++ below is sufficient +#from importlib.resources import files + +import sys + +SCHEMA = Namespace("http://schema.org/") + +class ValidationError(TypeError): + """A type of validation""" + +class RootEntityError(ValidationError): + """Can't find the root data entity""" + class CrateValidator(): def __init__(self, root): - self.root = root - self.base = arcp_random() - self.crate = ROCrate(root) + self.root = isinstance(root, Path) and root or Path(root) + self.base = URIRef(arcp_random()) + self.debug = False + # Caches + self._crate = None + self._graph = None + self._rootIRI = None + + @property + def crate(self): + if self._crate: + return self._crate + self._crate = ROCrate(self.root) + return self._crate def _detect_profiles(self): """Auto-detect Run Crate profile based on conformsTo @@ -38,6 +63,8 @@ def _detect_profiles(self): process_run, workflow_run, provenance_run = (None,)*3 for p in profiles: + if self.debug: + print("Detected profile {}".format(p.id)) if p.id.startswith("https://w3id.org/ro/wfrun/process/"): process_run = p if p.id.startswith("https://w3id.org/ro/wfrun/workflow/"): @@ -48,14 +75,81 @@ def _detect_profiles(self): return (process_run, workflow_run, provenance_run) def ro_crate_check(self): + print("Validating RO-Crate {}".format(self.root)) v = rocrateValidator.validate.validate(self.root) - # FIXME upstream: This does an ugly JSON print instead of returning - # the validation result - v.validator() + # FIXME: Avoid extracted code below from v.validator() + # to get programmatic access to results + for method in v.functions: + result = method(v.tar_file, v.extension) + if result.code == 0: + print(" OK {}".format(result.NAME)) + else: + print("ERROR {}:\n {}".format(result.NAME, result.message)) + self.metadata_file_check() + + def metadata_file_check(self): + try: + return str(self.rootIRI) + except IOError as e: + print(str(e), file=sys.stderr) + return False + except RootEntityError: + print("Can't find Metadata File Descriptor, see https://www.researchobject.org/ro-crate/1.1/root-data-entity.html") + return False + + @property + def metadataFile(self) -> Path: + p = self.root / "ro-crate-metadata.jsonld" + if not p.is_file(): + raise IOError("Can't find RO-Crate Metadata file {}".format(p)) + return p + + @property + def graph(self): + if self._graph: + return self._graph + g = Graph() # assuming default graph + metadataIRI = str(self.base) + "ro-crate-metadata.json" + if self.debug: + print("Parsing RDF " + metadataIRI) + g.parse(self.metadataFile, format="application/ld+json", base=metadataIRI) + self._graph = g + if self.debug: + print("Parsed {} triples, {} subjects".format(len(g), len(set(g.subjects())))) + return self._graph + + @property + def rootIRI(self): + """Identify IRI of the RO-Crate Root entity""" + if self._rootIRI: + return self._rootIRI + # Algorithm for finding root data entity + # https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#finding-the-root-data-entity + for metadataFile,_,conformsTo in self.graph.triples((None, DCTERMS.conformsTo, None)): + if conformsTo.startswith("https://w3id.org/ro/crate/"): + if (self.debug): + print("Checking Metadata File Descriptor {}".format(metadataFile)) + print("RO-Crate specification {}".format(conformsTo)) + roots = set(self.graph.objects(metadataFile, SCHEMA.about)) + for r in roots: + self._rootIRI = r + if (self.debug): + print("Identified RO-Crate root: {}".format(r)) + return self._rootIRI + # Still here? Bad luck.. + raise RootEntityError("Can't find RO-Crate root") + + def _load_shex(self, profile): + def process_run_check(self): + print(self.rootIRI) + + + + def workflow_check(self): pass - + def workflow_run_check(self): pass From 40508d88a29ed58cbd4fcaec9b5626d838106ab3 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 21:19:36 +0000 Subject: [PATCH 11/28] Making shex into package info --- MANIFEST.in | 1 + setup.cfg | 2 ++ {shex => src/runcrate/shex/runcrate}/process-crate-0.1.shex | 0 3 files changed, 3 insertions(+) rename {shex => src/runcrate/shex/runcrate}/process-crate-0.1.shex (100%) diff --git a/MANIFEST.in b/MANIFEST.in index b07a44c..eef6c3a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ graft docs graft src +include *.shex include *.rst include LICENSE include requirements.txt diff --git a/setup.cfg b/setup.cfg index 06b9bfd..bd574f8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,8 +40,10 @@ install_requires= rdflib~=6.2 PyShEx~=0.8.1 arcp~=0.2.1 + rdflib_shim>=1.0.3 rocrateValidator==0.2.15 pytest # FIXME: strangely needed by rocrateValidator + importlib_resources~=5.12 [options.entry_points] console_scripts= diff --git a/shex/process-crate-0.1.shex b/src/runcrate/shex/runcrate/process-crate-0.1.shex similarity index 100% rename from shex/process-crate-0.1.shex rename to src/runcrate/shex/runcrate/process-crate-0.1.shex From 7d92a6af145631f91cbf7fc189a70cbc690a0904 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 21:36:11 +0000 Subject: [PATCH 12/28] now validating! --- src/runcrate/cli.py | 6 +++--- .../{runcrate => }/process-crate-0.1.shex | 0 src/runcrate/validator.py | 21 +++++++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) rename src/runcrate/shex/{runcrate => }/process-crate-0.1.shex (100%) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index e7b3ccc..e0ae0be 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -107,13 +107,13 @@ def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_r if process_run: print("Validating against Process Run profile") - print(validator.process_run_check()) + validator.process_run_check() elif workflow_run: print("Validating against Workflow Run profile") - print(validator.workflow_run_check()) + validator.workflow_run_check() elif provenance_run: print("Validating against Provenance Run profile") - print(validator.provenance_run_check()) + validator.provenance_run_check() else: print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) return -1 diff --git a/src/runcrate/shex/runcrate/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex similarity index 100% rename from src/runcrate/shex/runcrate/process-crate-0.1.shex rename to src/runcrate/shex/process-crate-0.1.shex diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py index 87cc18b..4f90f6a 100644 --- a/src/runcrate/validator.py +++ b/src/runcrate/validator.py @@ -20,7 +20,7 @@ import rocrateValidator.validate from importlib_resources import files -# FIXME: For Python 3.10++ below is sufficient +# FIXME: For Python 3.10++, below code is sufficient: #from importlib.resources import files import sys @@ -99,7 +99,7 @@ def metadata_file_check(self): @property def metadataFile(self) -> Path: - p = self.root / "ro-crate-metadata.jsonld" + p = self.root / "ro-crate-metadata.json" if not p.is_file(): raise IOError("Can't find RO-Crate Metadata file {}".format(p)) return p @@ -140,12 +140,21 @@ def rootIRI(self): raise RootEntityError("Can't find RO-Crate root") def _load_shex(self, profile): - + return files('runcrate.shex').joinpath(profile).read_text() + + def _validate_shex(self, profile): + shex = self._load_shex(profile) + rslt, reason = evaluate(self.graph, shex, self.rootIRI) + if rslt: + print("CONFORMS") + else: + print("DOES NOT CONFORM") + if reason: + print(reason) + return rslt def process_run_check(self): - print(self.rootIRI) - - + self._validate_shex("process-crate-0.1.shex") def workflow_check(self): pass From 631ad12164173bf8b05f4f25731c46956a0e8935 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 21:36:22 +0000 Subject: [PATCH 13/28] ignore stuff from pip install --editable . --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2ca8682..c2d113c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ _site/ .sass-cache/ .jekyll-cache/ .jekyll-metadata +__pycache__ +build +*egg-info From f293e0d7afb95d487b158b98e68822e021f8c833 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 22:24:12 +0000 Subject: [PATCH 14/28] attempt to shape workflow crate profile --- src/runcrate/cli.py | 22 +++--- src/runcrate/shex/process-crate-0.1.shex | 22 ++++-- src/runcrate/shex/workflow-crate-1.0.shex | 84 +++++++++++++++++++++++ src/runcrate/validator.py | 11 +-- 4 files changed, 122 insertions(+), 17 deletions(-) create mode 100644 src/runcrate/shex/workflow-crate-1.0.shex diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index e0ae0be..4d77294 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -74,6 +74,7 @@ def convert(root, output, license, workflow_name, readme): @click.option("-p", "--process-run", is_flag=True, help="Validate against the Process Run Crate profile") @click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile") @click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile") +@click.option("-W", "--workflow", is_flag=True, help="Validate against the Workflow RO-Crate profile") @click.option("-d", "--debug", is_flag=True, help="Enable debug output") @click.argument( @@ -82,7 +83,7 @@ def convert(root, output, license, workflow_name, readme): type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path), ) -def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_run, debug): +def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, provenance_run, debug): """Validate a Process/Workflow/Provenance Run Crate (experimental) CRATE: RO-Crate Root directory @@ -101,22 +102,27 @@ def validate(crate, skip_ro_crate_check, process_run, workflow_run, provenance_r if not validator.metadata_file_check(): return -2 - if not process_run and not workflow_run and not process_run: + if not workflow and not process_run and not workflow_run and not process_run: # Detect profile from conformsTo - (process_run,workflow_run,provenance_run) = validator._detect_profiles() + (workflow,process_run,workflow_run,provenance_run) = validator._detect_profiles() + if not workflow and not process_run and not workflow_run and not process_run: + print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) + return -1 + + if workflow: + print("Validating against Workflow profile") + validator.workflow_check() if process_run: print("Validating against Process Run profile") validator.process_run_check() - elif workflow_run: + if workflow_run: print("Validating against Workflow Run profile") validator.workflow_run_check() - elif provenance_run: + if provenance_run: print("Validating against Provenance Run profile") validator.provenance_run_check() - else: - print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) - return -1 + return if __name__ == '__main__': cli() diff --git a/src/runcrate/shex/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex index 98ba49c..1621ef9 100644 --- a/src/runcrate/shex/process-crate-0.1.shex +++ b/src/runcrate/shex/process-crate-0.1.shex @@ -1,7 +1,21 @@ -## Test with -# shexeval -fn "arcp://uuid,1482c1de-4383-4e9e-8cca-ddd922bdf8ac/" tests/data/profile-examples/process-ex1/ro-crate-metadata.ttl shex/process-crate-0.1.shex - -PREFIX p: +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## ShEx shape for https://w3id.org/ro/wfrun/process/0.1 + +## FIXME: Reflect back in namespace +PREFIX p: PREFIX xsd: PREFIX s: PREFIX foaf: diff --git a/src/runcrate/shex/workflow-crate-1.0.shex b/src/runcrate/shex/workflow-crate-1.0.shex new file mode 100644 index 0000000..25bb6c2 --- /dev/null +++ b/src/runcrate/shex/workflow-crate-1.0.shex @@ -0,0 +1,84 @@ +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## ShEX shape for https://w3id.org/workflowhub/workflow-ro-crate/1.0 + +## FIXME: Reflect back in namespace +PREFIX p: +PREFIX xsd: +PREFIX s: +PREFIX foaf: +PREFIX dct: +PREFIX bioschemas: + +start = @p:Root + +p:Root EXTRA s:hasPart { + #^s:about @p:MetadataFile; + s:mainEntity @p:MainWorkflow; + ( + s:licence IRI | + s:licence xsd:string + ) +} + + +# https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor +p:MetadataFile { + a [s:CreativeWork]; + # FIXME: Update Workflow RO-Crate profile to move conformsTo to root + dct:conformsTo []; + dct:conformsTo [] +} + +p:MainWorkflow { + a [s:MediaObject]; + a [s:SoftwareSourceCode]; + a [bioschemas:ComputationalWorkflow]; + # Must be data entity in crate + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); + s:programmingLanguage p:ComputerLanguage; + s:subjectOf @p:CWLDescription?; + s:image @p:WorkflowDiagram?; +} + +# An aggregated (potentially nested) dataset +p:Dataset { + a [s:Dataset]; + s:name xsd:string; + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); +} + +p:ComputerLanguage { + a [s:ComputerLanguage]; + # TODO: Name not strictly required by profile? + s:name xsd:string; +} + +p:CWLDescription { + a [s:MediaObject]; + a [s:SoftwareSourceCode]; + a [s:HowTo]; + s:programmingLanguage []; +} + + +p:WorkflowDiagram { + a [s:MediaObject]; + a [s:ImageObject]; +} diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py index 4f90f6a..be74a71 100644 --- a/src/runcrate/validator.py +++ b/src/runcrate/validator.py @@ -53,7 +53,7 @@ def crate(self): def _detect_profiles(self): """Auto-detect Run Crate profile based on conformsTo - Return a tuple (process_run, workflow_run, provenance_run) with + Return a tuple (workflow, process_run, workflow_run, provenance_run) with the corresponding detected profiles URIs. Profiles not detected are represented as ``None`` in the tuple. """ @@ -61,18 +61,19 @@ def _detect_profiles(self): if not isinstance(profiles, list): profiles = [profiles] - process_run, workflow_run, provenance_run = (None,)*3 + workflow, process_run, workflow_run, provenance_run = (None,)*4 for p in profiles: if self.debug: print("Detected profile {}".format(p.id)) + if p.id.startswith("https://w3id.org/workflowhub/workflow-ro-crate/"): + workflow = p if p.id.startswith("https://w3id.org/ro/wfrun/process/"): process_run = p if p.id.startswith("https://w3id.org/ro/wfrun/workflow/"): workflow_run = p if p.id.startswith("https://w3id.org/ro/wfrun/provenance/"): provenance_run = p - # TODO: Also check for Workflow profile stand-alone - return (process_run, workflow_run, provenance_run) + return (workflow, process_run, workflow_run, provenance_run) def ro_crate_check(self): print("Validating RO-Crate {}".format(self.root)) @@ -157,7 +158,7 @@ def process_run_check(self): self._validate_shex("process-crate-0.1.shex") def workflow_check(self): - pass + self._validate_shex("workflow-crate-1.0.shex") def workflow_run_check(self): pass From 4176e6b443d6663433f22e41a8c09c23c29757be Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 22:30:58 +0000 Subject: [PATCH 15/28] programmingLanguage does not seem to require contextual entity --- src/runcrate/shex/workflow-crate-1.0.shex | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/runcrate/shex/workflow-crate-1.0.shex b/src/runcrate/shex/workflow-crate-1.0.shex index 25bb6c2..c3c12c9 100644 --- a/src/runcrate/shex/workflow-crate-1.0.shex +++ b/src/runcrate/shex/workflow-crate-1.0.shex @@ -24,7 +24,8 @@ PREFIX bioschemas: start = @p:Root -p:Root EXTRA s:hasPart { +p:Root { + # Fails as WorkflowHub also adds ro-crate-preview.html with s:about #^s:about @p:MetadataFile; s:mainEntity @p:MainWorkflow; ( @@ -50,7 +51,9 @@ p:MainWorkflow { ( ^s:hasPart @p:Dataset | ^s:hasPart @p:Root; ); - s:programmingLanguage p:ComputerLanguage; + # FIXME: Profile does not require the contextual entity? + #s:programmingLanguage @p:ComputerLanguage; + s:programmingLanguage IRI; s:subjectOf @p:CWLDescription?; s:image @p:WorkflowDiagram?; } @@ -64,11 +67,6 @@ p:Dataset { ); } -p:ComputerLanguage { - a [s:ComputerLanguage]; - # TODO: Name not strictly required by profile? - s:name xsd:string; -} p:CWLDescription { a [s:MediaObject]; @@ -77,7 +75,6 @@ p:CWLDescription { s:programmingLanguage []; } - p:WorkflowDiagram { a [s:MediaObject]; a [s:ImageObject]; From 52f5772a0fb0c4734d58327bf45f71767e3d4d77 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 7 Mar 2023 22:44:18 +0000 Subject: [PATCH 16/28] workflow-run-crate draft --- src/runcrate/cli.py | 3 +- src/runcrate/shex/workflow-run-crate-0.1.shex | 77 +++++++++++++++++++ src/runcrate/validator.py | 7 +- 3 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 src/runcrate/shex/workflow-run-crate-0.1.shex diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 4d77294..df6ecf2 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -102,7 +102,8 @@ def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, pr if not validator.metadata_file_check(): return -2 - if not workflow and not process_run and not workflow_run and not process_run: + guess_profile = not workflow and not process_run and not workflow_run and not process_run + if guess_profile: # Detect profile from conformsTo (workflow,process_run,workflow_run,provenance_run) = validator._detect_profiles() diff --git a/src/runcrate/shex/workflow-run-crate-0.1.shex b/src/runcrate/shex/workflow-run-crate-0.1.shex new file mode 100644 index 0000000..3b825dd --- /dev/null +++ b/src/runcrate/shex/workflow-run-crate-0.1.shex @@ -0,0 +1,77 @@ +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## ShEX shape for https://w3id.org/ro/wfrun/workflow/0.1 + +## TODO: +# This is work in progress and does not quite validate yet + +## FIXME: Reflect back in namespace +PREFIX p: +PREFIX xsd: +PREFIX s: +PREFIX foaf: +PREFIX dct: +PREFIX bioschemas: + +start = @p:Root + +p:Root { + s:mainEntity @p:MainWorkflow; + dct:conformsTo []; + dct:conformsTo []; + dct:conformsTo []; + s:mentions @p:Process+; +} + +p:Process { + s:instrument @p:MainWorkflow; +} + +p:MainWorkflow { + a [bioschemas:ComputationalWorkflow]; + # Must be data entity in crate + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); + bioschemas:input p:FormalParameter*; + bioschemas:output p:FormalParameter*; +} + +p:FormalParameter { + a [ bioschemas:FormalParameter ] + ## TODO: link with workExample etc. +} + +# An aggregated (potentially nested) dataset +p:Dataset { + a [s:Dataset]; + s:name xsd:string; + ( ^s:hasPart @p:Dataset | + ^s:hasPart @p:Root; + ); +} + + +p:CWLDescription { + a [s:MediaObject]; + a [s:SoftwareSourceCode]; + a [s:HowTo]; + s:programmingLanguage []; +} + +p:WorkflowDiagram { + a [s:MediaObject]; + a [s:ImageObject]; +} diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py index be74a71..3e617b8 100644 --- a/src/runcrate/validator.py +++ b/src/runcrate/validator.py @@ -159,10 +159,13 @@ def process_run_check(self): def workflow_check(self): self._validate_shex("workflow-crate-1.0.shex") + # TODO: Add Bioschemas profile def workflow_run_check(self): - pass + # TODO: Finish + self._validate_shex("workflow-run-crate-0.1.shex") def provenance_run_check(self): - pass + # TODO: Implement + self._validate_shex("provenance-crate-0.1.shex") From 3940a93c8839e653b0cb466f2966adb4c57e8850 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Thu, 9 Mar 2023 07:28:53 +0000 Subject: [PATCH 17/28] Draft of Bioschemas profile (untested) --- .../bioschemas-computationalworkflow-1.0.shex | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex diff --git a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex new file mode 100644 index 0000000..3734289 --- /dev/null +++ b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex @@ -0,0 +1,188 @@ +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## ShEX shape for https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE and +# https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE +# following MUST requirements ("marginality: minimum"), with +# all SHOULD/COULD (recommended/optional) marked as optional (*/?) +# +# NOTE: As this shape is meant to be used with a workflow-crate, it assumes the +# workflow is mainEntity of an RO-Crate root, which is the start focus. That also +# means some contextual entities are assumed to be explicit with a @type and s:name +# rather than just a IRI. + +## FIXME: Update namespace +PREFIX p: +PREFIX xsd: +PREFIX s: +PREFIX foaf: +PREFIX dct: +PREFIX bioschemas: + +start = @p:Root + +p:Root { + s:mainEntity @p:MainWorkflow; +} +p:MainWorkflow { + ## Marginality: Minimum + + a [bioschemas:ComputationalWorkflow]; + dct:conformsTo [] + s:creator @sd:PersonOrOrg+; + # NOTE: We won't be picky about xsd type even if profile insists Date or DateTime + s:dateCreated Literal; + + # NOTE: interpret profile liberally, in/out can be 0 if no input/output, otherwise required + s:input @p:FormalParameter*; + s:output @p:FormalParameter*; + + ( s:licence LITERAL | + s:licence { + # Require contextual entity in RO-Crate + a [s:Creativework] + s:name LITERAL; + } + )+; + s:name LITERAL; + + ( s:programmingLanguage Literal | + s:programmingLanguage { + # Require contextual entity in RO-Crate + a [s:ComputerLanguage]; + s:name LITERAL; + } + ); + + s:sdPublisher @sd:PersonOrOrg; + ( # Depends on context + s:url IRI| + s:url xsd:string; + ); + + s:version LITERAL; + + ## Marginality: Recommended + + ( s:citation LITERAL | + s:citation { + # FIXME: Won't detect subtypes like s:ScholarlyArticle without rdfs inferencing + a [s:CreativeWork]; +# s:name xsd:string + }; + )*; + + s:contributor @sd:PersonOrOrg*; + + ( s:creativeWorkStatus LITERAL | + s:creativeWorkStatus IRI; + )?; + + ( s:documentation { + a [s:CreativeWork]; + } | + s:documentation IRI; + ); + + s:funding { + a [s:Grant]; + }*; + + s:hasPart { + # Extended types to include tools + ( a [s:CreativeWork] | + a [s:SoftwareApplication] | + a [s:SoftwareSourceCode] + ) ; + }*; + + ( + s:isBasedOn { a [s:CreativeWork] } | + s:isBasedOn { a [s:Product] } | + s:isBasedOn IRI + )?; + + s:keywords LITERAL?; + + s:maintainer @sd:PersonOrOrg*; + + s:producer @sd:PersonOrOrg*; + + s:publisher @sd:PersonOrOrg*; + + s:runtimePlatform LITERAL?; + ( + s:softwareRequirements LITERAL | + s:softwareRequirements IRI + )*; + + s:targetProduct { + a [s:SoftwareApplication]; + }*; + + ## Marginality: Optional + + s:alternateName Literal*; + s:conditionsOfAccess Literal?; + # FIXME: ISO8601 regex pattern without forcing xsd:dateTime/xsd:date/xsd:timestamp etc. declaration + s:dateModified LITERAL?; + s:datePublished LITERAL?; + + ( + s:encodingFormat LITERAL | + s:encodingFormat IRI + )?; + + $p:identifier ( + s:identifier { + a [s:PropertyValue]; + } | + s:identifier xsd:string; + s:identifier IRI; + )*; + + ( + s:image { + a [s:ImageObject]; + } | + s:image IRI; + )?; +} + +p:PersonOrOrg { + # As we're in RO-Crate, the contextual entity is required + (a [s:Person] | + a [s:Organization] + ) + s:name LITERAL; + } + +p:FormalParameter { + a [bioschemas:FormalParameter]; + dct:conformsTo [<>]; + s:name LITERAL; + s:additionalType IRI*; + s:description LITERAL?; + ( + s:encodingFormat LITERAL | + s:encodingFormat IRI; + )?; + ( + s:defaultValue Literal | + s:defaultValue IR + )?; + &identifier*; + s:valueRequired xsd:boolean?; +} + From 443ebb2fc8992e177132eb616ec55f1081babad1 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Thu, 9 Mar 2023 07:30:11 +0000 Subject: [PATCH 18/28] namespaces --- src/runcrate/shex/process-crate-0.1.shex | 4 ++-- src/runcrate/shex/workflow-crate-1.0.shex | 4 ++-- src/runcrate/shex/workflow-run-crate-0.1.shex | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/runcrate/shex/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex index 1621ef9..0efc422 100644 --- a/src/runcrate/shex/process-crate-0.1.shex +++ b/src/runcrate/shex/process-crate-0.1.shex @@ -14,8 +14,8 @@ ## ShEx shape for https://w3id.org/ro/wfrun/process/0.1 -## FIXME: Reflect back in namespace -PREFIX p: +## FIXME: Update namespace +PREFIX p: PREFIX xsd: PREFIX s: PREFIX foaf: diff --git a/src/runcrate/shex/workflow-crate-1.0.shex b/src/runcrate/shex/workflow-crate-1.0.shex index c3c12c9..c166e36 100644 --- a/src/runcrate/shex/workflow-crate-1.0.shex +++ b/src/runcrate/shex/workflow-crate-1.0.shex @@ -14,8 +14,8 @@ ## ShEX shape for https://w3id.org/workflowhub/workflow-ro-crate/1.0 -## FIXME: Reflect back in namespace -PREFIX p: +## FIXME: Update namespace +PREFIX p: PREFIX xsd: PREFIX s: PREFIX foaf: diff --git a/src/runcrate/shex/workflow-run-crate-0.1.shex b/src/runcrate/shex/workflow-run-crate-0.1.shex index 3b825dd..fa2dd14 100644 --- a/src/runcrate/shex/workflow-run-crate-0.1.shex +++ b/src/runcrate/shex/workflow-run-crate-0.1.shex @@ -17,8 +17,8 @@ ## TODO: # This is work in progress and does not quite validate yet -## FIXME: Reflect back in namespace -PREFIX p: +## FIXME: Update namespace +PREFIX p: PREFIX xsd: PREFIX s: PREFIX foaf: From 4cdd9bb820091f569ae3f72a76f3fbeb2012ffd5 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Sat, 18 Mar 2023 11:31:31 +0000 Subject: [PATCH 19/28] correct import --- src/runcrate/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 40718c9..f89cdfb 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -17,7 +17,6 @@ import click from . import __version__ -from . import ProvCrateBuilder from .validator import CrateValidator from .convert import ProvCrateBuilder from .report import dump_crate_actions From cad484219a168dd71798e305db8fe1b1a7f815ac Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Sat, 18 Mar 2023 11:31:38 +0000 Subject: [PATCH 20/28] ignore tox output --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index c2d113c..385ecc9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ _site/ __pycache__ build *egg-info +.coverage +coverage.xml +dist From de413fdd63704e689283f6d59cf2e11a0a1277b3 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Sat, 18 Mar 2023 12:05:53 +0000 Subject: [PATCH 21/28] broken start of provenance profile --- src/runcrate/shex/provenance-crate-0.1.shex | 65 +++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 src/runcrate/shex/provenance-crate-0.1.shex diff --git a/src/runcrate/shex/provenance-crate-0.1.shex b/src/runcrate/shex/provenance-crate-0.1.shex new file mode 100644 index 0000000..328468e --- /dev/null +++ b/src/runcrate/shex/provenance-crate-0.1.shex @@ -0,0 +1,65 @@ +# Copyright 2023 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## ShEX shape for https://w3id.org/ro/wfrun/provenance/0.1 + +## TODO: +# This is work in progress and does not quite validate yet + +## FIXME: Update namespace +PREFIX p: +PREFIX xsd: +PREFIX s: +PREFIX foaf: +PREFIX dct: +PREFIX bioschemas: + +start = @p:Root + +p:Root { + s:mainEntity @p:MainWorkflow; + dct:conformsTo []; + dct:conformsTo []; + dct:conformsTo []; + dct:conformsTo []; + s:mentions @p:MainProcess+; +} + +p:MainProcess { + s:instrument @p:MainWorkflow; +} + +p:MainWorkflow { + a [s:MediaObject]; + a [s:SoftwareSourceCode]; + a [bioschemas:ComputationalWorkflow]; + a [s:HowTo]; + s:hasPart @p:SoftwareApplication+; + s:step +} + +p:SoftwareApplication EXTRA a { + ( a [s:SoftwareApplication] | + a [s:SoftwareSourceCode ] | + a [bioschema:ComputationalWorkflow] + ); + s:name xsd:string; + ( s:url xsd:string | + s:url IRI + ); + ( s:version xsd:string | + s:softwareVersion xsd:string + ); +} + From 51c92335d680a15a26f155608d51616b09d61093 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Thu, 30 Mar 2023 17:22:27 +0100 Subject: [PATCH 22/28] avoid recursion recursion --- .../shex/bioschemas-computationalworkflow-1.0.shex | 6 +++--- src/runcrate/shex/process-crate-0.1.shex | 14 +++++++------- src/runcrate/shex/provenance-crate-0.1.shex | 2 +- src/runcrate/shex/workflow-crate-1.0.shex | 13 +++++-------- src/runcrate/shex/workflow-run-crate-0.1.shex | 6 +++--- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex index 3734289..9a7c472 100644 --- a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex +++ b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex @@ -35,7 +35,7 @@ start = @p:Root p:Root { s:mainEntity @p:MainWorkflow; } -p:MainWorkflow { +p:MainWorkflow extra a { ## Marginality: Minimum a [bioschemas:ComputationalWorkflow]; @@ -57,7 +57,7 @@ p:MainWorkflow { )+; s:name LITERAL; - ( s:programmingLanguage Literal | + ( s:programmingLanguage LITERAL | s:programmingLanguage { # Require contextual entity in RO-Crate a [s:ComputerLanguage]; @@ -160,7 +160,7 @@ p:MainWorkflow { )?; } -p:PersonOrOrg { +p:PersonOrOrg extra a { # As we're in RO-Crate, the contextual entity is required (a [s:Person] | a [s:Organization] diff --git a/src/runcrate/shex/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex index 0efc422..637944d 100644 --- a/src/runcrate/shex/process-crate-0.1.shex +++ b/src/runcrate/shex/process-crate-0.1.shex @@ -83,8 +83,8 @@ p:PropertyValue { p:File EXTRA a { a [s:MediaObject]; # Either part of another dataset or the root - ( ^s:hasPart @p:Dataset | - ^s:hasPart @p:Root; + ( ^s:hasPart @p:Root | + ^s:hasPart @p:Dataset ; ); s:alternateName xsd:string?; ( @@ -114,8 +114,8 @@ p:Collection { p:Dataset { a [s:Dataset]; s:name xsd:string; - ( ^s:hasPart @p:Dataset | - ^s:hasPart @p:Root; + ( ^s:hasPart @p:Root | + ^s:hasPart @p:Dataset ); } @@ -126,15 +126,15 @@ p:Dataset { # but may cause unneccessary recursions and confusing errors # when something deeper does not match. -p:Root { # EXTRA s:mentions dct:conformsTo s:hasPart s:about { +p:Root EXTRA s:mentions { ^s:about @p:MetadataFile; dct:conformsTo []; + dct:conformsTo IRI*; s:mentions @p:Process+; - s:hasPart @p:File+; } # https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor -p:MetadataFile { +p:MetadataFile EXTRA dct:conformsTo { a [s:CreativeWork]; dct:conformsTo @p:ROCrateSpec; } diff --git a/src/runcrate/shex/provenance-crate-0.1.shex b/src/runcrate/shex/provenance-crate-0.1.shex index 328468e..d5c2526 100644 --- a/src/runcrate/shex/provenance-crate-0.1.shex +++ b/src/runcrate/shex/provenance-crate-0.1.shex @@ -27,7 +27,7 @@ PREFIX bioschemas: start = @p:Root -p:Root { +p:Root extra dct:conformsTo s:mentions { s:mainEntity @p:MainWorkflow; dct:conformsTo []; dct:conformsTo []; diff --git a/src/runcrate/shex/workflow-crate-1.0.shex b/src/runcrate/shex/workflow-crate-1.0.shex index c166e36..83e6541 100644 --- a/src/runcrate/shex/workflow-crate-1.0.shex +++ b/src/runcrate/shex/workflow-crate-1.0.shex @@ -36,20 +36,20 @@ p:Root { # https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor -p:MetadataFile { +p:MetadataFile EXTRA dct:conformsTo a { a [s:CreativeWork]; # FIXME: Update Workflow RO-Crate profile to move conformsTo to root dct:conformsTo []; dct:conformsTo [] } -p:MainWorkflow { +p:MainWorkflow extra a { a [s:MediaObject]; a [s:SoftwareSourceCode]; a [bioschemas:ComputationalWorkflow]; # Must be data entity in crate - ( ^s:hasPart @p:Dataset | - ^s:hasPart @p:Root; + ( ^s:hasPart @p:Root | + ^s:hasPart @p:Dataset; ); # FIXME: Profile does not require the contextual entity? #s:programmingLanguage @p:ComputerLanguage; @@ -59,12 +59,9 @@ p:MainWorkflow { } # An aggregated (potentially nested) dataset -p:Dataset { +p:Dataset extra a { a [s:Dataset]; s:name xsd:string; - ( ^s:hasPart @p:Dataset | - ^s:hasPart @p:Root; - ); } diff --git a/src/runcrate/shex/workflow-run-crate-0.1.shex b/src/runcrate/shex/workflow-run-crate-0.1.shex index fa2dd14..286dfd7 100644 --- a/src/runcrate/shex/workflow-run-crate-0.1.shex +++ b/src/runcrate/shex/workflow-run-crate-0.1.shex @@ -39,7 +39,7 @@ p:Process { s:instrument @p:MainWorkflow; } -p:MainWorkflow { +p:MainWorkflow extra a { a [bioschemas:ComputationalWorkflow]; # Must be data entity in crate ( ^s:hasPart @p:Dataset | @@ -55,7 +55,7 @@ p:FormalParameter { } # An aggregated (potentially nested) dataset -p:Dataset { +p:Dataset extra a { a [s:Dataset]; s:name xsd:string; ( ^s:hasPart @p:Dataset | @@ -64,7 +64,7 @@ p:Dataset { } -p:CWLDescription { +p:CWLDescription extra a { a [s:MediaObject]; a [s:SoftwareSourceCode]; a [s:HowTo]; From 9d0bf16204d253f14df80b57070452f6e0112441 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Thu, 30 Mar 2023 23:44:45 +0100 Subject: [PATCH 23/28] now hopefully valid shex --- .../bioschemas-computationalworkflow-1.0.shex | 38 +++++++++++-------- src/runcrate/shex/process-crate-0.1.shex | 2 +- src/runcrate/shex/provenance-crate-0.1.shex | 11 +++++- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex index 9a7c472..f336294 100644 --- a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex +++ b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex @@ -29,20 +29,21 @@ PREFIX s: PREFIX foaf: PREFIX dct: PREFIX bioschemas: +PREFIX rdf: start = @p:Root p:Root { s:mainEntity @p:MainWorkflow; } -p:MainWorkflow extra a { +p:MainWorkflow extra rdf:type { ## Marginality: Minimum a [bioschemas:ComputationalWorkflow]; - dct:conformsTo [] - s:creator @sd:PersonOrOrg+; + dct:conformsTo []; + s:creator @p:PersonOrOrg+; # NOTE: We won't be picky about xsd type even if profile insists Date or DateTime - s:dateCreated Literal; + s:dateCreated LITERAL; # NOTE: interpret profile liberally, in/out can be 0 if no input/output, otherwise required s:input @p:FormalParameter*; @@ -51,7 +52,7 @@ p:MainWorkflow extra a { ( s:licence LITERAL | s:licence { # Require contextual entity in RO-Crate - a [s:Creativework] + a [s:Creativework]; s:name LITERAL; } )+; @@ -65,7 +66,7 @@ p:MainWorkflow extra a { } ); - s:sdPublisher @sd:PersonOrOrg; + s:sdPublisher @p:PersonOrOrg; ( # Depends on context s:url IRI| s:url xsd:string; @@ -83,7 +84,7 @@ p:MainWorkflow extra a { }; )*; - s:contributor @sd:PersonOrOrg*; + s:contributor @p:PersonOrOrg*; ( s:creativeWorkStatus LITERAL | s:creativeWorkStatus IRI; @@ -115,11 +116,11 @@ p:MainWorkflow extra a { s:keywords LITERAL?; - s:maintainer @sd:PersonOrOrg*; + s:maintainer @p:PersonOrOrg*; - s:producer @sd:PersonOrOrg*; + s:producer @p:PersonOrOrg*; - s:publisher @sd:PersonOrOrg*; + s:publisher @p:PersonOrOrg*; s:runtimePlatform LITERAL?; ( @@ -160,17 +161,17 @@ p:MainWorkflow extra a { )?; } -p:PersonOrOrg extra a { +p:PersonOrOrg extra rdf:type { # As we're in RO-Crate, the contextual entity is required (a [s:Person] | a [s:Organization] - ) + ); s:name LITERAL; } p:FormalParameter { a [bioschemas:FormalParameter]; - dct:conformsTo [<>]; + dct:conformsTo []; s:name LITERAL; s:additionalType IRI*; s:description LITERAL?; @@ -180,9 +181,16 @@ p:FormalParameter { )?; ( s:defaultValue Literal | - s:defaultValue IR + s:defaultValue IRI; )?; - &identifier*; + #&identifier*; + ( + s:identifier { + a [s:PropertyValue]; + } | + s:identifier xsd:string; + s:identifier IRI; + )*; s:valueRequired xsd:boolean?; } diff --git a/src/runcrate/shex/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex index 637944d..3a442f8 100644 --- a/src/runcrate/shex/process-crate-0.1.shex +++ b/src/runcrate/shex/process-crate-0.1.shex @@ -63,7 +63,7 @@ p:Agent { p:SoftwareApplication EXTRA a { ( a [s:SoftwareApplication] | a [s:SoftwareSourceCode ] | - a [bioschema:ComputationalWorkflow] + a [bioschemas:ComputationalWorkflow] ); s:name xsd:string; ( s:url xsd:string | diff --git a/src/runcrate/shex/provenance-crate-0.1.shex b/src/runcrate/shex/provenance-crate-0.1.shex index d5c2526..fefcf5c 100644 --- a/src/runcrate/shex/provenance-crate-0.1.shex +++ b/src/runcrate/shex/provenance-crate-0.1.shex @@ -46,13 +46,20 @@ p:MainWorkflow { a [bioschemas:ComputationalWorkflow]; a [s:HowTo]; s:hasPart @p:SoftwareApplication+; - s:step + s:step @p:HowToStep; +} + +p:HowToStep { + a [s:HowToStep]; + s:workExample @p:SoftwareApplication+; + (s:position xsd:integer MinInclusive 0 | + s:position xsd:string /[1-9][0-9]*/); } p:SoftwareApplication EXTRA a { ( a [s:SoftwareApplication] | a [s:SoftwareSourceCode ] | - a [bioschema:ComputationalWorkflow] + a [bioschemas:ComputationalWorkflow] ); s:name xsd:string; ( s:url xsd:string | From 5e527cf55b2626bc350a4556f59889f6f06ea3ff Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Fri, 31 Mar 2023 00:59:18 +0100 Subject: [PATCH 24/28] example of using shacl severity in shex --- src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex index f336294..21dbe7d 100644 --- a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex +++ b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex @@ -30,11 +30,12 @@ PREFIX foaf: PREFIX dct: PREFIX bioschemas: PREFIX rdf: +PREFIX sh: start = @p:Root p:Root { - s:mainEntity @p:MainWorkflow; + s:mainEntity IRI @p:MainWorkflow; } p:MainWorkflow extra rdf:type { ## Marginality: Minimum @@ -82,7 +83,7 @@ p:MainWorkflow extra rdf:type { a [s:CreativeWork]; # s:name xsd:string }; - )*; + )* // sh:severity sh:Info // sh:message "It is RECOMMENDED to include a citation"; s:contributor @p:PersonOrOrg*; From c5eca5e6b4d0d810a7918d7928a18e1102a49747 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 4 Apr 2023 15:54:42 +0100 Subject: [PATCH 25/28] Avoid pytest import Relates to ResearchObject/ro-crate-validator-py#4 git link for now as awaiting pypi permission for new release of rocrateValidator --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index bd574f8..a175b9f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,8 +41,8 @@ install_requires= PyShEx~=0.8.1 arcp~=0.2.1 rdflib_shim>=1.0.3 - rocrateValidator==0.2.15 - pytest # FIXME: strangely needed by rocrateValidator +# rocrateValidator==0.2.16-RC1 + rocrateValidator @ git+ssh://git@github.com/example_org/ExampleRepo.git@0.2.16-RC1 importlib_resources~=5.12 [options.entry_points] From 183aec0dcfe27b4b843042cc376489ff670685bd Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 4 Apr 2023 16:16:37 +0100 Subject: [PATCH 26/28] Depend on v0.2.16-RC4 of runcrateValidator for now --- setup.cfg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index a175b9f..839241d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,8 +41,7 @@ install_requires= PyShEx~=0.8.1 arcp~=0.2.1 rdflib_shim>=1.0.3 -# rocrateValidator==0.2.16-RC1 - rocrateValidator @ git+ssh://git@github.com/example_org/ExampleRepo.git@0.2.16-RC1 + rocrateValidator @ git+https://github.com/ResearchObject/ro-crate-validator-py.git@v0.2.16-RC4 importlib_resources~=5.12 [options.entry_points] From 2f0c2f0da8c1d89d6971ed4ea666444300b54fe0 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 4 Apr 2023 16:16:56 +0100 Subject: [PATCH 27/28] some bioschemas --- src/runcrate/cli.py | 10 +++++++--- src/runcrate/validator.py | 20 +++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index df6ecf2..64f48fb 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -75,6 +75,7 @@ def convert(root, output, license, workflow_name, readme): @click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile") @click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile") @click.option("-W", "--workflow", is_flag=True, help="Validate against the Workflow RO-Crate profile") +@click.option("-b", "--bioschemas", is_flag=True, help="Validate against Bioschemas profiles (ComputationalWorkflow FormalParameter)") @click.option("-d", "--debug", is_flag=True, help="Enable debug output") @click.argument( @@ -83,7 +84,7 @@ def convert(root, output, license, workflow_name, readme): type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path), ) -def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, provenance_run, debug): +def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, provenance_run, bioschemas, debug): """Validate a Process/Workflow/Provenance Run Crate (experimental) CRATE: RO-Crate Root directory @@ -102,10 +103,10 @@ def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, pr if not validator.metadata_file_check(): return -2 - guess_profile = not workflow and not process_run and not workflow_run and not process_run + guess_profile = not workflow and not process_run and not workflow_run and not process_run and not bioschemas if guess_profile: # Detect profile from conformsTo - (workflow,process_run,workflow_run,provenance_run) = validator._detect_profiles() + (workflow,process_run,workflow_run,provenance_run,bioschemas) = validator._detect_profiles() if not workflow and not process_run and not workflow_run and not process_run: print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr) @@ -114,6 +115,9 @@ def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, pr if workflow: print("Validating against Workflow profile") validator.workflow_check() + if bioschemas: + print("Validating against Bioschemas ComputationalWorkflow profile") + validator.computationalworkflow_check() if process_run: print("Validating against Process Run profile") validator.process_run_check() diff --git a/src/runcrate/validator.py b/src/runcrate/validator.py index 3e617b8..d5467b2 100644 --- a/src/runcrate/validator.py +++ b/src/runcrate/validator.py @@ -61,11 +61,14 @@ def _detect_profiles(self): if not isinstance(profiles, list): profiles = [profiles] - workflow, process_run, workflow_run, provenance_run = (None,)*4 + workflow, process_run, workflow_run, provenance_run,bioschemas = (None,)*5 for p in profiles: + # Check profiles on root data set if self.debug: print("Detected profile {}".format(p.id)) if p.id.startswith("https://w3id.org/workflowhub/workflow-ro-crate/"): + # FIXME: Should we also detect this on legacy metadata file entity? + # (Note: wfrun/workflow profile require above on Dataset) workflow = p if p.id.startswith("https://w3id.org/ro/wfrun/process/"): process_run = p @@ -73,7 +76,16 @@ def _detect_profiles(self): workflow_run = p if p.id.startswith("https://w3id.org/ro/wfrun/provenance/"): provenance_run = p - return (workflow, process_run, workflow_run, provenance_run) + if self.crate.mainEntity: + # Detect ComputationalWorkflow profile on main entity + profiles = self.crate.mainEntity.get("conformsTo", []) + if not isinstance(profiles, list): + profiles = [profiles] + for p in profiles: + if p.id.startswith("https://bioschemas.org/profiles/ComputationalWorkflow/"): + bioschemas = p + + return (workflow, process_run, workflow_run, provenance_run, bioschemas) def ro_crate_check(self): print("Validating RO-Crate {}".format(self.root)) @@ -157,9 +169,11 @@ def _validate_shex(self, profile): def process_run_check(self): self._validate_shex("process-crate-0.1.shex") + def computationalworkflow_check(self): + self._validate_shex("bioschemas-computationalworkflow-1.0.shex") + def workflow_check(self): self._validate_shex("workflow-crate-1.0.shex") - # TODO: Add Bioschemas profile def workflow_run_check(self): # TODO: Finish From 8445dd2f409a2b82945eb9c37018a3cc29a16189 Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Tue, 23 Jan 2024 13:57:14 +0000 Subject: [PATCH 28/28] more shex --- src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex | 2 +- src/runcrate/shex/process-crate-0.1.shex | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex index 21dbe7d..d6e0caf 100644 --- a/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex +++ b/src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex @@ -83,7 +83,7 @@ p:MainWorkflow extra rdf:type { a [s:CreativeWork]; # s:name xsd:string }; - )* // sh:severity sh:Info // sh:message "It is RECOMMENDED to include a citation"; + )? // sh:severity sh:Info // sh:message "It is RECOMMENDED to include a citation"; s:contributor @p:PersonOrOrg*; diff --git a/src/runcrate/shex/process-crate-0.1.shex b/src/runcrate/shex/process-crate-0.1.shex index 3a442f8..5cac75c 100644 --- a/src/runcrate/shex/process-crate-0.1.shex +++ b/src/runcrate/shex/process-crate-0.1.shex @@ -21,6 +21,8 @@ PREFIX s: PREFIX foaf: PREFIX dct: PREFIX bioschemas: +PREFIX sh: + start = @p:Root @@ -80,8 +82,9 @@ p:PropertyValue { s:value Literal; } -p:File EXTRA a { +p:File { a [s:MediaObject]; + a IRI*; # Either part of another dataset or the root ( ^s:hasPart @p:Root | ^s:hasPart @p:Dataset ;