Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: ShEx validation method #17

Draft
wants to merge 32 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
bcef301
a preliminary ShEx profile for Process Run Crate
stain Mar 7, 2023
1e7a79a
Example from https://w3id.org/ro/wfrun/process/0.1
stain Mar 7, 2023
495a6ac
PySheX dependencies
stain Mar 7, 2023
089adc2
test command
stain Mar 7, 2023
59fe1d5
process-crate-0.1
stain Mar 7, 2023
820cf1b
correct command line
stain Mar 7, 2023
0a443cf
newer PyShEx
stain Mar 7, 2023
4a0004e
basic RO-Crate validation
stain Mar 7, 2023
d8d9903
Avoid EXTRA
stain Mar 7, 2023
8fe5b37
more validation of RDF
stain Mar 7, 2023
40508d8
Making shex into package info
stain Mar 7, 2023
7d92a6a
now validating!
stain Mar 7, 2023
631ad12
ignore stuff from pip install --editable .
stain Mar 7, 2023
f293e0d
attempt to shape workflow crate profile
stain Mar 7, 2023
4176e6b
programmingLanguage does not seem to require contextual entity
stain Mar 7, 2023
52f5772
workflow-run-crate draft
stain Mar 7, 2023
3940a93
Draft of Bioschemas profile (untested)
stain Mar 9, 2023
443ebb2
namespaces
stain Mar 9, 2023
6a0e6a8
Merge branch 'main' into shex
stain Mar 18, 2023
4cdd9bb
correct import
stain Mar 18, 2023
cad4842
ignore tox output
stain Mar 18, 2023
de413fd
broken start of provenance profile
stain Mar 18, 2023
51c9233
avoid recursion recursion
stain Mar 30, 2023
9d0bf16
now hopefully valid shex
stain Mar 30, 2023
5e527cf
example of using shacl severity in shex
stain Mar 30, 2023
c5eca5e
Avoid pytest import
stain Apr 4, 2023
183aec0
Depend on v0.2.16-RC4 of runcrateValidator for now
stain Apr 4, 2023
2f0c2f0
some bioschemas
stain Apr 4, 2023
7b8dd3f
Merge branch 'shex' of github.com:ResearchObject/runcrate into shex
stain Apr 4, 2023
b7e70c7
Merge remote-tracking branch 'origin/main' into shex
stain Apr 4, 2023
8445dd2
more shex
stain Jan 23, 2024
670a653
Merge remote-tracking branch 'refs/remotes/origin/shex' into shex
stain Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,9 @@ _site/
.sass-cache/
.jekyll-cache/
.jekyll-metadata
__pycache__
build
*egg-info
.coverage
coverage.xml
dist
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
graft docs
graft src

include *.shex
include *.rst
include LICENSE
include requirements.txt
Expand Down
8 changes: 7 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ install_requires=
cwlprov==0.1.1
networkx==2.8
prov>=1.5.1
rocrate~=0.7
rocrate~=0.7
rdflib~=6.2
PyShEx~=0.8.1
arcp~=0.2.1
rdflib_shim>=1.0.3
rocrateValidator @ git+https://github.com/ResearchObject/ro-crate-validator-py.git@v0.2.16-RC4
importlib_resources~=5.12

[options.entry_points]
console_scripts=
Expand Down
63 changes: 60 additions & 3 deletions src/runcrate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@

import sys
from pathlib import Path

import click

from . import __version__
from .validator import CrateValidator
from .convert import ProvCrateBuilder
from .report import dump_crate_actions

Expand Down Expand Up @@ -56,8 +56,7 @@ def cli():
help="path to a README file (should be README.md in Markdown format)",
)
def convert(root, output, license, workflow_name, readme):
"""\
Convert a CWLProv RO bundle into a Workflow Run RO-Crate.
"""Convert a CWLProv RO bundle into a Workflow Run RO-Crate.

RO_DIR: top-level directory of the CWLProv RO
"""
Expand All @@ -72,6 +71,64 @@ def convert(root, output, license, workflow_name, readme):


@cli.command()
@click.option("-s", "--skip-ro-crate-check", is_flag=True, help="Skip general RO-Crate validation")
@click.option("-p", "--process-run", is_flag=True, help="Validate against the Process Run Crate profile")
@click.option("-w", "--workflow-run", is_flag=True, help="Validate against the Workflow Run Crate profile")
@click.option("-P", "--provenance-run", is_flag=True, help="Validate against the Provenance Run Crate profile")
@click.option("-W", "--workflow", is_flag=True, help="Validate against the Workflow RO-Crate profile")
@click.option("-b", "--bioschemas", is_flag=True, help="Validate against Bioschemas profiles (ComputationalWorkflow FormalParameter)")
@click.option("-d", "--debug", is_flag=True, help="Enable debug output")
@click.argument(
    "crate",
    metavar="CRATE",
    type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path),
)
def validate(crate, skip_ro_crate_check, workflow, process_run, workflow_run, provenance_run, bioschemas, debug):
    """Validate a Process/Workflow/Provenance Run Crate (experimental)

    CRATE: RO-Crate Root directory

    Unless forced (e.g. --workflow-run), the validation will use
    the crate's profile(s) as indicated with conformsTo.
    """
    # NOTE: the docstring above is the command's --help text; keep
    # maintainer-facing notes in comments like this one.
    #
    # Returns None on the normal path, -2 when the metadata file check fails
    # and -1 when no profile could be detected.
    # NOTE(review): click appears to discard a command's return value when run
    # in standalone mode, so these codes may not become the process exit
    # status -- confirm whether sys.exit() is what is intended here.
    validator = CrateValidator(crate)
    if debug:
        validator.debug = True
    if not skip_ro_crate_check:
        validator.ro_crate_check()
        # TODO: Check output

    # Make sure Metadata File is readable and described
    if not validator.metadata_file_check():
        return -2

    # A profile counts as "forced" when ANY of the five profile flags was
    # given.  provenance_run must be part of this test, otherwise
    # --provenance-run alone would be silently replaced by auto-detection.
    forced = any((workflow, process_run, workflow_run, provenance_run, bioschemas))
    if not forced:
        # Detect profile from conformsTo
        (workflow, process_run, workflow_run, provenance_run, bioschemas) = validator._detect_profiles()

        # Detection must yield at least one crate profile.  This check only
        # applies to auto-detection, so an explicitly requested check (for
        # example -b on its own) is never rejected as "could not detect".
        if not any((workflow, process_run, workflow_run, provenance_run)):
            print("Could not detect profile, check \"conformsTo\" or force profile check (e.g. --workflow-run)", file=sys.stderr)
            return -1

    if workflow:
        print("Validating against Workflow profile")
        validator.workflow_check()
    if bioschemas:
        print("Validating against Bioschemas ComputationalWorkflow profile")
        validator.computationalworkflow_check()
    if process_run:
        print("Validating against Process Run profile")
        validator.process_run_check()
    if workflow_run:
        print("Validating against Workflow Run profile")
        validator.workflow_run_check()
    if provenance_run:
        print("Validating against Provenance Run profile")
        validator.provenance_run_check()
@click.argument(
"crate",
metavar="RO_CRATE",
Expand Down
197 changes: 197 additions & 0 deletions src/runcrate/shex/bioschemas-computationalworkflow-1.0.shex
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# Copyright 2023 The University of Manchester
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## ShEx shape for https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE and
# https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE
# following MUST requirements ("marginality: minimum"), with
# all SHOULD/COULD (recommended/optional) marked as optional (*/?)
#
# NOTE: As this shape is meant to be used with a workflow-crate, it assumes the
# workflow is the mainEntity of an RO-Crate root, which is the start focus. That also
# means some contextual entities are assumed to be explicit with a @type and s:name
# rather than just an IRI.

## FIXME: Update namespace
PREFIX p: <https://bioschemas.example.org/profiles/ComputationalWorkflow/1.0-RELEASE#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX s: <http://schema.org/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX bioschemas: <https://bioschemas.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sh: <http://www.w3.org/ns/shacl#>

start = @p:Root

# Start shape (see "start = @p:Root"): the focus node must have an
# s:mainEntity whose value is an IRI node conforming to p:MainWorkflow.
p:Root {
s:mainEntity IRI @p:MainWorkflow;
}
# Shape for the workflow referenced as s:mainEntity of the crate root.
# Properties are grouped by the profile's marginality: "Minimum" properties
# are required, "Recommended" and "Optional" ones carry a ? or * cardinality.
# EXTRA rdf:type tolerates additional rdf:type values beyond the one required.
p:MainWorkflow extra rdf:type {
  ## Marginality: Minimum

  a [bioschemas:ComputationalWorkflow];
  dct:conformsTo [<https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE>];
  s:creator @p:PersonOrOrg+;
  # NOTE: We won't be picky about xsd type even if profile insists Date or DateTime
  s:dateCreated LITERAL;

  # NOTE: interpret profile liberally, in/out can be 0 if no input/output, otherwise required
  s:input @p:FormalParameter*;
  s:output @p:FormalParameter*;

  # schema.org spells this property "license"
  ( s:license LITERAL |
    s:license {
      # Require contextual entity in RO-Crate
      a [s:CreativeWork];
      s:name LITERAL;
    }
  )+;
  s:name LITERAL;

  ( s:programmingLanguage LITERAL |
    s:programmingLanguage {
      # Require contextual entity in RO-Crate
      a [s:ComputerLanguage];
      s:name LITERAL;
    }
  );

  s:sdPublisher @p:PersonOrOrg;
  ( # Depends on context
    s:url IRI |
    s:url xsd:string;
  );

  s:version LITERAL;

  ## Marginality: Recommended

  ( s:citation LITERAL |
    s:citation {
      # FIXME: Won't detect subtypes like s:ScholarlyArticle without rdfs inferencing
      a [s:CreativeWork];
      # s:name xsd:string
    };
  )? // sh:severity sh:Info // sh:message "It is RECOMMENDED to include a citation";

  s:contributor @p:PersonOrOrg*;

  ( s:creativeWorkStatus LITERAL |
    s:creativeWorkStatus IRI;
  )?;

  # Recommended, hence optional: zero or more documentation references
  ( s:documentation {
      a [s:CreativeWork];
    } |
    s:documentation IRI;
  )*;

  s:funding {
    a [s:Grant];
  }*;

  s:hasPart {
    # Extended types to include tools
    ( a [s:CreativeWork] |
      a [s:SoftwareApplication] |
      a [s:SoftwareSourceCode]
    );
  }*;

  (
    s:isBasedOn { a [s:CreativeWork] } |
    s:isBasedOn { a [s:Product] } |
    s:isBasedOn IRI
  )?;

  s:keywords LITERAL?;

  s:maintainer @p:PersonOrOrg*;

  s:producer @p:PersonOrOrg*;

  s:publisher @p:PersonOrOrg*;

  s:runtimePlatform LITERAL?;
  (
    s:softwareRequirements LITERAL |
    s:softwareRequirements IRI
  )*;

  s:targetProduct {
    a [s:SoftwareApplication];
  }*;

  ## Marginality: Optional

  s:alternateName LITERAL*;
  s:conditionsOfAccess LITERAL?;
  # FIXME: ISO8601 regex pattern without forcing xsd:dateTime/xsd:date/xsd:timestamp etc. declaration
  s:dateModified LITERAL?;
  s:datePublished LITERAL?;

  (
    s:encodingFormat LITERAL |
    s:encodingFormat IRI
  )?;

  # Each identifier is ONE OF: a PropertyValue entity, a string, or an IRI.
  # The alternatives must be separated by "|"; ";" would require a string
  # identifier and an IRI identifier to appear together.
  $p:identifier (
    s:identifier {
      a [s:PropertyValue];
    } |
    s:identifier xsd:string |
    s:identifier IRI;
  )*;

  (
    s:image {
      a [s:ImageObject];
    } |
    s:image IRI;
  )?;
}

# Shape referenced by p:MainWorkflow for s:creator, s:sdPublisher,
# s:contributor, s:maintainer, s:producer and s:publisher values.
# The node must be typed s:Person or s:Organization and have an s:name literal.
p:PersonOrOrg extra rdf:type {
# As we're in RO-Crate, the contextual entity is required
(a [s:Person] |
a [s:Organization]
);
s:name LITERAL;
}

# Shape for workflow inputs/outputs (values of s:input and s:output on
# p:MainWorkflow), following the Bioschemas FormalParameter 1.0-RELEASE profile.
# Type, conformsTo and name are required; everything else is optional.
p:FormalParameter {
  a [bioschemas:FormalParameter];
  dct:conformsTo [<https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE>];
  s:name LITERAL;
  s:additionalType IRI*;
  s:description LITERAL?;
  (
    s:encodingFormat LITERAL |
    s:encodingFormat IRI;
  )?;
  (
    s:defaultValue LITERAL |
    s:defaultValue IRI;
  )?;
  #&identifier*;
  # Each identifier is ONE OF: a PropertyValue entity, a string, or an IRI.
  # The alternatives must be separated by "|"; ";" would require a string
  # identifier and an IRI identifier to appear together.
  (
    s:identifier {
      a [s:PropertyValue];
    } |
    s:identifier xsd:string |
    s:identifier IRI;
  )*;
  s:valueRequired xsd:boolean?;
}

Loading