diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1ad710f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,25 @@ +shinyproxy.log* +*.jar +*.ipynb_checkpoints +*.csv +.DS_Store +node_modules +tests/*.png +.idea +_site +_freeze +/.quarto/ +*.pyc +.task +site_libs +src/docs/*.html +src/*.html +src/notebooks/**/*.html +src/*-listing.json + +*.parquet +!**/example-data/**/*.parquet +*.sig +ko*.pathview.png +src/notebooks/R Examples/*.tsv +src/notebooks/R Examples/*.txt \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9c77a32..b6d38df 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -72,12 +72,10 @@ jobs: username: ${{ secrets.QUAY_USERNAME }} password: ${{ secrets.QUAY_PASSWORD }} - - name: Push - run: | - docker tag quay.io/microbiome-informatics/emg-notebooks.dev:latest quay.io/microbiome-informatics/emg-notebooks.dev:${{ github.ref_name }} - docker push quay.io/microbiome-informatics/emg-notebooks.dev:${{ github.ref_name }} + - id: docker-tag + uses: yuya-takeyama/docker-tag-from-github-ref-action@v1 - - name: Push latest - if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) + - name: Push run: | - docker push quay.io/microbiome-informatics/emg-notebooks.dev:latest + docker tag quay.io/microbiome-informatics/emg-notebooks.dev:latest quay.io/microbiome-informatics/emg-notebooks.dev:${{ steps.docker-tag.outputs.tag }} + docker push quay.io/microbiome-informatics/emg-notebooks.dev:${{ steps.docker-tag.outputs.tag }} diff --git a/Taskfile.yml b/Taskfile.yml index 29ab74e..a8f03bc 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -53,7 +53,7 @@ tasks: NOT needed if you're just editing/adding notebooks with no additional dependencies. cmds: - - docker build -f docker/Dockerfile -t quay.io/microbiome-informatics/emg-notebooks.dev:latest . 
+ - docker build --load -f docker/Dockerfile -t quay.io/microbiome-informatics/emg-notebooks.dev:latest . build-static-docker: summary: | @@ -61,7 +61,7 @@ tasks: The built image is tagged as `notebooks-static`. cmds: - - docker build -f docker/docs.Dockerfile -t notebooks-static . + - docker build --load -f docker/docs.Dockerfile -t notebooks-static . sources: - docker/docs.Dockerfile - docker/Dockerfile diff --git a/src/docs/about.md b/src/docs/about.md index 49e23d1..29fbdb4 100644 --- a/src/docs/about.md +++ b/src/docs/about.md @@ -8,6 +8,7 @@ author: date: last-modified citation: false description: Background to the MGnify service, our funding and citation info, and how to contact us. +order: 1 --- ## The MGnify service diff --git a/src/docs/additional-analyses.md b/src/docs/additional-analyses.md new file mode 100644 index 0000000..b0844b2 --- /dev/null +++ b/src/docs/additional-analyses.md @@ -0,0 +1,44 @@ +--- +title: Additional analyses +author: + - name: MGnify + url: https://www.ebi.ac.uk/metagenomics + affiliation: EMBL-EBI + affiliation-url: https://www.ebi.ac.uk +date: last-modified +citation: true +description: Metagenomic analyses created by workflows other than MGnify's standardised pipelines. +order: 5 +--- +## Overview + +The MGnify database and website include metagenome analyses generated by workflows other than [MGnify's standardised pipelines](analysis). +These additional analyses originate from workflows targeting specific subsets of data (for example, data from certain projects or biomes) as well as from tools that were released after the latest MGnify Pipeline version. + +## Implementation within MGnify + +MGnify currently supports additional analyses of [runs](glossary.md#run) and [assemblies](glossary.md#assembly), in the form of [GFF3](http://www.ensembl.org/info/website/upload/gff3.html) packaged into [RO-Crates](glossary.md#ro-crate) conforming to the [Workflow Run Crate profile](https://w3id.org/ro/wfrun/workflow/0.3). 
+ +RO-Crates are based around a JSON-LD schema file, which in the case of a Workflow Run Crate describes a workflow (e.g. a pipeline), a set of inputs, and a set of outputs. +In the context of MGnify, these outputs consist of a GFF3 file and the inputs include a run or assembly that has already been analysed by MGnify's standard pipelines. + + +## Finding additional analyses +RO-Crates containing additional analyses can be found tabulated on the detail pages of the run or assembly they correspond to. +There are also [API](api.md) endpoints for these: `https://www.ebi.ac.uk/metagenomics/api/v1/runs/<accession>/extra-annotations` and `https://www.ebi.ac.uk/metagenomics/api/v1/assemblies/<accession>/extra-annotations`. + +RO-Crates in MGnify are packaged as ZIP files, which can be downloaded. +These crates can also be browsed directly on the MGnify website: the human-readable representation of the crate is displayed. +Some crates include extra browsable information, like quality control reports. + +### Browsing additional assembly analyses in the contig viewer +Where an additional analysis targets an assembly, the annotation tracks provided by the additional analysis are shown alongside the standard [MGnify Assembly Analysis](analysis.md) of that assembly. +This enables users to browse these additional analyses in the context of existing functional annotations. +For example, biosynthetic gene clusters predicted by [SanntiS](https://github.com/Finn-Lab/SanntiS) are shown alongside protein annotations from the standard pipeline. + +![Example of an additional analysis of a metagenomics assembly](images/additional-analyses/extra-assembly-analyses-guide.png) +(1) = the contigs list for the assembly, where the opaque `[S]` flag indicates that the `[S]ANNTIS` crate has annotations for that contig. 
+(2) = the annotation attributes (from the GFF file) can be used to colourise and label the track: here a BGC ID is shown for each gene cluster prediction +(3) = the annotation in the track can be clicked to open the details view +(4) = the details view for the annotation +(5) = the "Browse the RO-Crate" link opens a report containing the metadata of the additional analyses, including provenance information like a link to the workflow source code. diff --git a/src/docs/analysis.md b/src/docs/analysis.md index 9467cf8..4a46a9b 100644 --- a/src/docs/analysis.md +++ b/src/docs/analysis.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Description of the latest MGnify analysis pipeline and the tools it uses. +order: 3 --- ## Overview diff --git a/src/docs/api.md b/src/docs/api.md index bb0eaea..f70c158 100644 --- a/src/docs/api.md +++ b/src/docs/api.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Programatically accessing MGnify data via the API (Application Programming Interface) +order: 7 --- With the rapid expansion in the number of datasets deposited in MGnify, it has become increasingly important to provide programmatic diff --git a/src/docs/dataflow.md b/src/docs/dataflow.md index 3e1acf4..7e48da9 100644 --- a/src/docs/dataflow.md +++ b/src/docs/dataflow.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: How MGnify data moves from submission to analysis +order: 2 --- ![MGnify data flow from submission to [analysis results](glossary.md#analysis-result).](images/dataflow/submit_graph_08_web032.png){#fig-dataflow-process .tall-figure fig-align="left"} diff --git a/src/docs/faqs.md b/src/docs/faqs.md index 89a90ea..f4a5e28 100644 --- a/src/docs/faqs.md +++ b/src/docs/faqs.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Frequently asked questions about MGnify +order: 10 --- ## What kind of sequence data does the service accept? 
diff --git a/src/docs/genome-viewer.md b/src/docs/genome-viewer.md index 3b009f8..42a8320 100644 --- a/src/docs/genome-viewer.md +++ b/src/docs/genome-viewer.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Description of the MGnify genomes pipelines and web resource +order: 6 --- ## Genome Catalogues diff --git a/src/docs/glossary.md b/src/docs/glossary.md index bc87d89..ccabd33 100644 --- a/src/docs/glossary.md +++ b/src/docs/glossary.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Dictionary of terms used in MGnify and throughout this documentaiton +order: 11 --- ## 16S rRNA genes @@ -76,6 +77,9 @@ A software or script used during the individual step of an analysis pipeline. ## Predicted coding sequence (pCDS) Partial or complete gene sequence as predicted by the gene caller (FragGenScan for read submissions, Prodigal and FragGenScan for assembly submissions). +## RO-Crate (Research Object Crate) {#ro-crate} +A method and standard for describing the metadata associated with a set of research data. [RO-Crates](https://www.researchobject.org/ro-crate/) are implemented as a JSON-LD schema file, which can be packaged as a ZIP file optionally containing some or all of the data it describes, and optionally an HTML file to provide a human-readable view of the crate. + ## Run The sequence file obtained from performing an experiment (an experiment generally includes several steps such as filtration, metatranscriptomic extraction and Illumina MiSeq sequencing, for example) on all or part of a [sample](#sample). Several runs can therefore be generated from a single [sample](#sample). 
diff --git a/src/docs/images/additional-analyses/extra-assembly-analyses-guide.png b/src/docs/images/additional-analyses/extra-assembly-analyses-guide.png new file mode 100644 index 0000000..7e7e1d2 Binary files /dev/null and b/src/docs/images/additional-analyses/extra-assembly-analyses-guide.png differ diff --git a/src/docs/notebooks.md b/src/docs/notebooks.md index 753a1ae..cf7b2d4 100644 --- a/src/docs/notebooks.md +++ b/src/docs/notebooks.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Using MGnify’s Jupyter Notebooks to explore and access data programatically +order: 8 --- ## A Jupyter Lab environment for the MGnify API diff --git a/src/docs/portal.md b/src/docs/portal.md index ccc5c12..ac65145 100644 --- a/src/docs/portal.md +++ b/src/docs/portal.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: User guide to the MGnify website +order: 4 --- ## Sections of the MGnify website @@ -220,6 +221,8 @@ This feature is available for assembly analysis only and can be found in the tab Interactive contig viewer for localised visualisation of functional annotation per contig. ::: +The Contig Viewer also displays tracks from any [additional analyses](additional-analyses) that may have been run on the same assembly. + ## Finding taxonomic information about runs on the MGnify website Taxonomic analysis of runs within projects on the [MGnify website](https://www.ebi.ac.uk/metagenomics/) can be accessed by selecting the ‘Taxonomic analysis’ tab found toward the top of any run page (see @fig-taxonomic-analysis below). 
diff --git a/src/docs/sequence-search.md b/src/docs/sequence-search.md index 23b088a..395c728 100644 --- a/src/docs/sequence-search.md +++ b/src/docs/sequence-search.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: Guide to using MGnify's peptide sequence search service +order: 9 --- ## Landing page diff --git a/src/docs/tutorials.md b/src/docs/tutorials.md index 03596d0..c3be9f1 100644 --- a/src/docs/tutorials.md +++ b/src/docs/tutorials.md @@ -8,6 +8,7 @@ author: date: last-modified citation: true description: List of online tutorials relevant to MGnify and ENA +order: 12 --- ## MGnify and EMBL-EBI online tutorials