nf-core · GallVp · Jan 14, 2025 · Jan 8, 2025 · Jan 8, 2025 · Jan 13, 2025
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -1,11 +1,11 @@
 nf_core_version: 3.1.1
 repository_type: pipeline
 template:
-  author: "S\xE9bastien Guizard (@sguizard)"
+  author: "Sébastien Guizard (@sguizard)"
   description: Genes and transcripts annotation with Isoseq using uLTRA and TAMA
   force: false
   is_nfcore: true
   name: isoseq
   org: nf-core
   outdir: .
-  version: 2.0.1dev
+  version: 2.1.0dev
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,26 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v2.1.0 - [08/01/2025]
+
+### `Added`
+
+- Added an optional field `bam_type` to `schema_input` to allow simultaneous input of both Subreads and CCS sequences [#40](https://github.com/nf-core/isoseq/issues/40)
+- Updated nf-core template to version 3.1.1
+
+### `Fixed`
+
+- Update modules (bamtools/convert, custom/dumpsoftwareversions, gnu/sort, gstama/collapse, gstama/merge, gstama/polyacleanup, gunzip, isoseq/refine, lima, minimap2/align, multiqc, pbccs, ultra/align, ultra/index)
+
+  | Tool    | Previous version | New version |
+  | ------- | ---------------- | ----------- |
+  | multiqc | 1.24.1           | 1.26        |
+  | lima    | 2.9.0            | 2.12.0      |
+
+### `Dependencies`
+
+### `Deprecated`
+
 ## v2.0.0 - Sapphire Duck [05/09/2024]
 
 New entrypoint option to skip isoseq pre-processing.

diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 ## Pipeline summary
 
-1. Generate CCS consensuses from raw isoseq subreads ([`PBCCS`](https://github.com/PacificBiosciences/ccs))
+1. Generate CCS consensuses from raw isoseq subreads (Optional, [`PBCCS`](https://github.com/PacificBiosciences/ccs))
 2. Remove primer sequences from consensuses ([`LIMA`](https://github.com/pacificbiosciences/barcoding/))
 3. Detect and remove chimeric reads ([`ISOSEQ3 REFINE`](https://github.com/PacificBiosciences/IsoSeq))
 4. Convert bam file into fasta file ([`BAMTOOLS CONVERT`](https://github.com/pezmaster31/bamtools))

diff --git a/assets/samplesheet_ccs.csv b/assets/samplesheet_ccs.csv
@@ -0,0 +1,2 @@
+sample,bam,bam_type
+alz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam,ccs
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -13,22 +13,40 @@
                 "errorMessage": "Sample name must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
+            "bam_type": {
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^subreads|ccs$",
+                        "format": "file-path"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ],
+                "errorMessage": "BAM type should be either 'subreads' or 'ccs'. If missing, it is assumed to be 'subreads'",
+                "meta": ["bam_type"]
+            },
             "bam": {
                 "type": "string",
                 "pattern": "(^\\S+\\.bam$|^None$)",
-                "errorMessage": "Subreads BAM file must be provided, cannot contain spaces and must have extension '.bam' or being 'None' if the entrypoint is 'map'"
+                "errorMessage": "Subreads/CCS BAM file must be provided, cannot contain spaces and must have extension '.bam' or being 'None' if the entrypoint is 'map'"
             },
             "pbi": {
                 "type": "string",
                 "pattern": "(^\\S+\\.bam\\.pbi$|^None$)",
-                "errorMessage": "PacBio Index file for BAM subreads cannot contain spaces and must have extension '.bam.pbi' or being 'None' if the entrypoint is 'map'"
+                "errorMessage": "PacBio Index file for BAM subreads cannot contain spaces and must have extension '.bam.pbi' or being 'None' if the entrypoint is 'map' or if the BAM file is CCS"
             },
             "reads": {
                 "type": "string",
                 "pattern": "(^\\S+\\.fa\\.gz$|^None$)",
                 "errorMessage": "Long reads file cannot contain spaces and must have extension '.bam.pbi' or being 'None' if the entrypoint is 'isoseq'"
             }
         },
-        "required": ["sample"]
+        "required": ["sample"],
+        "dependentRequired": {
+            "bam_type": ["bam"]
+        }
     }
 }
diff --git a/conf/test_ccs.config b/conf/test_ccs.config
@@ -0,0 +1,37 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/isoseq -profile test_ccs,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test profile - CCS BAM'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data
+    input           = "$projectDir" + '/assets/samplesheet_ccs.csv'
+    primers         = 'https://raw.githubusercontent.com/nf-core/test-datasets/isoseq/testdata/primers.fasta'
+    fasta           = 'https://raw.githubusercontent.com/nf-core/test-datasets/isoseq/reference/Homo_sapiens.GRCh38.dna.chromosome.19.fasta'
+    gtf             = 'https://raw.githubusercontent.com/nf-core/test-datasets/isoseq/reference/Homo_sapiens.GRCh38.104.chr.13_18_19.gtf'
+    rq              = 0.9
+    chunk           = 5
+    five_prime      = 100
+    splice_junction = 10
+    three_prime     = 100
+    capped          = true
+    aligner         = 'ultra'
+}
diff --git a/docs/usage.md b/docs/usage.md
@@ -12,6 +12,7 @@ It reads all samples from a samplesheet file and parallelizes computation for ea
 Depending on your data, you might not need to run the isoseq preprocessing.
 This step can be skipped by setting the `--entrypoint` parameter to `map` and starting the analysis from the mapping step.
 By default, the entrypoint is set to `isoseq` and the full pipeline is run.
+The generation of CCS consensuses from raw isoseq subreads can be skipped by directly providing the CCS consensuses and setting the `bam_type` field to `ccs` in the samplesheet.
 
 ### Samplesheet input
 
@@ -24,12 +25,13 @@ Use `--input` parameter to specify its location.
 
 The samplesheet is a comma-separated file with up to 5 columns (`bam_type` is optional), and a header row as shown in the examples below.
 
-| Column   | Description                                                                                                                                                               |
-| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`).                                                                              |
-| `bam`    | Full path to isoseq subreads in `bam` format.                                                                                                                             |
-| `pbi`    | Full path to Pacbio index generated with [pbindex](https://github.com/pacificbiosciences/pbbam/). File's name must be compose of bam file name with the `.pbi` extension. |
-| `reads`  | Set of long reads to analyse in fasta format. The file must be gziped (.fa.gz).                                                                                           |
+| Column     | Description                                                                                                                                                               |
+| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`   | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`).                                                                              |
+| `bam_type` | Type of data in the BAM file: `subreads` or `ccs`. Optional; assumed to be `subreads` if missing.                                                                         |
+| `bam`      | Full path to isoseq subreads or CCS consensuses in `bam` format.                                                                                                          |
+| `pbi`      | Full path to Pacbio index generated with [pbindex](https://github.com/pacificbiosciences/pbbam/). File's name must be compose of bam file name with the `.pbi` extension. |
+| `reads`    | Set of long reads to analyse in fasta format. The file must be gziped (.fa.gz).                                                                                           |
 
 Starting from `pbccs` (`isoseq` entrypoint), the columns `sample`, `bam`, `pbi` are mandatory.
 The `reads` column must be set to `None`.
@@ -40,6 +42,16 @@ sample1,sample1.subreads.bam,sample1.subreads.bam.pbi,None
 sample2,sample2.subreads.bam,sample2.subreads.bam.pbi,None
 ```
 
+If CCS consensuses are available for a sample, `pbccs` can be skipped. Columns `sample`, `bam`, `bam_type` are mandatory in this case.
+The `reads` and `pbi` columns must be set to `None`.
+
+```console
+sample,bam_type,bam,pbi,reads
+sample1,ccs,sample1.ccs.bam,None,None
+sample2,subreads,sample2.subreads.bam,sample2.subreads.bam.pbi,None
+sample3,subreads,sample3.subreads.bam,sample3.subreads.bam.pbi,None
+```
+
 If the `map` entrypoint is used, the `reads` column must be filled with a gzipped fasta file with long reads and `sample` must be set.
 The `bam` and `pbi` columns have to be set to `None`.
 

diff --git a/main.nf b/main.nf
diff --git a/modules.json b/modules.json
@@ -7,72 +7,72 @@
                 "nf-core": {
                     "bamtools/convert": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
-                        "git_sha": "82024cf6325d2ee194e7f056d841ecad2f6856e9",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gnu/sort": {
                         "branch": "master",
-                        "git_sha": "a3cc42943548378b726610f45bb5a79ab3f0b633",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gstama/collapse": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gstama/merge": {
                         "branch": "master",
-                        "git_sha": "3528e946c9f19501d78cf0901d0b57e13e2f6860",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gstama/polyacleanup": {
                         "branch": "master",
-                        "git_sha": "8684e27e1061f855822ecc77d5099bc08f858784",
+                        "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e",
                         "installed_by": ["modules"]
                     },
                     "gunzip": {
                         "branch": "master",
-                        "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d",
+                        "git_sha": "ce35ce92566b3328b405253543b9b2b4d4e5f4f7",
                         "installed_by": ["modules"]
                     },
                     "isoseq/refine": {
                         "branch": "master",
-                        "git_sha": "8da47884cbd6c7a8d849f8cf53340511ab00df51",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "lima": {
                         "branch": "master",
-                        "git_sha": "b20be35facfc5acdc1259f132ed79339d79e989f",
+                        "git_sha": "1c4249137bdcd4392317e34123c00b5049c58d45",
                         "installed_by": ["modules"]
                     },
                     "minimap2/align": {
                         "branch": "master",
-                        "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "multiqc": {
                         "branch": "master",
-                        "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
+                        "git_sha": "f80914f78fb7fa1c00b14cfeb29575ee12240d9c",
                         "installed_by": ["modules"]
                     },
                     "pbccs": {
                         "branch": "master",
-                        "git_sha": "8da47884cbd6c7a8d849f8cf53340511ab00df51",
+                        "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e",
                         "installed_by": ["modules"]
                     },
                     "ultra/align": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "ultra/index": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     }
                 }

diff --git a/modules/nf-core/bamtools/convert/environment.yml b/modules/nf-core/bamtools/convert/environment.yml
diff --git a/modules/nf-core/bamtools/convert/meta.yml b/modules/nf-core/bamtools/convert/meta.yml
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		sample,bam,bam_type
		alz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam,ccs