Skip to content

Commit

Permalink
Use scala-csv
Browse files Browse the repository at this point in the history
* Use sbt 1.9.8

* Update dependencies

* Do not include module-info.class in assembly

* Set version to 3.8.0-SNAPSHOT

* Replace kantan.csv with scala-csv

* Update changelog

* Revert samtools to maintain JDK8 compatibility
  • Loading branch information
mtomko authored Jan 10, 2024
1 parent d692fac commit f1922bf
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 45 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## 3.8.0
* Replace kantan.csv with scala-csv

## 3.7.2
* Parse barcodes from read IDs in demultiplexed mode

## 3.7.1
* Adjust handling of command-line arguments in demultiplexed FASTQ file case

## 3.7.0
* Support for processing demultiplexed FASTQ files

Expand Down
28 changes: 14 additions & 14 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,21 @@ lazy val versions = new {
val acyclic = "0.2.1"
val betterFiles = "3.9.2"
val betterMonadicFor = "0.3.1"
val catsEffect3 = "3.5.1"
val catsEffect3 = "3.5.2"
val cats = "2.10.0"
val commonsIo = "2.13.0"
val commonsText = "1.10.0"
val commonsIo = "2.15.1"
val commonsText = "1.11.0"
val commonsMath3 = "3.6.1"
val fastutil = "8.5.12"
val fs2 = "3.8.0"
val kantanCodecs = "0.5.3"
val kantanCsv = "0.7.0"
val fs2 = "3.9.3"
val log4s = "1.10.0"
val logback = "1.2.11"
val logback = "1.2.13"
val munit = "0.7.29"
val munitCatsEffect3 = "1.0.7"
val samTools = "3.0.5"
val scalaCheck = "1.17.0"
val scalaTest = "3.2.16"
val scalaCsv = "1.3.10"
val scalaTest = "3.2.17"
val scalaTestPlusScalaCheck = "3.2.2.0"
val scopt = "4.1.0"
val slf4j = "1.7.36"
Expand All @@ -45,12 +44,11 @@ lazy val libraries = new {
val fastutil = "it.unimi.dsi" % "fastutil" % versions.fastutil
val fs2Core = "co.fs2" %% "fs2-core" % versions.fs2
val fs2Io = "co.fs2" %% "fs2-io" % versions.fs2
val kantanCodecs = "com.nrinaudo" %% "kantan.codecs" % versions.kantanCodecs
val kantanCsv = "com.nrinaudo" %% "kantan.csv" % versions.kantanCsv
val log4s = "org.log4s" %% "log4s" % versions.log4s
val logbackCore = "ch.qos.logback" % "logback-core" % versions.logback
val logbackClassic = "ch.qos.logback" % "logback-classic" % versions.logback
val samtools = "com.github.samtools" % "htsjdk" % versions.samTools
val scalaCsv = "com.github.tototoshi" %% "scala-csv" % versions.scalaCsv
val scopt = "com.github.scopt" %% "scopt" % versions.scopt
val slf4j = "org.slf4j" % "slf4j-api" % versions.slf4j

Expand All @@ -71,12 +69,11 @@ lazy val dependencies =
libraries.commonsIo,
libraries.commonsMath3,
libraries.fastutil,
libraries.kantanCodecs,
libraries.kantanCsv,
libraries.log4s,
libraries.logbackCore % Runtime,
libraries.logbackClassic % Runtime,
libraries.samtools,
libraries.scalaCsv,
libraries.scopt,
libraries.slf4j,
libraries.betterFiles % Test,
Expand Down Expand Up @@ -107,8 +104,11 @@ lazy val headerSettings = List(
lazy val assemblySettings = List(
assembly / assemblyJarName := "../bin/poolq3.jar",
assembly / assemblyMergeStrategy := {
case "logback.xml" => MergeStrategy.first
case "logback-test.xml" => MergeStrategy.discard
case "logback.xml" => MergeStrategy.first
case "logback-test.xml" => MergeStrategy.discard
case PathList("module-info.class") => MergeStrategy.discard
case PathList("META-INF", "versions", xs @ _, "module-info.class") => MergeStrategy.discard
case "module-info.class" => MergeStrategy.first
case x =>
val old = (assembly / assemblyMergeStrategy).value
old(x)
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.9.6
sbt.version=1.9.8
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ import java.nio.file.Path

import scala.util.Using

import kantan.csv._
import kantan.csv.ops._
import com.github.tototoshi.csv._
import org.apache.commons.io.ByteOrderMark
import org.apache.commons.io.input.BOMInputStream
import org.broadinstitute.gpp.poolq3.reports.{GctDialect, PoolQ2Dialect, ReportsDialect}
Expand Down Expand Up @@ -77,34 +76,34 @@ object ReferenceData {
.setInclude(false)
.get()
val br = new BufferedReader(new InputStreamReader(in))
val delimiter = guessDelimiter(br)
val config =
CsvConfiguration(delimiter, quote, CsvConfiguration.QuotePolicy.WhenNeeded, CsvConfiguration.Header.None)
val guessedDelimiter = guessDelimiter(br)
// CSV dialect handed implicitly to scala-csv's `CSVReader.open` below: the stock defaults with the
// delimiter and quote character swapped for the values determined for this particular file.
implicit object CSVFormat extends DefaultCSVFormat {
  // explicit type, matching `quoteChar`, so the overridden member's type does not rely on inference
  override val delimiter: Char = guessedDelimiter
  override val quoteChar: Char = quote
}
skipHeader(br, LineRegex)
val reader = br.asCsvReader[List[String]](config)
val barcodes = reader.map {
case Right(xs) =>
xs match {
case barcodeRaw :: idRaw :: _ =>
// if the CSV parser leaves spaces, we should remove them
val barcode = barcodeRaw.trim()
val id = idRaw.trim()

// N.B. empty IDs are commonly used and must be supported; as long as the barcode is a non-empty, valid
// DNA string, we must accept the row. However, sometimes Excel leaves empty lines in exported CSV; as
// long as *both* the barcode and ID are empty, it's safe to just skip the row. For now we'll be paranoid
// and reject cases where the barcode is empty but the ID is non-empty
if (barcode.isEmpty && id.isEmpty) None
else if (isReferenceBarcode(barcode)) Some(ReferenceEntry(barcode, id))
else throw InvalidFileException(file, s"Invalid DNA barcode '$barcode' for ID '$id'")
case _ =>
throw InvalidFileException(
file,
s"Incorrect number of columns. At least 2 required, got: ${xs.length}: $xs"
)
}
case Left(value) => throw InvalidFileException(file, s"Unable to parse data ${value.getMessage}")
}.toList
// Read every remaining row eagerly. scala-csv signals malformed input by throwing, so translate that
// into the InvalidFileException used for every other problem with the reference file; otherwise the
// caller sees a raw parser exception with no indication of which file was at fault.
val rows =
  try CSVReader.open(br).all()
  catch {
    case e: MalformedCSVException =>
      throw InvalidFileException(file, s"Unable to parse data ${e.getMessage}")
  }

// Each row becomes Some(entry), None (a fully blank row that is skipped), or an InvalidFileException.
val barcodes = rows.map {
  case barcodeRaw :: idRaw :: _ =>
    // if the CSV parser leaves spaces, we should remove them
    val barcode = barcodeRaw.trim()
    val id = idRaw.trim()

    // N.B. empty IDs are commonly used and must be supported; as long as the barcode is a non-empty, valid
    // DNA string, we must accept the row. However, sometimes Excel leaves empty lines in exported CSV; as
    // long as *both* the barcode and ID are empty, it's safe to just skip the row. For now we'll be paranoid
    // and reject cases where the barcode is empty but the ID is non-empty
    if (barcode.isEmpty && id.isEmpty) None
    else if (isReferenceBarcode(barcode)) Some(ReferenceEntry(barcode, id))
    else throw InvalidFileException(file, s"Invalid DNA barcode '$barcode' for ID '$id'")
  case xs =>
    // fewer than two columns: report the offending row verbatim
    throw InvalidFileException(
      file,
      s"Incorrect number of columns. At least 2 required, got: ${xs.length}: $xs"
    )
}

if (barcodes.isEmpty) {
throw InvalidFileException(file, "Empty reference file")
Expand Down
2 changes: 1 addition & 1 deletion version.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ThisBuild / version := "3.7.3-SNAPSHOT"
ThisBuild / version := "3.8.0-SNAPSHOT"

0 comments on commit f1922bf

Please sign in to comment.