From f1196920034588b969e2578a4882ed71fb9f9982 Mon Sep 17 00:00:00 2001 From: Mark Tomko <610104+mtomko@users.noreply.github.com> Date: Thu, 22 Feb 2024 09:50:25 -0800 Subject: [PATCH] Port to Scala 3 (with newly released `scalafmt`) * Port to Scala 3 * Refactor reference package object * Convert package objects to toplevel * Set Scala 3 rewrite rules * Run scalafmt * Update copyright * Set version to 3.12.0-SNAPSHOT * Update changelog * Update GH actions * Workaround for scalafmt issue See https://github.com/scalameta/scalafmt/issues/3787 --- .github/workflows/ci.yml | 2 + .github/workflows/release.yml | 2 + .scalafix.conf | 2 +- .scalafmt.conf | 14 +- CHANGELOG.md | 3 + build.sbt | 25 +- .../org/broadinstitute/gpp/poolq3/PoolQ.scala | 46 ++-- .../gpp/poolq3/PoolQConfig.scala | 87 +++--- .../gpp/poolq3/ReadsSource.scala | 8 +- .../gpp/poolq3/barcode/BarcodePolicy.scala | 162 +++++------ .../gpp/poolq3/barcode/Barcodes.scala | 2 +- .../gpp/poolq3/barcode/Dmuxed.scala | 12 +- .../poolq3/barcode/DmuxedBarcodeSource.scala | 14 +- .../DmuxedPairedEndBarcodeSource.scala | 15 +- .../gpp/poolq3/barcode/FoundBarcode.scala | 9 +- .../gpp/poolq3/barcode/KeyMask.scala | 49 ++-- .../gpp/poolq3/barcode/KeyRange.scala | 15 +- .../gpp/poolq3/barcode/KnuthMorrisPratt.scala | 49 ++-- .../barcode/ReadTooShortException.scala | 2 +- .../barcode/SingleFileBarcodeSource.scala | 14 +- .../barcode/ThreeFileBarcodeSource.scala | 15 +- .../poolq3/barcode/TwoFileBarcodeSource.scala | 15 +- .../gpp/poolq3/barcode/barcode.scala | 107 ++++++++ .../gpp/poolq3/barcode/package.scala | 120 -------- .../gpp/poolq3/collection/collection.scala | 9 + .../gpp/poolq3/collection/package.scala | 14 - .../gpp/poolq3/hist/Histogram.scala | 10 +- .../poolq3/hist/OpenHashMapHistogram.scala | 8 +- .../gpp/poolq3/hist/ShardedHistogram.scala | 23 +- .../gpp/poolq3/hist/TupleHistogram.scala | 26 +- .../gpp/poolq3/numeric/numeric.scala | 24 ++ .../gpp/poolq3/numeric/package.scala | 28 -- .../gpp/poolq3/parser/BarcodeSet.scala | 25 +- .../gpp/poolq3/parser/CloseableIterable.scala | 59 ++-- .../gpp/poolq3/parser/CloseableIterator.scala | 9 +- .../parser/ConflictingBarcodeException.scala | 2 +- .../gpp/poolq3/parser/FastqParser.scala | 26 +- .../poolq3/parser/InvalidFileException.scala | 5 +- .../poolq3/parser/InvalidReadException.scala | 2 +- .../gpp/poolq3/parser/ReferenceData.scala | 61 ++--- .../gpp/poolq3/parser/ReferenceEntry.scala | 16 +- .../gpp/poolq3/parser/SamParser.scala | 19 +- .../gpp/poolq3/parser/TextParser.scala | 16 +- .../gpp/poolq3/parser/package.scala | 38 --- .../gpp/poolq3/parser/parser.scala | 31 +++ .../gpp/poolq3/process/BarcodeStats.scala | 15 +- .../gpp/poolq3/process/Consumer.scala | 6 +- .../gpp/poolq3/process/NoOpConsumer.scala | 9 +- .../gpp/poolq3/process/PoolQProcess.scala | 37 ++- .../gpp/poolq3/process/ScoringConsumer.scala | 96 +++---- .../gpp/poolq3/process/State.scala | 8 +- .../process/UnexpectedSequenceTracker.scala | 28 +- .../gpp/poolq3/reference/BaseReference.scala | 22 +- .../gpp/poolq3/reference/BkTree.scala | 64 ----- .../gpp/poolq3/reference/ExactReference.scala | 22 +- .../gpp/poolq3/reference/MatchedBarcode.scala | 2 +- .../gpp/poolq3/reference/Reference.scala | 61 +++-- .../poolq3/reference/VariantReference.scala | 49 ++-- .../gpp/poolq3/reference/package.scala | 37 --- .../poolq3/reports/BarcodeCountsWriter.scala | 16 +- .../gpp/poolq3/reports/BarcodeFrequency.scala | 6 +- .../reports/CorrelationFileWriter.scala | 33 +-- .../gpp/poolq3/reports/CountsWriter.scala | 16 +- .../reports/LogNormalizedCountsWriter.scala | 11 +- .../gpp/poolq3/reports/ParsedFilename.scala | 2 +- .../gpp/poolq3/reports/QualityWriter.scala | 31 +-- .../reports/ReferenceCollisionWriter.scala | 8 +- .../gpp/poolq3/reports/ReportsDialect.scala | 2 +- .../gpp/poolq3/reports/RunInfoWriter.scala | 6 +- .../gpp/poolq3/reports/UmiQualityWriter.scala | 11 +- .../reports/UnexpectedSequenceWriter.scala | 73 +++-- .../gpp/poolq3/reports/package.scala | 90 ------ .../gpp/poolq3/reports/reports.scala | 85 ++++++ .../broadinstitute/gpp/poolq3/seq/seq.scala | 256 ++++++++---------- .../gpp/poolq3/types/LongTuple.scala | 16 +- .../gpp/poolq3/types/OutputFileType.scala | 2 +- .../gpp/poolq3/types/PoolQException.scala | 2 +- .../gpp/poolq3/types/PoolQRunSummary.scala | 2 +- .../gpp/poolq3/types/PoolQSummary.scala | 2 +- .../gpp/poolq3/types/Read.scala | 2 +- .../gpp/poolq3/types/ReadIdCheckPolicy.scala | 45 ++- .../gpp/poolq3/types/ReadsFileType.scala | 29 +- .../types/UncoordinatedReadsException.scala | 2 +- .../broadinstitute/gpp/poolq3/PoolQTest.scala | 6 +- .../gpp/poolq3/TestResources.scala | 12 +- .../poolq3/barcode/BarcodePackageTest.scala | 20 +- .../poolq3/barcode/BarcodePolicyTest.scala | 6 +- .../barcode/DmuxedBarcodeSourceTest.scala | 8 +- .../DmuxedPairedEndBarcodeSourceTest.scala | 8 +- .../gpp/poolq3/barcode/DmuxedTest.scala | 6 +- .../barcode/FixedOffsetPolicyTest.scala | 10 +- .../IndexOfKnownPrefixPolicyTest.scala | 10 +- .../gpp/poolq3/barcode/KeyMaskTest.scala | 8 +- .../gpp/poolq3/barcode/KeyMaskTest2.scala | 6 +- .../gpp/poolq3/barcode/KeyRangeTest.scala | 8 +- .../barcode/KmpKnownPrefixPolicyTest.scala | 10 +- .../barcode/KnownPrefixPolicyBenchmark.scala | 6 +- .../poolq3/barcode/KnuthMorrisPrattTest.scala | 10 +- .../barcode/SplitBarcodePolicyTest.scala | 2 +- .../poolq3/barcode/TemplatePolicyTest.scala | 6 +- .../barcode/TwoFileBarcodeSourceTest.scala | 11 +- .../collection/CollectionPackageTest.scala | 8 +- .../broadinstitute/gpp/poolq3/gen/gen.scala | 29 ++ .../gpp/poolq3/gen/package.scala | 33 --- .../hist/OpenHashMapHistogramTest.scala | 6 +- .../poolq3/hist/ShardedHistogramTest.scala | 8 +- .../gpp/poolq3/hist/TupleHistogramTest.scala | 6 +- .../integration/AmbiguousMatchTest.scala | 8 +- .../integration/LongBarcodeMatchTest.scala | 8 +- .../integration/PairedEndMatchTest.scala | 12 +- .../gpp/poolq3/integration/UmiMatchTest.scala | 14 +- .../integration/UnambiguousMatchTest.scala | 8 +- .../integration/UnambiguousVariantTest.scala | 15 +- .../integration/UnlabeledConditionsTest.scala | 35 ++- .../legacy/LegacyIntegrationTest.scala | 48 ++-- .../poolq3/numeric/NumericPackageTest.scala | 10 +- .../poolq3/parser/BarcodeSetParserTest.scala | 6 +- .../gpp/poolq3/parser/FastqParserTest.scala | 28 +- .../gpp/poolq3/parser/ParserPackageTest.scala | 6 +- .../gpp/poolq3/parser/ReferenceDataTest.scala | 8 +- .../poolq3/parser/ReferenceEntryTest.scala | 6 +- .../gpp/poolq3/parser/SamParserTest.scala | 6 +- .../gpp/poolq3/parser/TextParserTest.scala | 11 +- .../poolq3/process/ScoringConsumerTest.scala | 30 +- .../gpp/poolq3/reference/BkTreeTest.scala | 44 --- .../poolq3/reference/ExactReferenceTest.scala | 10 +- .../reference/VariantReferenceTest.scala | 10 +- .../poolq3/reports/BarcodeCountsTest.scala | 33 ++- .../ConditionBarcodeCountsSummaryTest.scala | 6 +- .../poolq3/reports/CorrelationFileTest.scala | 26 +- .../gpp/poolq3/reports/CountsTest.scala | 39 ++- .../gpp/poolq3/reports/ReportsTest.scala | 6 +- .../poolq3/reports/RunInfoWriterTest.scala | 13 +- .../gpp/poolq3/reports/UmiQualityTest.scala | 16 +- .../reports/UnexpectedSequencesTest.scala | 33 ++- .../gpp/poolq3/seq/SeqPackageTest.scala | 14 +- .../gpp/poolq3/testutil/package.scala | 23 -- .../gpp/poolq3/testutil/testutil.scala | 19 ++ .../gpp/poolq3/tools/package.scala | 40 --- .../gpp/poolq3/tools/tools.scala | 31 +++ .../poolq3/types/ReadIdCheckPolicyTest.scala | 10 +- version.sbt | 2 +- 142 files changed, 1466 insertions(+), 1855 deletions(-) create mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/barcode/barcode.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/barcode/package.scala create mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/collection/collection.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/collection/package.scala create mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/numeric/numeric.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/numeric/package.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/parser/package.scala create mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/parser/parser.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/reference/BkTree.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/reference/package.scala delete mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/reports/package.scala create mode 100644 src/main/scala/org/broadinstitute/gpp/poolq3/reports/reports.scala create mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/gen/gen.scala delete mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/gen/package.scala delete mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/reference/BkTreeTest.scala delete mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/testutil/package.scala create mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/testutil/testutil.scala delete mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/tools/package.scala create mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/tools/tools.scala diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84ebf9b..3b60555 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: jrouly/scalafmt-native-action@v3 + with: + version: "3.8.0" - name: Set up JDK 8 uses: actions/setup-java@v4 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8128cc5..a7a1759 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,6 +14,8 @@ jobs: - run: git config --global user.email "gpp-informatics@broadinstitute.org" - run: git config --global user.name "GPP Informatics" - uses: jrouly/scalafmt-native-action@v3 + with: + version: "3.8.0" - name: Set up JDK 8 uses: actions/setup-java@v4 with: diff --git a/.scalafix.conf b/.scalafix.conf index 968eb04..0b55fd3 100644 --- a/.scalafix.conf +++ b/.scalafix.conf @@ -7,5 +7,5 @@ OrganizeImports { expandRelative = true groups = ["re:javax?\\.", "scala.", "*"] groupedImports = Merge - removeUnused = true + removeUnused = false } diff --git a/.scalafmt.conf b/.scalafmt.conf index 3c86644..afac328 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,5 +1,13 @@ # PoolQ3 .scalafmt configuration -version=3.7.17 +version=3.8.0 +runner.dialect = scala3 + +# work around https://github.com/scalameta/scalafmt/issues/3787 +fileOverride { + "glob:**.sbt" { + runner.dialect = sbt1 + } +} style = IntelliJ maxColumn = 120 @@ -14,4 +22,6 @@ rewrite.redundantBraces.generalExpressions = false rewrite.redundantBraces.includeUnitMethods = false rewrite.redundantBraces.maxBreaks = 16 rewrite.redundantBraces.stringInterpolation = true -runner.dialect = scala213source3 +rewrite.scala3.convertToNewSyntax = true +rewrite.scala3.removeOptionalBraces = oldSyntaxToo +rewrite.scala3.insertEndMarkerMinLines = 8 diff --git a/CHANGELOG.md b/CHANGELOG.md index 109badd..25ce629 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 3.12.0 +* Update to Scala 3.3.1 + ## 3.11.0 * Machine-parseable condition barcode summary file diff --git a/build.sbt b/build.sbt index e841820..8568f98 100644 --- a/build.sbt +++ b/build.sbt @@ -2,7 +2,7 @@ val artifactId = "poolq" inThisBuild( List( - scalaVersion := "2.13.11", + scalaVersion := "3.3.1", semanticdbEnabled := true, semanticdbVersion := scalafixSemanticdb.revision, versionScheme := Some("early-semver") @@ -10,16 +10,14 @@ inThisBuild( ) lazy val versions = new { - val acyclic = "0.2.1" val betterFiles = "3.9.2" - val betterMonadicFor = "0.3.1" - val catsEffect3 = "3.5.2" + val catsEffect3 = "3.5.3" val cats = "2.10.0" val commonsIo = "2.15.1" val commonsText = "1.11.0" val commonsMath3 = "3.6.1" - val fastutil = "8.5.12" - val fs2 = "3.9.3" + val fastutil = "8.5.13" + val fs2 = "3.9.4" val log4s = "1.10.0" val logback = "1.2.13" val munit = "0.7.29" @@ -27,16 +25,14 @@ lazy val versions = new { val samTools = "3.0.5" val scalaCheck = "1.17.0" val scalaCsv = "1.3.10" - val scalaTest = "3.2.17" - val scalaTestPlusScalaCheck = "3.2.2.0" + val scalaTest = "3.2.18" + val scalaTestPlusScalaCheck = "3.2.18.0" val scopt = "4.1.0" val slf4j = "1.7.36" } lazy val libraries = new { - val acyclic = "com.lihaoyi" %% "acyclic" % versions.acyclic val betterFiles = "com.github.pathikrit" %% "better-files" % versions.betterFiles - val betterMonadicFor = "com.olegpy" %% "better-monadic-for" % versions.betterMonadicFor val cats = "org.typelevel" %% "cats-core" % versions.cats val catsEffect3 = "org.typelevel" %% "cats-effect" % versions.catsEffect3 val commonsIo = "commons-io" % "commons-io" % versions.commonsIo @@ -59,12 +55,11 @@ lazy val libraries = new { val munitCatsEffect3 = "org.typelevel" %% "munit-cats-effect-3" % versions.munitCatsEffect3 val scalaTest = "org.scalatest" %% "scalatest" % versions.scalaTest val scalaCheck = "org.scalacheck" %% "scalacheck" % versions.scalaCheck - val scalaTestPlusScalaCheck = "org.scalatestplus" %% "scalacheck-1-14" % versions.scalaTestPlusScalaCheck + val scalaTestPlusScalaCheck = "org.scalatestplus" %% "scalacheck-1-17" % versions.scalaTestPlusScalaCheck } lazy val dependencies = List( - libraries.acyclic % "provided", libraries.cats, libraries.commonsIo, libraries.commonsMath3, @@ -90,7 +85,7 @@ lazy val dependencies = ) lazy val headerLicenseText = - """|Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + """|Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. | |SPDX-License-Identifier: BSD-3-Clause |""".stripMargin @@ -130,13 +125,9 @@ lazy val poolq = project name := "poolq3", organization := "org.broadinstitute.gpp", libraryDependencies := dependencies, - scalacOptions ++= List("-P:acyclic:force", "-Xsource:3"), buildInfoKeys := Seq[BuildInfoKey](name, version), buildInfoPackage := "org.broadinstitute.gpp.poolq3", - addCompilerPlugin(libraries.acyclic), - addCompilerPlugin(libraries.betterMonadicFor), testFrameworks += new TestFramework("munit.Framework"), - scalacOptions += "-Yrangepos", // ensure munit clues work // Tests pass in parallel, but SLF4J logging behaves weirdly. Disable this flag to examine test // log output; leave this enabled for very fast test execution. Test / parallelExecution := true diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala index fca9e06..1d1c51c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import java.nio.file.{Files, Path} import scala.util.{Failure, Success, Try, Using} -import cats.syntax.all._ +import cats.syntax.all.* import org.broadinstitute.gpp.poolq3.PoolQConfig.synthesizeArgs import org.broadinstitute.gpp.poolq3.barcode.{BarcodePolicy, Barcodes, barcodeSource} import org.broadinstitute.gpp.poolq3.parser.{BarcodeSet, CloseableIterable, ReferenceData} @@ -20,7 +20,7 @@ import org.broadinstitute.gpp.poolq3.process.{ ScoringConsumer, UnexpectedSequenceTracker } -import org.broadinstitute.gpp.poolq3.reference.{ExactReference, Reference, referenceFor} +import org.broadinstitute.gpp.poolq3.reference.{ExactReference, Reference} import org.broadinstitute.gpp.poolq3.reports.{ BarcodeCountsWriter, CorrelationFileWriter, @@ -45,7 +45,7 @@ import org.broadinstitute.gpp.poolq3.types.{ } import org.log4s.{Logger, getLogger} -object PoolQ { +object PoolQ: private[this] val log: Logger = getLogger @@ -60,19 +60,17 @@ object PoolQ { ) final def main(args: Array[String]): Unit = - PoolQConfig.parse(args) match { + PoolQConfig.parse(args) match case None => System.exit(-1) case Some(config) => - run(config) match { + run(config) match case Success(_) => // do nothing case Failure(t) => log.error(t)("PoolQ failed") System.exit(-1) - } - } /** The main entry point for PoolQ3 as an API */ - final def run(config: PoolQConfig): Try[PoolQSummary] = { + final def run(config: PoolQConfig): Try[PoolQSummary] = log.info(s"PoolQ version: ${BuildInfo.version}") logCli(config) @@ -108,7 +106,7 @@ object PoolQ { log.info("Building row reference") val rowReference: Reference = - referenceFor( + Reference( config.rowMatchFn, ReferenceData.truncator(rowBarcodeLength), config.countAmbiguous, @@ -118,7 +116,7 @@ object PoolQ { log.info("Building column reference") val colBarcodeLength = colBarcodePolicyOpt.map(_.length).getOrElse(colReferenceData.barcodeLength) val colReference: Reference = - referenceFor( + Reference( config.colMatchFn, ReferenceData.truncator(colBarcodeLength), config.countAmbiguous, @@ -136,21 +134,20 @@ object PoolQ { barcodeSource(config.input, rowBarcodePolicy, revRowBarcodePolicyOpt, colBarcodePolicyOrLength, umiInfo.map(_._2)) lazy val unexpectedSequenceCacheDirOpt: Option[Path] = - if (config.skipUnexpectedSequenceReport) None - else { + if config.skipUnexpectedSequenceReport then None + else val ret = config.unexpectedSequenceCacheDir.map(Files.createDirectories(_)).orElse { val ret: Path = Files.createTempDirectory("unexpected-sequence-cache") Some(ret) } ret.foreach(path => log.info(s"Writing unexpected sequence cache files to $path")) ret - } lazy val unexpectedSequenceTrackerOpt: Option[UnexpectedSequenceTracker] = unexpectedSequenceCacheDirOpt.map(new UnexpectedSequenceTracker(_, colReference)) val consumer = - if (config.noopConsumer) new NoOpConsumer + if config.noopConsumer then new NoOpConsumer else new ScoringConsumer( rowReference, @@ -162,7 +159,7 @@ object PoolQ { config.isPairedEnd ) - for { + for runSummary <- runProcess(barcodes, consumer) state = runSummary.state counts = state.known @@ -220,17 +217,19 @@ object PoolQ { config.unexpectedSequenceMaxSampleSize ) .as(UnexpectedSequencesFileType.some) - if (config.removeUnexpectedSequenceCache) { + if config.removeUnexpectedSequenceCache then log.info(s"Removing unexpected sequence cache ${config.unexpectedSequenceCacheDir}") UnexpectedSequenceWriter.removeCache(dir) - } ret } _ = log.info(s"Writing run info ${config.output.unexpectedSequencesFile}") _ <- RunInfoWriter.write(config.output.runInfoFile, config) _ = log.info("PoolQ complete") - } yield PoolQSummary(runSummary, AlwaysWrittenFiles ++ Set(cfto, usfto).flatten) - } + yield PoolQSummary(runSummary, AlwaysWrittenFiles ++ Set(cfto, usfto).flatten) + + end for + + end run def runProcess(barcodes: CloseableIterable[Barcodes], consumer: Consumer): Try[PoolQRunSummary] = Using(barcodes.iterator) { iterator => @@ -257,7 +256,7 @@ object PoolQ { (rowBarcodePolicy, None, rowBarcodePolicy.length) } - private[this] def logCli(config: PoolQConfig): Unit = { + private[this] def logCli(config: PoolQConfig): Unit = val logStr = synthesizeArgs(config) .map { @@ -266,6 +265,7 @@ object PoolQ { } .mkString(" \\\n") log.info(s"PoolQ command-line settings:\n$logStr") - } -} + end logCli + +end PoolQ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala index f7daa8c..a1490d3 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,8 +10,8 @@ import java.nio.file.{Files, Path, Paths} import scala.collection.mutable -import cats.data.{NonEmptyList => Nel} -import cats.syntax.all._ +import cats.data.NonEmptyList as Nel +import cats.syntax.all.* import org.broadinstitute.gpp.poolq3.PoolQConfig.DefaultPath import org.broadinstitute.gpp.poolq3.reports.{GctDialect, PoolQ2Dialect, PoolQ3Dialect, ReportsDialect} import org.broadinstitute.gpp.poolq3.types.{PoolQException, ReadIdCheckPolicy} @@ -34,15 +34,15 @@ final case class PoolQInput( addlColReads: List[Path] = Nil, addlReads: List[(Option[String], Path)] = Nil, demultiplexed: Boolean = false -) { +): - def readsSourceE: Either[Exception, ReadsSource] = (rowReads, reverseRowReads, colReads, reads, demultiplexed) match { + def readsSourceE: Either[Exception, ReadsSource] = (rowReads, reverseRowReads, colReads, reads, demultiplexed) match case (None, None, None, Some(r), false) => Right(ReadsSource.SelfContained(Nel(r._2, addlReads.view.map(_._2).toList))) case (Some(rr), None, Some(cr), None, false) => val rs = ReadsSource.Split(Nel(cr, addlColReads), Nel(rr._2, addlRowReads.view.map(_._2).toList)) - if (rs.forward.length == rs.index.length) Right(rs) + if rs.forward.length == rs.index.length then Right(rs) else Left(PoolQException("Number of row, column, and reverse reads files must match")) case (Some(rr), Some(rrr), Some(cr), None, false) => @@ -51,7 +51,7 @@ final case class PoolQInput( Nel(rr._2, addlRowReads.view.map(_._2).toList), Nel(rrr._2, addlReverseRowReads.view.map(_._2).toList) ) - if (rs.forward.length == rs.index.length && rs.forward.length == rs.reverse.length) Right(rs) + if rs.forward.length == rs.index.length && rs.forward.length == rs.reverse.length then Right(rs) else Left(PoolQException("Number of row and column reads files must match")) case (Some(rr), None, None, None, true) => @@ -59,15 +59,14 @@ final case class PoolQInput( case (Some(rr), Some(rrr), None, None, true) => val rs = ReadsSource.DmuxedPairedEnd(Nel(rr, addlRowReads), Nel(rrr, addlReverseRowReads)) - if (rs.read1.map(_._1) == rs.read2.map(_._1)) Right(rs) + if rs.read1.map(_._1) == rs.read2.map(_._1) then Right(rs) else Left(PoolQException("Row and column reads files must match")) case _ => Left(PoolQException("Conflicting input options")) - } def readsSource: ReadsSource = readsSourceE.fold(e => throw e, rs => rs) -} +end PoolQInput final case class PoolQOutput( countsFile: Path = Paths.get("counts.txt"), @@ -102,22 +101,22 @@ final case class PoolQConfig( reportsDialect: ReportsDialect = PoolQ3Dialect, alwaysCountColumnBarcodes: Boolean = false, noopConsumer: Boolean = false -) { +): def isPairedEnd = reverseRowBarcodePolicyStr.isDefined && - (input.readsSourceE match { + (input.readsSourceE match case Right(ReadsSource.PairedEnd(_, _, _)) => true case Right(ReadsSource.DmuxedPairedEnd(_, _)) => true case Right(ReadsSource.SelfContained(_)) => false case Right(ReadsSource.Split(_, _)) => false case Right(ReadsSource.Dmuxed(_)) => false case Left(_) => false - }) + ) -} +end PoolQConfig -object PoolQConfig { +object PoolQConfig: private[poolq3] val BarcodePathRegex = "([ACGT]+):(.+)".r @@ -126,31 +125,30 @@ object PoolQConfig { implicit private[this] val readPath: Read[Path] = implicitly[Read[File]].map(_.toPath) implicit private[this] val readPaths: Read[(Path, List[Path])] = implicitly[Read[Seq[File]]].map { files => - files.toList.map(_.toPath) match { + files.toList.map(_.toPath) match case Nil => throw new IllegalArgumentException(s"No argument provided") case (x :: xs) => (x, xs) - } } implicit private[this] val readBarcodePaths: Read[List[(Option[String], Path)]] = implicitly[Read[Seq[String]]].map { args => args.view.map { arg => - arg match { + arg match case BarcodePathRegex(bc, pathStr) => (Option(bc), Paths.get(pathStr)) case _ => (None, Paths.get(arg)) - } }.toList } implicit private[this] val readReadIdCheckPolicy: Read[ReadIdCheckPolicy] = implicitly[Read[String]].map(ReadIdCheckPolicy.forName) - def parse(args: Array[String]): Option[PoolQConfig] = { + def parse(args: Array[String]): Option[PoolQConfig] = + + val parser: OptionParser[PoolQConfig] = new OptionParser[PoolQConfig]("poolq"): - val parser: OptionParser[PoolQConfig] = new OptionParser[PoolQConfig]("poolq") { private[this] def existsAndIsReadable(f: Path): Either[String, Unit] = - if (!Files.exists(f)) failure(s"Could not find ${f.toAbsolutePath}") - else if (!Files.isReadable(f)) failure(s"Could not read ${f.toAbsolutePath}") + if !Files.exists(f) then failure(s"Could not find ${f.toAbsolutePath}") + else if !Files.isReadable(f) then failure(s"Could not read ${f.toAbsolutePath}") else success locally { @@ -290,7 +288,7 @@ object PoolQConfig { .valueName("") .action((n, c) => c.copy(unexpectedSequencesToReport = n)) .validate { n => - if (n > 0) success + if n > 0 then success else failure(s"Unexpected sequence threshold must be greater than 0, got: $n") } @@ -334,7 +332,7 @@ object PoolQConfig { val _ = opt[Unit]("noop").hidden().action((_, c) => c.copy(noopConsumer = true)) val _ = checkConfig { c => - val readsCheck = (c.input.reads, c.input.rowReads, c.input.colReads, c.input.demultiplexed) match { + val readsCheck = (c.input.reads, c.input.rowReads, c.input.colReads, c.input.demultiplexed) match case (None, None, None, _) => failure("No reads files specified.") case (None, None, Some(_), false) => failure("Column barcode file specified but no row barcodes file specified.") @@ -343,24 +341,21 @@ object PoolQConfig { case (None, Some(_), None, false) => failure("Row barcode file specified but no column barcodes file specified.") case _ => success - } - val pairedEndConsistencyCheck = (c.input.reverseRowReads, c.reverseRowBarcodePolicyStr) match { + val pairedEndConsistencyCheck = (c.input.reverseRowReads, c.reverseRowBarcodePolicyStr) match case (Some(_), None) => failure("Reverse row reads file specified but no reverse barcode policy specified") case (None, Some(_)) => failure("Reverse barcode policy specified but now reverse row reads file specified") case _ => success - } readsCheck >> pairedEndConsistencyCheck } } - } parser.parse(args, PoolQConfig()) - } + end parse - def synthesizeArgs(config: PoolQConfig): List[(String, String)] = { + def synthesizeArgs(config: PoolQConfig): List[(String, String)] = val args = new mutable.ArrayBuffer[(String, String)] // umi @@ -399,29 +394,21 @@ object PoolQConfig { // run control args += (("row-matcher", config.rowMatchFn)) args += (("col-matcher", config.colMatchFn)) - if (config.countAmbiguous) { - args += (("count-ambiguous", "")) - } + if config.countAmbiguous then args += (("count-ambiguous", "")) args += (("row-barcode-policy", config.rowBarcodePolicyStr)) config.reverseRowBarcodePolicyStr.foreach(p => args += (("rev-row-barcode-policy", p))) config.colBarcodePolicyStr.foreach(pol => args += (("col-barcode-policy", pol))) umiInfo.map(_._2).foreach(str => args += (("umi-barcode-policy", str))) // deal with the unexpected sequence options - if (config.skipUnexpectedSequenceReport) { - args += (("skip-unexpected-sequence-report", "")) - } else { + if config.skipUnexpectedSequenceReport then args += (("skip-unexpected-sequence-report", "")) + else // give whatever path we were given here - this _may_ not need to be included at all, honestly config.unexpectedSequenceCacheDir.foreach(file => args += (("unexpected-sequence-cache", file.toString))) args += (("unexpected-sequence-threshold", config.unexpectedSequencesToReport.toString)) - } - if (config.skipShortReads) { - args += (("skip-short-reads", "")) - } - if (config.reportsDialect == PoolQ2Dialect) { - args += (("compat", "")) - } + if config.skipShortReads then args += (("skip-short-reads", "")) + if config.reportsDialect == PoolQ2Dialect then args += (("compat", "")) // output files val output = config.output @@ -435,15 +422,13 @@ object PoolQConfig { output.umiCountsFilesDir.foreach(d => args += (("umi-counts-dir", d.toString))) output.umiBarcodeCountsFilesDir.foreach(d => args += (("umi-barcode-counts-dir", d.toString))) } - if (!config.skipUnexpectedSequenceReport) { + if !config.skipUnexpectedSequenceReport then args += (("unexpected-sequences", output.unexpectedSequencesFile.getFileName.toString)) - } - if (config.alwaysCountColumnBarcodes) { - args += (("always-count-col-barcodes", "")) - } + if config.alwaysCountColumnBarcodes then args += (("always-count-col-barcodes", "")) args += (("run-info", output.runInfoFile.getFileName.toString)) args.toList - } -} + end synthesizeArgs + +end PoolQConfig diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/ReadsSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/ReadsSource.scala index 0e87359..185fb20 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/ReadsSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/ReadsSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,11 +7,11 @@ package org.broadinstitute.gpp.poolq3 import java.nio.file.Path -import cats.data.{NonEmptyList => Nel} +import cats.data.NonEmptyList as Nel sealed trait ReadsSource extends Product with Serializable -object ReadsSource { +object ReadsSource: final case class SelfContained(paths: Nel[Path]) extends ReadsSource final case class Split(index: Nel[Path], forward: Nel[Path]) extends ReadsSource @@ -21,4 +21,4 @@ object ReadsSource { final case class DmuxedPairedEnd(read1: Nel[(Option[String], Path)], read2: Nel[(Option[String], Path)]) extends ReadsSource -} +end ReadsSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicy.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicy.scala index d2b644a..b401d10 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicy.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicy.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,112 +12,105 @@ import scala.util.matching.Regex import org.broadinstitute.gpp.poolq3.types.Read -sealed trait BarcodePolicy { +sealed trait BarcodePolicy: def length: Int def find(read: Read): Option[FoundBarcode] -} -object BarcodePolicy { +object BarcodePolicy: private val Regex = """^([A-Z]+)(.+)""".r def apply(desc: String, refBarcodeLength: Int, skipShortReads: Boolean): BarcodePolicy = - desc match { + desc match case Regex(descriptor, rest) => - descriptor match { + descriptor match case "FIXED" => FixedOffsetPolicy(rest, refBarcodeLength, skipShortReads) case "PREFIX" => KnownPrefixPolicy(rest, refBarcodeLength) case "KEYMASK" | "TEMPLATE" => TemplatePolicy(rest, refBarcodeLength) case _ => throw new IllegalArgumentException(s"Unrecognized barcode policy: $desc") - } case _ => throw new IllegalArgumentException(s"Unrecognized barcode policy: $desc") - } -} +end BarcodePolicy -final case class FixedOffsetPolicy(startPos0: Int, length: Int, skipShortReads: Boolean) extends BarcodePolicy { +final case class FixedOffsetPolicy(startPos0: Int, length: Int, skipShortReads: Boolean) extends BarcodePolicy: private[this] val minLength: Int = startPos0 + length private[this] val endPos0: Int = startPos0 + length override def find(read: Read): Option[FoundBarcode] = - if (read.seq.length < minLength) { - if (skipShortReads) None + if read.seq.length < minLength then + if skipShortReads then None else throw ReadTooShortException(read.seq, startPos0, minLength) - } else Some(FoundBarcode(copyOfRange(read.seq.toCharArray, startPos0, endPos0), startPos0)) + else Some(FoundBarcode(copyOfRange(read.seq.toCharArray, startPos0, endPos0), startPos0)) -} +end FixedOffsetPolicy -object FixedOffsetPolicy { +object FixedOffsetPolicy: private[this] val Regex1: Regex = """^[:@](\d+)$""".r private[this] val Regex2: Regex = """^[:@](\d+):(\d+)$""".r def apply(s: String, refBarcodeLength: Int, skipShortReads: Boolean): FixedOffsetPolicy = - s match { + s match case Regex1(sp0) => FixedOffsetPolicy(sp0.toInt, refBarcodeLength, skipShortReads) case Regex2(sp0, len) => FixedOffsetPolicy(sp0.toInt, len.toInt, skipShortReads) case _ => throw new IllegalArgumentException(s"Incomprehensible fixed offset barcode policy: $s") - } -} +end FixedOffsetPolicy /** This is a marker trait for known-prefix policies; it exists primarily for its companion object, defined below. */ -sealed trait KnownPrefixPolicy extends BarcodePolicy { +sealed trait KnownPrefixPolicy extends BarcodePolicy: def prefix: String -} final case class IndexOfKnownPrefixPolicy( prefix: String, length: Int, minPrefixStartPos: Option[Int] = None, maxPrefixStartPos: Option[Int] = None -) extends KnownPrefixPolicy { +) extends KnownPrefixPolicy: private[this] val prefixLength: Int = prefix.length private[this] val minPrefixStartPosInt: Int = minPrefixStartPos.getOrElse(0) private[this] val maxPrefixStartPosInt: Int = maxPrefixStartPos.getOrElse(Int.MaxValue) - override def find(read: Read): Option[FoundBarcode] = { + override def find(read: Read): Option[FoundBarcode] = val index = read.seq.indexOf(prefix, minPrefixStartPosInt) val barcodeStart = index + prefixLength - if (index < 0) None - else if (index > maxPrefixStartPosInt) None - else if ((read.seq.length - barcodeStart) < length) None + if index < 0 then None + else if index > maxPrefixStartPosInt then None + else if (read.seq.length - barcodeStart) < length then None else Some(FoundBarcode(copyOfRange(read.seq.toCharArray, barcodeStart, barcodeStart + length), barcodeStart)) - } -} +end IndexOfKnownPrefixPolicy final case class KmpKnownPrefixPolicy( prefix: String, length: Int, minPrefixStartPos: Option[Int] = None, maxPrefixStartPos: Option[Int] = None -) extends KnownPrefixPolicy { +) extends KnownPrefixPolicy: private[this] val kmp: KnuthMorrisPratt = new KnuthMorrisPratt(prefix) private[this] val prefixLength: Int = prefix.length private[this] val minPrefixStartPosInt: Int = minPrefixStartPos.getOrElse(0) private[this] val maxPrefixStartPosInt: Int = maxPrefixStartPos.getOrElse(Int.MaxValue) - override def find(read: Read): Option[FoundBarcode] = { + override def find(read: Read): Option[FoundBarcode] = val searchEndPos = math.min(maxPrefixStartPosInt, read.seq.length - length) kmp.search(read.seq, minPrefixStartPosInt, searchEndPos).map { prefixStart => val barcodeStart = prefixStart + prefixLength FoundBarcode(copyOfRange(read.seq.toCharArray, barcodeStart, barcodeStart + length), barcodeStart) } - } -} +end KmpKnownPrefixPolicy -object KnownPrefixPolicy { +object KnownPrefixPolicy: val Regex: Regex = """^:([ACGT]+)(?:@(\d+)?(-\d+)?)?(:\d+)?$""".r def apply(s: String, refBarcodeLength: Int): KnownPrefixPolicy = - s match { + s match case Regex(prefix, minStr, maxStr, lengthStr) => val min = Option(minStr).map(_.toInt) val max = Option(maxStr).map(_.tail.toInt) @@ -125,40 +118,35 @@ object KnownPrefixPolicy { IndexOfKnownPrefixPolicy(prefix, length, min, max) case _ => throw new IllegalArgumentException(s"Incomprehensible known prefix barcode policy: $s") - } -} +end KnownPrefixPolicy sealed trait TemplatePolicy extends BarcodePolicy with Product with Serializable -object TemplatePolicy { +object TemplatePolicy: val Regex1: Regex = """^:([ACGTRYSWKMBDHVNacgtryswkmbdhvn]+)(?:@(\d+)?(-\d+)?)?$""".r val Regex2: Regex = """^([acgt]+)(N+)(n+)([acgt]+)(N+)[acgt]*$""".r def apply(s: String, refBarcodeLength: Int): TemplatePolicy = - s match { + s match case Regex1(ctx, minStr, maxStr) => - ctx match { + ctx match case Regex2(p1, b1, gap, p2, b2) => - if ((b1.length + b2.length) != refBarcodeLength) { + if (b1.length + b2.length) != refBarcodeLength then throw new IllegalArgumentException(s"$s is not compatible with the provided reference file") - } val min = Option(minStr).map(_.toInt) val max = Option(maxStr).map(_.tail.toInt) SplitBarcodePolicy(p1.toUpperCase, b1.length, gap.length, p2.toUpperCase, b2.length, min, max) case _ => val km = KeyMask(ctx) - if (km.keyLengthInBases != refBarcodeLength) { + if km.keyLengthInBases != refBarcodeLength then throw new IllegalArgumentException(s"$s is not compatible with the provided reference file") - } val min = Option(minStr).map(_.toInt) val max = Option(maxStr).map(_.tail.toInt) GeneralTemplatePolicy(km, min, max) - } case _ => throw new IllegalArgumentException(s"Incomprehensible template barcode policy: $s") - } /* A.................Adenine @@ -179,7 +167,7 @@ object TemplatePolicy { . or -............gap */ final def compatible(p: Char, b: Char): Boolean = - (p: @switch) match { + (p: @switch) match case 'N' => true case 'A' => b == 'A' case 'C' => b == 'C' @@ -196,21 +184,18 @@ object TemplatePolicy { case 'H' => b == 'A' || b == 'C' || b == 'T' case 'V' => b == 'A' || b == 'C' || b == 'G' case _ => false - } - final def satisfies(template: Array[Char], seq: String, seqOffset: Int): Boolean = { + final def satisfies(template: Array[Char], seq: String, seqOffset: Int): Boolean = var i = 0 - while (i < template.length) { - if (!compatible(template(i), seq(seqOffset + i))) return false + while i < template.length do + if !compatible(template(i), seq(seqOffset + i)) then return false i += 1 - } true - } -} +end TemplatePolicy final case class GeneralTemplatePolicy(template: KeyMask, minStartPos: Option[Int], maxStartPos: Option[Int] = None) - extends TemplatePolicy { + extends TemplatePolicy: private[this] val minStartPosInt: Int = minStartPos.getOrElse(0) private[this] val maxStartPosInt: Int = maxStartPos.getOrElse(Int.MaxValue) @@ -224,20 +209,21 @@ final case class GeneralTemplatePolicy(template: KeyMask, minStartPos: Option[In // the publicly exposed length is the keyLength override val length: Int = keyLength - override def find(read: Read): Option[FoundBarcode] = { + override def find(read: Read): Option[FoundBarcode] = // loop through the sequence looking for a valid context seq val maxPos = math.min(read.seq.length - contextLength, maxStartPosInt) @tailrec def find(i: Int): Option[Int] = - if (i > maxPos) None - else if (TemplatePolicy.satisfies(templateChars, read.seq, i)) Some(i) + if i > maxPos then None + else if TemplatePolicy.satisfies(templateChars, read.seq, i) then Some(i) else find(i + 1) find(minStartPosInt).map(i => extract(read, i)) - } - private[barcode] def extract(read: Read, i: Int): FoundBarcode = { + end find + + private[barcode] def extract(read: Read, i: Int): FoundBarcode = val keyBuf = Array.ofDim[Char](keyLength) var offset = 0 template.keyRanges.foreach { kr => @@ -245,9 +231,10 @@ final case class GeneralTemplatePolicy(template: KeyMask, minStartPos: Option[In offset += kr.length } FoundBarcode(keyBuf, firstKeyBaseOffset + i) - } -} + end extract + +end GeneralTemplatePolicy final case class SplitBarcodePolicy( prefix1: String, @@ -257,7 +244,7 @@ final case class SplitBarcodePolicy( b2Length: Int, minPrefix1StartPos: Option[Int], maxPrefix1StartPos: Option[Int] = None -) extends TemplatePolicy { +) extends TemplatePolicy: import SplitBarcodePolicy.{indexOf, matches} private[this] val minPrefix1StartPosInt: Int = minPrefix1StartPos.getOrElse(0) @@ -269,61 +256,52 @@ final case class SplitBarcodePolicy( override def length: Int = b1Length + b2Length - override def find(read: Read): Option[FoundBarcode] = { + override def find(read: Read): Option[FoundBarcode] = @tailrec - def loop(start: Int): Option[FoundBarcode] = { + def loop(start: Int): Option[FoundBarcode] = val e = math.min(maxPrefix1StartPosInt, read.seq.length - patternLength) - if (start > e) None - else { - indexOf(prefix1, read.seq, start, e) match { + if start > e then None + else + indexOf(prefix1, read.seq, start, e) match case None => None case Some(p1Index) => val p2Index = p1Index + expectedP2Offset - if (matches(prefix2, read.seq, p2Index)) { + if matches(prefix2, read.seq, p2Index) then val dest = Array.ofDim[Char](length) // copy in the the barcodes read.seq.getChars(p1Index + p1Length, p1Index + p1Length + b1Length, dest, 0) read.seq.getChars(p2Index + p2Length, p2Index + p2Length + b2Length, dest, b1Length) Some(FoundBarcode(dest, p1Index + p1Length)) - } else { - loop(p1Index + 1) - } - } - } - } + else loop(p1Index + 1) + end if + end loop loop(minPrefix1StartPosInt) - } -} + end find + +end SplitBarcodePolicy -object SplitBarcodePolicy { +object SplitBarcodePolicy: // assumes ASCII (1-byte) chars // haystack is the string we are searching // needle is the string we are searching for // start is the first place in `haystack` we will look for `needle` // end is the last place in `haystack` where `needle` may _begin_ - final private[barcode] def indexOf(needle: String, haystack: String, start: Int, end: Int): Option[Int] = { + final private[barcode] def indexOf(needle: String, haystack: String, start: Int, end: Int): Option[Int] = @tailrec def loop(i: Int): Option[Int] = - if (i > math.min(haystack.length - needle.length, end)) { - None - } else { - if (matches(needle, haystack, i)) Some(i) - else loop(i + 1) - } + if i > math.min(haystack.length - needle.length, end) then None + else if matches(needle, haystack, i) then Some(i) + else loop(i + 1) loop(start) - } - final private def matches(needle: String, haystack: String, haystackOffset: Int): Boolean = { + final private def matches(needle: String, haystack: String, haystackOffset: Int): Boolean = @tailrec def loop(i: Int): Boolean = - if (i >= needle.length) true - else { - if (needle(i) != haystack(haystackOffset + i)) false - else loop(i + 1) - } + if i >= needle.length then true + else if needle(i) != haystack(haystackOffset + i) then false + else loop(i + 1) loop(0) - } -} +end SplitBarcodePolicy diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Barcodes.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Barcodes.scala index 791c3f4..1b15373 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Barcodes.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Barcodes.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Dmuxed.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Dmuxed.scala index 8755847..264f0d0 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Dmuxed.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/Dmuxed.scala @@ -1,18 +1,14 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -object Dmuxed { +object Dmuxed: - private[barcode] def barcodeFromId(length: Int): String => Option[FoundBarcode] = { + private[barcode] def barcodeFromId(length: Int): String => Option[FoundBarcode] = val regex = s"@.*[^ACGTN]([ACGTN]{$length})$$".r - _ match { + _ match case regex(barcode) => Some(FoundBarcode(barcode.toCharArray, 0)) case _ => None - } - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSource.scala index 23de4de..eba702f 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,26 +13,26 @@ final class DmuxedBarcodeSource( rowPolicy: BarcodePolicy, umiPolicyOpt: Option[BarcodePolicy], colBarcodeLength: Int -) extends CloseableIterable[Barcodes] { +) extends CloseableIterable[Barcodes]: // used to attempt to parse barcodes out of ids if the file has no associated barcode private val colBarcodeParser = Dmuxed.barcodeFromId(colBarcodeLength) private def colBarcodeOpt = parser.indexBarcode - private[this] class BarcodeIterator(iterator: CloseableIterator[Read]) extends CloseableIterator[Barcodes] { + private[this] class BarcodeIterator(iterator: CloseableIterator[Read]) extends CloseableIterator[Barcodes]: override def hasNext: Boolean = iterator.hasNext - override def next(): Barcodes = { + override def next(): Barcodes = val nextRead = iterator.next() val rowBarcodeOpt = rowPolicy.find(nextRead) val umiBarcodeOpt = umiPolicyOpt.flatMap(_.find(nextRead)) Barcodes(rowBarcodeOpt, None, colBarcodeOpt.orElse(colBarcodeParser(nextRead.id)), umiBarcodeOpt) - } override def close(): Unit = iterator.close() - } + + end BarcodeIterator override def iterator: CloseableIterator[Barcodes] = new BarcodeIterator(parser.iterator) -} +end DmuxedBarcodeSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSource.scala index 8eca5ce..0660ee7 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -16,21 +16,21 @@ class DmuxedPairedEndBarcodeSource( umiPolicyOpt: Option[BarcodePolicy], readIdCheckPolicy: ReadIdCheckPolicy, colBarcodeLength: Int -) extends CloseableIterable[Barcodes] { +) extends CloseableIterable[Barcodes]: // the index barcode _is_ the column barcode; we get it from the row parser // because the demultiplexed file is associated with the index barcode private def colBarcodeOpt = rowParser.indexBarcode private[this] class BarcodeIterator(rowIterator: CloseableIterator[Read], revRowIterator: CloseableIterator[Read]) - extends CloseableIterator[Barcodes] { + extends CloseableIterator[Barcodes]: // used to attempt to parse barcodes out of ids if the file has no associated barcode private val colBarcodeParser = Dmuxed.barcodeFromId(colBarcodeLength) final override def hasNext: Boolean = rowIterator.hasNext && revRowIterator.hasNext - final override def next(): Barcodes = { + final override def next(): Barcodes = val nextRow = rowIterator.next() val nextRevRow = revRowIterator.next() readIdCheckPolicy.check(nextRow, nextRevRow) @@ -38,15 +38,16 @@ class DmuxedPairedEndBarcodeSource( val revRowBarcodeOpt = revRowPolicy.find(nextRevRow) val umiBarcodeOpt = umiPolicyOpt.flatMap(_.find(nextRow)) Barcodes(rowBarcodeOpt, revRowBarcodeOpt, colBarcodeOpt.orElse(colBarcodeParser(nextRow.id)), umiBarcodeOpt) - } + + end next final override def close(): Unit = try rowIterator.close() finally revRowIterator.close() - } + end BarcodeIterator override def iterator: CloseableIterator[Barcodes] = new BarcodeIterator(rowParser.iterator, revRowParser.iterator) -} +end DmuxedPairedEndBarcodeSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/FoundBarcode.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/FoundBarcode.scala index 51108b8..72f72cd 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/FoundBarcode.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/FoundBarcode.scala @@ -1,19 +1,18 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -final case class FoundBarcode(barcode: Array[Char], offset0: Int) { +final case class FoundBarcode(barcode: Array[Char], offset0: Int): override def toString = s"FoundBarcode(${new String(barcode)}, $offset0)" /** This method exists purely for testing. In production, we do not compare FoundBarcode objects */ override def equals(other: scala.Any): Boolean = - other match { + other match case that: FoundBarcode => (this eq that) || (this.barcode.sameElements(that.barcode) && this.offset0 == that.offset0) case _ => false - } -} +end FoundBarcode diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMask.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMask.scala index 41b815a..947d19f 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMask.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMask.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.barcode import scala.annotation.tailrec -sealed abstract class KeyMask(val pattern: String, val keyRanges: Seq[KeyRange]) { +sealed abstract class KeyMask(val pattern: String, val keyRanges: Seq[KeyRange]): // these must be in sorted order assert(keyRanges.sorted == keyRanges) @@ -19,14 +19,14 @@ sealed abstract class KeyMask(val pattern: String, val keyRanges: Seq[KeyRange]) override def toString: String = pattern -} +end KeyMask -object KeyMask { +object KeyMask: def apply(pattern: String): KeyMask = create(pattern, parsePatternRanges(pattern)) - def apply(contextLength: Int, keyRanges: Seq[KeyRange]): KeyMask = { + def apply(contextLength: Int, keyRanges: Seq[KeyRange]): KeyMask = require(keyRanges.nonEmpty, "Key mask must have at least one key range") keyRanges.foreach { r => require(r.end0 < contextLength, s"contextLength ($contextLength) is not large enough to contain key range: $r") @@ -34,43 +34,40 @@ object KeyMask { val mergedRanges = mergeAdjacent(keyRanges.sorted) val pat: String = constructPattern(contextLength, keyRanges) create(pat, mergedRanges) - } + + end apply private[this] def create(pattern: String, mergedRanges: Seq[KeyRange]): KeyMask = - mergedRanges.length match { + mergedRanges.length match case 1 => val r = mergedRanges.head - if (r.start0 == 0 && r.length == pattern.length) - KeyMask0(pattern) + if r.start0 == 0 && r.length == pattern.length then KeyMask0(pattern) else KeyMask1(pattern, r) case 2 => KeyMask2(pattern, mergedRanges(0), mergedRanges(1)) case _ => KeyMaskN(pattern, mergedRanges) - } - def fromString(contextLength: Int, str: String): KeyMask = { + def fromString(contextLength: Int, str: String): KeyMask = val ranges = str.split(",", -1).view.map(s => KeyRange.apply(s.trim)) require(ranges.nonEmpty, s"KeyMask range string yields no valid ranges: '$str'") apply(contextLength, ranges.toIndexedSeq) - } /** Given a sorted sequence of potentially-overlapping key ranges (which represent closed intervals in the key space), * merges adjacent/overlapping ranges. For example, [1-9], [9-10], [12-14] should be merged to just [1-10], [12-14] */ - private[barcode] def mergeAdjacent(bases: Seq[KeyRange]): Seq[KeyRange] = { + private[barcode] def mergeAdjacent(bases: Seq[KeyRange]): Seq[KeyRange] = // use List for pattern matching & fast seq construction def merge(acc: List[KeyRange], current: KeyRange): List[KeyRange] = - acc match { + acc match case Nil => current :: Nil case head :: tail => - if (head.end0 >= current.start0 - 1) - KeyRange(head.start0, current.end0) :: tail + if head.end0 >= current.start0 - 1 then KeyRange(head.start0, current.end0) :: tail else current :: acc - } // but use an IndexedSeq for efficiency later bases.foldLeft(List[KeyRange]())(merge).toIndexedSeq.reverse - } + + end mergeAdjacent /** Given a pattern string representing the key mask, generates the list of key ranges that are used to construct a * key that will be stored in the index. The input description uses a capital letters to indicate that a base at that @@ -79,28 +76,27 @@ object KeyMask { * that all indexed context sequences contain an "A" at that position (others are skipped during indexing, and are * simply not present in this index). */ - private[barcode] def parsePatternRanges(pattern: String): List[KeyRange] = { + private[barcode] def parsePatternRanges(pattern: String): List[KeyRange] = @tailrec def loop(acc: List[KeyRange], ps: List[(Char, Int)]): List[KeyRange] = - ps match { + ps match case Nil => acc.reverse case (base, startIdx) :: _ if base.isUpper => val (span, rest) = ps.span { case (p, _) => p.isUpper } loop(KeyRange(startIdx, startIdx + span.length - 1) :: acc, rest) case (_, _) :: tl => loop(acc, tl) - } // in a micro-benchmark, empirically calling zipWithIndex before toList is faster than // the other way around loop(Nil, pattern.zipWithIndex.toList) - } - def constructPattern(length: Int, ranges: Seq[KeyRange]): String = { + end parsePatternRanges + + def constructPattern(length: Int, ranges: Seq[KeyRange]): String = val chars = Array.fill[Char](length)('n') ranges.foreach(range => (range.start0 to range.end0).foreach(i => chars(i) = chars(i).toUpper)) new String(chars) - } -} +end KeyMask //-------------------------------------------------------------------------------------------------- // KeyMask implementations @@ -120,6 +116,5 @@ final case class KeyMask2(override val pattern: String, keyRange1: KeyRange, key /** A `KeyMask` with a any number of gaps. I doubt anyone will ever need this */ final case class KeyMaskN(override val pattern: String, override val keyRanges: Seq[KeyRange]) - extends KeyMask(pattern, keyRanges) { + extends KeyMask(pattern, keyRanges): require(keyRanges.size > 2, s"KeyMaskN must have at least 3 keyRanges: $keyRanges") -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRange.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRange.scala index 021b182..c41611b 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRange.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRange.scala @@ -1,11 +1,11 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -final case class KeyRange(start0: Int, end0: Int) { +final case class KeyRange(start0: Int, end0: Int): require(start0 <= end0, s"`start` must be <= `end`, got [$start0, $end0]") require(start0 >= 0, s"negative indices not allowed in KeyRange: [$start0, $end0]") require(end0 - start0 >= 0, s"KeyRange($start0, $end0) has illegal length ${end0 - start0 + 1} (must be positive)") @@ -16,11 +16,11 @@ final case class KeyRange(start0: Int, end0: Int) { def end1: Int = end0 + 1 - override def toString: String = if (length == 1) start1.toString else s"$start1..$end1" + override def toString: String = if length == 1 then start1.toString else s"$start1..$end1" -} +end KeyRange -object KeyRange { +object KeyRange: implicit val ord: Ordering[KeyRange] = Ordering.by(kr => (kr.start0, kr.end0)) @@ -28,10 +28,9 @@ object KeyRange { private[this] val Range2Re = """^(\d+)(?:-|\.\.)(\d+)$""".r def apply(str: String): KeyRange = - str match { + str match case Range2Re(s, e) => KeyRange(s.toInt - 1, e.toInt - 1) case Range1Re(s) => KeyRange(s.toInt - 1, s.toInt - 1) case _ => throw new IllegalArgumentException(s"Unrecognized key range `$str`") - } -} +end KeyRange diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPratt.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPratt.scala index e63faa3..d8496bf 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPratt.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPratt.scala @@ -1,11 +1,11 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -final class KnuthMorrisPratt(word: String) { +final class KnuthMorrisPratt(word: String): private[this] val f: Array[Int] = KnuthMorrisPratt.failure(word) final def search(text: String): Option[Int] = search(text, 0, text.length) @@ -13,36 +13,35 @@ final class KnuthMorrisPratt(word: String) { /** Returns `Some(idx)` where `idx` is the index of the first occurrence of `word` within the provided `text` between * `fromIndex` and `toIndex`, or `None` if `word` is not found within the range of `text`. */ - final def search(text: String, fromIndex: Int, toIndex: Int): Option[Int] = { + final def search(text: String, fromIndex: Int, toIndex: Int): Option[Int] = val wordLength = word.length var m = fromIndex var i = 0 - while ((m + i) < toIndex) { - if (word.charAt(i) == text.charAt(m + i)) { - if (i == (wordLength - 1)) return Some(m) + while (m + i) < toIndex do + if word.charAt(i) == text.charAt(m + i) then + if i == (wordLength - 1) then return Some(m) i += 1 - } else { + else val fi = f(i) - if (fi > -1) { + if fi > -1 then m = m + i - fi i = fi - } else { + else m = m + i + 1 i = 0 - } - } - } + end while None - } -} + end search -object KnuthMorrisPratt { +end KnuthMorrisPratt + +object KnuthMorrisPratt: /** Computes the KMP failure function, which maps integers: {1,2,...,m} -> {0,1,...,m-1} such that f(q) = max { k : k * < q and Pk is a suffix of Pq } CLR calls this the prefix function. See CLR 1ed p. 871 for details. */ - def failure(word: String): Array[Int] = { + def failure(word: String): Array[Int] = // initialize the KMP failure array val f = Array.fill(word.length)(0) f(0) = -1 @@ -51,21 +50,19 @@ object KnuthMorrisPratt { var wi = 2 // the word index var si = 0 // the substring index - while (wi < word.length) { - if (word(wi - 1) == word(si)) { + while wi < word.length do + if word(wi - 1) == word(si) then f(wi) = si + 1 si = si + 1 wi = wi + 1 - } else if (si > 0) { - si = f(si) - } else { + else if si > 0 then si = f(si) + else f(wi) = 0 wi = wi + 1 - } - } - + end while // return f f - } -} + end failure + +end KnuthMorrisPratt diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ReadTooShortException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ReadTooShortException.scala index 2f050d9..d37914d 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ReadTooShortException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ReadTooShortException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/SingleFileBarcodeSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/SingleFileBarcodeSource.scala index 184ae6f..f8f62a2 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/SingleFileBarcodeSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/SingleFileBarcodeSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,23 +13,23 @@ final class SingleFileBarcodeSource( rowPolicy: BarcodePolicy, columnPolicy: BarcodePolicy, umiPolicyOpt: Option[BarcodePolicy] -) extends CloseableIterable[Barcodes] { +) extends CloseableIterable[Barcodes]: - private[this] class BarcodeIterator(iterator: CloseableIterator[Read]) extends CloseableIterator[Barcodes] { + private[this] class BarcodeIterator(iterator: CloseableIterator[Read]) extends CloseableIterator[Barcodes]: override def hasNext: Boolean = iterator.hasNext - override def next(): Barcodes = { + override def next(): Barcodes = val nextRead = iterator.next() val rowBarcodeOpt = rowPolicy.find(nextRead) val colBarcodeOpt = columnPolicy.find(nextRead) val umiBarcodeOpt = umiPolicyOpt.flatMap(_.find(nextRead)) Barcodes(rowBarcodeOpt, None, colBarcodeOpt, umiBarcodeOpt) - } override def close(): Unit = iterator.close() - } + + end BarcodeIterator override def iterator: CloseableIterator[Barcodes] = new BarcodeIterator(parser.iterator) -} +end SingleFileBarcodeSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ThreeFileBarcodeSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ThreeFileBarcodeSource.scala index 794c661..a9067b9 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ThreeFileBarcodeSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/ThreeFileBarcodeSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -17,17 +17,17 @@ final class ThreeFileBarcodeSource( columnPolicy: BarcodePolicy, umiPolicyOpt: Option[BarcodePolicy], readIdCheckPolicy: ReadIdCheckPolicy -) extends CloseableIterable[Barcodes] { +) extends CloseableIterable[Barcodes]: private[this] class BarcodeIterator( rowIterator: CloseableIterator[Read], revRowIterator: CloseableIterator[Read], colIterator: CloseableIterator[Read] - ) extends CloseableIterator[Barcodes] { + ) extends CloseableIterator[Barcodes]: final override def hasNext: Boolean = rowIterator.hasNext && revRowIterator.hasNext && colIterator.hasNext - final override def next(): Barcodes = { + final override def next(): Barcodes = val nextRow = rowIterator.next() val nextRevRow = revRowIterator.next() val nextCol = colIterator.next() @@ -38,7 +38,8 @@ final class ThreeFileBarcodeSource( val colBarcodeOpt = columnPolicy.find(nextCol) val umiBarcodeOpt = umiPolicyOpt.flatMap(_.find(nextRow)) Barcodes(rowBarcodeOpt, revRowBarcodeOpt, colBarcodeOpt, umiBarcodeOpt) - } + + end next final override def close(): Unit = try rowIterator.close() @@ -46,9 +47,9 @@ final class ThreeFileBarcodeSource( try revRowIterator.close() finally colIterator.close() - } + end BarcodeIterator override def iterator: CloseableIterator[Barcodes] = new BarcodeIterator(rowParser.iterator, revRowParser.iterator, colParser.iterator) -} +end ThreeFileBarcodeSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSource.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSource.scala index 7f5ef6b..b638263 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSource.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -15,14 +15,14 @@ final class TwoFileBarcodeSource( columnPolicy: BarcodePolicy, umiPolicyOpt: Option[BarcodePolicy], readIdCheckPolicy: ReadIdCheckPolicy -) extends CloseableIterable[Barcodes] { +) extends CloseableIterable[Barcodes]: private[this] class BarcodeIterator(rowIterator: CloseableIterator[Read], colIterator: CloseableIterator[Read]) - extends CloseableIterator[Barcodes] { + extends CloseableIterator[Barcodes]: final override def hasNext: Boolean = rowIterator.hasNext && colIterator.hasNext - final override def next(): Barcodes = { + final override def next(): Barcodes = val nextRow = rowIterator.next() val nextCol = colIterator.next() readIdCheckPolicy.check(nextRow, nextCol) @@ -30,15 +30,16 @@ final class TwoFileBarcodeSource( val colBarcodeOpt = columnPolicy.find(nextCol) val umiBarcodeOpt = umiPolicyOpt.flatMap(_.find(nextRow)) Barcodes(rowBarcodeOpt, None, colBarcodeOpt, umiBarcodeOpt) - } + + end next final override def close(): Unit = try rowIterator.close() finally colIterator.close() - } + end BarcodeIterator override def iterator: CloseableIterator[Barcodes] = new BarcodeIterator(rowParser.iterator, colParser.iterator) -} +end TwoFileBarcodeSource diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/barcode.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/barcode.scala new file mode 100644 index 0000000..8935e17 --- /dev/null +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/barcode.scala @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.barcode + +import java.nio.file.Path + +import scala.collection.mutable + +import org.broadinstitute.gpp.poolq3.parser.{ + CloseableIterable, + CloseableIterator, + DmuxedIterable, + FastqParser, + SamParser, + TextParser +} +import org.broadinstitute.gpp.poolq3.types.{BamType, FastqType, Read, ReadsFileType, SamType, TextType} +import org.broadinstitute.gpp.poolq3.{PoolQInput, ReadsSource} + +def barcodeSource( + config: PoolQInput, + rowBarcodePolicy: BarcodePolicy, + revRowBarcodePolicyOpt: Option[BarcodePolicy], + colBarcodePolicyOpt: Either[Int, BarcodePolicy], + umiBarcodePolicyOpt: Option[BarcodePolicy] +): CloseableIterable[Barcodes] = + (config.readsSource, revRowBarcodePolicyOpt, colBarcodePolicyOpt) match + case (ReadsSource.Split(index, forward), None, Right(colBarcodePolicy)) => + new TwoFileBarcodeSource( + parserFor(forward.toList), + parserFor(index.toList), + rowBarcodePolicy, + colBarcodePolicy, + umiBarcodePolicyOpt, + config.readIdCheckPolicy + ) + case (ReadsSource.PairedEnd(index, forward, reverse), Some(revRowBarcodePolicy), Right(colBarcodePolicy)) => + new ThreeFileBarcodeSource( + parserFor(forward.toList), + parserFor(reverse.toList), + parserFor(index.toList), + rowBarcodePolicy, + revRowBarcodePolicy, + colBarcodePolicy, + umiBarcodePolicyOpt, + config.readIdCheckPolicy + ) + case (ReadsSource.SelfContained(paths), None, Right(colBarcodePolicy)) => + new SingleFileBarcodeSource(parserFor(paths.toList), rowBarcodePolicy, colBarcodePolicy, umiBarcodePolicyOpt) + case (ReadsSource.Dmuxed(read1), _, Left(colBarcodeLength)) => + new DmuxedBarcodeSource( + DmuxedIterable(read1.toList, parserFor(_).iterator), + rowBarcodePolicy, + umiBarcodePolicyOpt, + colBarcodeLength + ) + case (ReadsSource.DmuxedPairedEnd(read1, read2), Some(revRowBarcodePolicy), Left(colBarcodeLength)) => + new DmuxedPairedEndBarcodeSource( + DmuxedIterable(read1.toList, parserFor(_).iterator), + DmuxedIterable(read2.toList, parserFor(_).iterator), + rowBarcodePolicy, + revRowBarcodePolicy, + umiBarcodePolicyOpt, + config.readIdCheckPolicy, + colBarcodeLength + ) + case _ => + throw new IllegalArgumentException("Incompatible reads and barcode policy settings") + +def parserFor(file: Path): CloseableIterable[Read] = + ReadsFileType.fromFilename(file.getFileName.toString) match + case Some(FastqType) => new FastqParser(file) + case Some(SamType) | Some(BamType) => new SamParser(file) + case Some(TextType) => new TextParser(file) + case None => throw new IllegalArgumentException(s"File $file is of an unknown file type") + +def parserFor(files: List[Path]): CloseableIterable[Read] = + parserFor[Path, Read](files, p => parserFor(p).iterator) + +private[barcode] def parserFor[A, B](sources: List[A], mkIterator: A => CloseableIterator[B]): CloseableIterable[B] = + new CloseableIterable[B]: + + override def iterator: CloseableIterator[B] = new CloseableIterator[B]: + + private val queue: mutable.Queue[A] = mutable.Queue.from(sources) + + var current: CloseableIterator[B] = _ + + override def hasNext: Boolean = + var currentHasNext = if current == null then false else current.hasNext + while !currentHasNext && queue.nonEmpty do + val head = queue.dequeue() + if head != null then + val old = current + current = mkIterator(head) + if old != null then old.close() + currentHasNext = current.hasNext + currentHasNext + + end hasNext + + override def next(): B = if current == null then throw new NoSuchElementException else current.next() + + override def close(): Unit = Option(current).foreach(_.close()) diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/package.scala deleted file mode 100644 index eab96e6..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/barcode/package.scala +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import java.nio.file.Path - -import scala.collection.mutable - -import org.broadinstitute.gpp.poolq3.ReadsSource -import org.broadinstitute.gpp.poolq3.parser.{ - CloseableIterable, - CloseableIterator, - DmuxedIterable, - FastqParser, - SamParser, - TextParser -} -import org.broadinstitute.gpp.poolq3.types.{BamType, FastqType, Read, ReadsFileType, SamType, TextType} - -package object barcode { - - def barcodeSource( - config: PoolQInput, - rowBarcodePolicy: BarcodePolicy, - revRowBarcodePolicyOpt: Option[BarcodePolicy], - colBarcodePolicyOpt: Either[Int, BarcodePolicy], - umiBarcodePolicyOpt: Option[BarcodePolicy] - ): CloseableIterable[Barcodes] = - (config.readsSource, revRowBarcodePolicyOpt, colBarcodePolicyOpt) match { - case (ReadsSource.Split(index, forward), None, Right(colBarcodePolicy)) => - new TwoFileBarcodeSource( - parserFor(forward.toList), - parserFor(index.toList), - rowBarcodePolicy, - colBarcodePolicy, - umiBarcodePolicyOpt, - config.readIdCheckPolicy - ) - case (ReadsSource.PairedEnd(index, forward, reverse), Some(revRowBarcodePolicy), Right(colBarcodePolicy)) => - new ThreeFileBarcodeSource( - parserFor(forward.toList), - parserFor(reverse.toList), - parserFor(index.toList), - rowBarcodePolicy, - revRowBarcodePolicy, - colBarcodePolicy, - umiBarcodePolicyOpt, - config.readIdCheckPolicy - ) - case (ReadsSource.SelfContained(paths), None, Right(colBarcodePolicy)) => - new SingleFileBarcodeSource(parserFor(paths.toList), rowBarcodePolicy, colBarcodePolicy, umiBarcodePolicyOpt) - case (ReadsSource.Dmuxed(read1), _, Left(colBarcodeLength)) => - new DmuxedBarcodeSource( - DmuxedIterable(read1.toList, parserFor(_).iterator), - rowBarcodePolicy, - umiBarcodePolicyOpt, - colBarcodeLength - ) - case (ReadsSource.DmuxedPairedEnd(read1, read2), Some(revRowBarcodePolicy), Left(colBarcodeLength)) => - new DmuxedPairedEndBarcodeSource( - DmuxedIterable(read1.toList, parserFor(_).iterator), - DmuxedIterable(read2.toList, parserFor(_).iterator), - rowBarcodePolicy, - revRowBarcodePolicy, - umiBarcodePolicyOpt, - config.readIdCheckPolicy, - colBarcodeLength - ) - case _ => - throw new IllegalArgumentException("Incompatible reads and barcode policy settings") - } - - def parserFor(file: Path): CloseableIterable[Read] = - ReadsFileType.fromFilename(file.getFileName.toString) match { - case Some(FastqType) => new FastqParser(file) - case Some(SamType) | Some(BamType) => new SamParser(file) - case Some(TextType) => new TextParser(file) - case None => throw new IllegalArgumentException(s"File $file is of an unknown file type") - } - - def parserFor(files: List[Path]): CloseableIterable[Read] = - parserFor[Path, Read](files, p => parserFor(p).iterator) - - private[barcode] def parserFor[A, B](sources: List[A], mkIterator: A => CloseableIterator[B]): CloseableIterable[B] = - new CloseableIterable[B] { - - override def iterator: CloseableIterator[B] = new CloseableIterator[B] { - - private val queue: mutable.Queue[A] = mutable.Queue.from(sources) - - var current: CloseableIterator[B] = _ - - override def hasNext: Boolean = { - var currentHasNext = if (current == null) false else current.hasNext - while (!currentHasNext && queue.nonEmpty) { - val head = queue.dequeue() - if (head != null) { - val old = current - current = mkIterator(head) - if (old != null) { - old.close() - } - currentHasNext = current.hasNext - } - } - currentHasNext - } - - override def next(): B = if (current == null) throw new NoSuchElementException else current.next() - - override def close(): Unit = Option(current).foreach(_.close()) - - } - - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/collection/collection.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/collection/collection.scala new file mode 100644 index 0000000..adefefc --- /dev/null +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/collection/collection.scala @@ -0,0 +1,9 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.collection + +implicit class ZipWithIndex1[A](private val t: Iterator[A]) extends AnyVal: + def zipWithIndex1: Iterator[(A, Int)] = t.zipWithIndex.map { case (x, i) => (x, i + 1) } diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/collection/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/collection/package.scala deleted file mode 100644 index 85d7686..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/collection/package.scala +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -package object collection { - - implicit class ZipWithIndex1[A](private val t: Iterator[A]) extends AnyVal { - def zipWithIndex1: Iterator[(A, Int)] = t.zipWithIndex.map { case (x, i) => (x, i + 1) } - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/Histogram.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/Histogram.scala index dc4f054..1bde1b8 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/Histogram.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/Histogram.scala @@ -1,11 +1,11 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.hist -trait ReadOnlyHistogram[A] { +trait ReadOnlyHistogram[A]: /** Returns the number of occurrences of key `k` */ def count(k: A): Int @@ -15,12 +15,10 @@ trait ReadOnlyHistogram[A] { def toMap: scala.collection.immutable.Map[A, Int] = keys.map(k => k -> count(k)).toMap -} +end ReadOnlyHistogram /** Simple representation of a mutable histogram */ -trait Histogram[A] extends ReadOnlyHistogram[A] { +trait Histogram[A] extends ReadOnlyHistogram[A]: /** Increment the occurrences of key `k` */ def increment(k: A): Int - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogram.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogram.scala index d86d1c7..f821934 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogram.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogram.scala @@ -1,15 +1,15 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.hist -import scala.jdk.CollectionConverters._ +import scala.jdk.CollectionConverters.* import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap -class OpenHashMapHistogram[A] extends Histogram[A] { +class OpenHashMapHistogram[A] extends Histogram[A]: private[this] val hist: Object2IntOpenHashMap[A] = new Object2IntOpenHashMap[A]() hist.defaultReturnValue(0) @@ -23,4 +23,4 @@ class OpenHashMapHistogram[A] extends Histogram[A] { /** Returns the keys tracked in this histogram */ override def keys: Set[A] = hist.keySet().asScala.toSet -} +end OpenHashMapHistogram diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogram.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogram.scala index d1460c6..1a459f9 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogram.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogram.scala @@ -1,16 +1,16 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.hist import scala.collection.mutable -import scala.jdk.CollectionConverters._ +import scala.jdk.CollectionConverters.* import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap -trait ShardedHistogram[A, B] extends ReadOnlyHistogram[B] { +trait ShardedHistogram[A, B] extends ReadOnlyHistogram[B]: def shards: Set[A] @@ -22,25 +22,23 @@ trait ShardedHistogram[A, B] extends ReadOnlyHistogram[B] { override def keys: Set[B] - override def count(k: B): Int = { + override def count(k: B): Int = var ret = 0 shards.foreach(s => ret += forShard(Some(s)).count(k)) ret += forShard(None).count(k) ret - } -} +end ShardedHistogram -class BasicShardedHistogram[A, B](make: => Histogram[B]) extends ShardedHistogram[A, B] { +class BasicShardedHistogram[A, B](make: => Histogram[B]) extends ShardedHistogram[A, B]: val hs: Object2ObjectOpenHashMap[A, Histogram[B]] = new Object2ObjectOpenHashMap[A, Histogram[B]]() val nullShard: Histogram[B] = make override def forShard(shard: Option[A]): Histogram[B] = - shard match { + shard match case None => nullShard - case Some(s) => hs.compute(s, (_, h) => if (h == null) make else h) - } + case Some(s) => hs.compute(s, (_, h) => if h == null then make else h) override def increment(shard: Option[A], k: B): Int = forShard(shard).increment(k) @@ -48,11 +46,10 @@ class BasicShardedHistogram[A, B](make: => Histogram[B]) extends ShardedHistogra override def keys(shard: Option[A]): Set[B] = forShard(shard).keys - override def keys: Set[B] = { + override def keys: Set[B] = val keys: mutable.Set[B] = new mutable.HashSet keys.addAll(nullShard.keys) hs.forEach((_, h) => keys.addAll(h.keys)) keys.toSet - } -} +end BasicShardedHistogram diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogram.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogram.scala index eea9fcb..132e622 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogram.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogram.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,33 +9,32 @@ import scala.collection.mutable import it.unimi.dsi.fastutil.objects.{Object2IntOpenHashMap, Object2ObjectOpenHashMap} -class TupleHistogram[A] extends Histogram[(A, A)] { +class TupleHistogram[A] extends Histogram[(A, A)]: private[this] val hist: Object2ObjectOpenHashMap[A, Object2IntOpenHashMap[A]] = new Object2ObjectOpenHashMap() /** Increment the occurrences of key `k` */ - override def increment(k: (A, A)): Int = { + override def increment(k: (A, A)): Int = val (fst, snd) = k val sndMap = hist.get(fst) - if (sndMap != null) sndMap.addTo(snd, 1) - else { + if sndMap != null then sndMap.addTo(snd, 1) + else val newSndMap = new Object2IntOpenHashMap[A]() newSndMap.put(snd, 1) hist.put(fst, newSndMap) 1 - } - } + + end increment /** Returns the number of occurrences of key `k` */ - override def count(k: (A, A)): Int = { + override def count(k: (A, A)): Int = val (fst, snd) = k val sndMap = hist.get(fst) - if (sndMap == null) 0 + if sndMap == null then 0 else sndMap.getOrDefault(snd, 0) - } /** Returns the keys tracked in this histogram */ - override def keys: Set[(A, A)] = { + override def keys: Set[(A, A)] = val s: mutable.Set[(A, A)] = new mutable.HashSet() hist.forEach { (fst, snds) => @@ -44,6 +43,7 @@ class TupleHistogram[A] extends Histogram[(A, A)] { } } s.toSet - } -} + end keys + +end TupleHistogram diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/numeric.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/numeric.scala new file mode 100644 index 0000000..843e0fc --- /dev/null +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/numeric.scala @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.numeric + +import java.text.DecimalFormat + +val Decimal000Format: DecimalFormat = new DecimalFormat("0.000") + +val Decimal00Format: DecimalFormat = new DecimalFormat("0.00") + +val Log2: Double = math.log(2) + +val OneMillion: Double = 1000000.0 + +def percent(num: Int, denom: Int): Double = if denom == 0 then 0.0 else num * 100.0 / denom + +def log2(x: Double): Double = math.log(x) / Log2 + +def logNormalize(num: Int, denom: Int): Double = + if denom == 0 then 0 + else math.log1p(num * OneMillion / denom) / Log2 diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/package.scala deleted file mode 100644 index 5e2970c..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/numeric/package.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import java.text.DecimalFormat - -package object numeric { - - val Decimal000Format: DecimalFormat = new DecimalFormat("0.000") - - val Decimal00Format: DecimalFormat = new DecimalFormat("0.00") - - val Log2: Double = math.log(2) - - val OneMillion: Double = 1000000.0 - - def percent(num: Int, denom: Int): Double = if (denom == 0) 0.0 else num * 100.0 / denom - - def log2(x: Double): Double = math.log(x) / Log2 - - def logNormalize(num: Int, denom: Int): Double = - if (denom == 0) 0 - else math.log1p(num * OneMillion / denom) / Log2 - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSet.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSet.scala index f000f31..d5345e1 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSet.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSet.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,30 +9,27 @@ import java.io.{BufferedReader, FileInputStream, InputStreamReader} import java.nio.file.Path import java.util.stream.Collectors -import scala.jdk.CollectionConverters._ +import scala.jdk.CollectionConverters.* import scala.util.{Failure, Success, Try, Using} -import cats.syntax.all._ +import cats.syntax.all.* import org.apache.commons.io.ByteOrderMark import org.apache.commons.io.input.BOMInputStream -class BarcodeSet(val barcodes: Set[String]) { +class BarcodeSet(val barcodes: Set[String]): def barcodeLength: Int = barcodes.head.length def isDefined(s: String): Boolean = barcodes.contains(s) -} - -object BarcodeSet { +object BarcodeSet: private[this] val BarcodeRe = s"""([ACGT]+)""".r def parseBarcode(makeException: String => Exception)(line: String): Try[String] = - line match { + line match case BarcodeRe(bc) => Success(bc) case _ => Failure(makeException(line)) - } def apply(file: Path): BarcodeSet = Using.resource(new FileInputStream(file.toFile)) { fin => @@ -56,14 +53,12 @@ object BarcodeSet { } def checkSet(file: Path, barcodeSet: BarcodeSet): Try[Unit] = - if (barcodeSet.barcodes.isEmpty) Failure(InvalidFileException(file, s"Empty barcode file")) - else { + if barcodeSet.barcodes.isEmpty then Failure(InvalidFileException(file, s"Empty barcode file")) + else val expectedLength = barcodeSet.barcodeLength - barcodeSet.barcodes.find(_.length != expectedLength) match { + barcodeSet.barcodes.find(_.length != expectedLength) match case None => Success(()) case Some(b) => Failure(InvalidFileException(file, s"Barcode '$b' did not match expected length $expectedLength")) - } - } -} +end BarcodeSet diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterable.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterable.scala index 91c78d7..e459201 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterable.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterable.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,26 +12,22 @@ import scala.collection.mutable import org.broadinstitute.gpp.poolq3.barcode.FoundBarcode import org.broadinstitute.gpp.poolq3.types.Read -abstract class CloseableIterable[A] extends Iterable[A] { +abstract class CloseableIterable[A] extends Iterable[A]: override def iterator: CloseableIterator[A] -} -object CloseableIterable { +object CloseableIterable: - def ofList[A](list: List[A]): CloseableIterable[A] = new CloseableIterable[A] { + def ofList[A](list: List[A]): CloseableIterable[A] = new CloseableIterable[A]: - override def iterator: CloseableIterator[A] = new CloseableIterator[A] { + override def iterator: CloseableIterator[A] = new CloseableIterator[A]: val underlying: Iterator[A] = list.iterator override def close(): Unit = {} override def hasNext: Boolean = underlying.hasNext override def next(): A = underlying.next() - } - - } -} +end CloseableIterable -abstract class DmuxedIterable extends CloseableIterable[Read] { +abstract class DmuxedIterable extends CloseableIterable[Read]: /** `Some(barcode)` or else `None` if unmatched */ // hack: this is sort of an encapsulation violation because ordinarily the @@ -39,25 +35,25 @@ abstract class DmuxedIterable extends CloseableIterable[Read] { // case inherently crosses those lines and defining this here avoids recomputing // the same value potentially millions of times in a row def indexBarcode: Option[FoundBarcode] -} -object DmuxedIterable { +end DmuxedIterable + +object DmuxedIterable: def apply(iterable: Iterable[(Option[String], Path)], parserFor: Path => CloseableIterator[Read]): DmuxedIterable = new DmuxedIterableImpl(iterable, parserFor) - def apply(data: List[(Option[String], List[String])]): DmuxedIterable = { + def apply(data: List[(Option[String], List[String])]): DmuxedIterable = val data2: List[(Option[String], List[Read])] = data.map { case (bco, seqs) => (bco, seqs.zipWithIndex.map { case (seq, i) => Read(i.toString, seq) }) } DmuxedIterable.forReads(data2) - } def forReads(data: List[(Option[String], List[Read])]): DmuxedIterable = new DmuxedIterableImpl(data, CloseableIterator.ofList) private class DmuxedIterableImpl[A](src: Iterable[(Option[String], A)], makeIterator: A => CloseableIterator[Read]) - extends DmuxedIterable { + extends DmuxedIterable: private val queue: mutable.Queue[(Option[String], A)] = mutable.Queue.from(src) @@ -65,35 +61,30 @@ object DmuxedIterable { var indexBarcode: Option[FoundBarcode] = _ - override def iterator: CloseableIterator[Read] = new CloseableIterator[Read] { + override def iterator: CloseableIterator[Read] = new CloseableIterator[Read]: - override def hasNext: Boolean = { - var currentHasNext = if (current == null) false else current.hasNext - while (!currentHasNext && queue.nonEmpty) { + override def hasNext: Boolean = + var currentHasNext = if current == null then false else current.hasNext + while !currentHasNext && queue.nonEmpty do val head = queue.dequeue() - if (head != null) { + if head != null then val old = current indexBarcode = head._1.map(bc => FoundBarcode(bc.toCharArray, 0)) current = makeIterator(head._2) - if (old != null) { - old.close() - } + if old != null then old.close() currentHasNext = current.hasNext - } - } + end while currentHasNext - } + + end hasNext override def next(): Read = - if (current == null) throw new NoSuchElementException + if current == null then throw new NoSuchElementException else current.next() - override def close(): Unit = { + override def close(): Unit = Option(current).foreach(_.close()) - } - - } - } + end DmuxedIterableImpl -} +end DmuxedIterable diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterator.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterator.scala index 0d6dad8..988c37a 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterator.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/CloseableIterator.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,14 +9,13 @@ import java.io.Closeable abstract class CloseableIterator[A] extends Iterator[A] with Closeable -object CloseableIterator { +object CloseableIterator: /** A convenience implementation used for testing */ - def ofList[A](xs: List[A]): CloseableIterator[A] = new CloseableIterator[A] { + def ofList[A](xs: List[A]): CloseableIterator[A] = new CloseableIterator[A]: val iter = xs.iterator override def close(): Unit = () override def hasNext: Boolean = iter.hasNext override def next(): A = iter.next() - } -} +end CloseableIterator diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ConflictingBarcodeException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ConflictingBarcodeException.scala index 1be8d86..c1ec9ba 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ConflictingBarcodeException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ConflictingBarcodeException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/FastqParser.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/FastqParser.scala index 52b3bd6..9e38b80 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/FastqParser.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/FastqParser.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,46 +10,44 @@ import java.nio.file.Path import org.broadinstitute.gpp.poolq3.types.Read -final class FastqParser(file: Path) extends CloseableIterable[Read] { +final class FastqParser(file: Path) extends CloseableIterable[Read]: /** Wraps the underlying `InputStream`, taking ownership of it. Thus, closing this iterator closes the stream. */ - private[parser] class FastqIterator(is: InputStream) extends CloseableIterator[Read] { + private[parser] class FastqIterator(is: InputStream) extends CloseableIterator[Read]: private[this] val reader = new BufferedReader(new InputStreamReader(is)) private[this] var line = reader.readLine() - final private[this] def nextLine(): String = { + final private[this] def nextLine(): String = val ret = line line = reader.readLine() ret - } // for details on the FASTQ format, see https://maq.sourceforge.net/fastq.shtml - final override def next(): Read = { + final override def next(): Read = // get the next record and make sure it's complete val line0 = nextLine() val line1 = nextLine() val line2 = nextLine() val line3 = nextLine() - if (line0.charAt(0) != '@') { + if line0.charAt(0) != '@' then throw InvalidFileException(file, "Corrupt or incorrect FASTQ: field 1 must begin with '@'") - } - if (line2.charAt(0) != '+') { + if line2.charAt(0) != '+' then throw InvalidFileException(file, "Corrupt or incorrect FASTQ: field 3 must begin with '+'") - } - if (line3 == null) throw InvalidFileException(file, "File contains an incomplete FASTQ read") + if line3 == null then throw InvalidFileException(file, "File contains an incomplete FASTQ read") else Read(line0, line1) - } + + end next final override def hasNext: Boolean = line != null final override def close(): Unit = reader.close() - } + end FastqIterator override def iterator: CloseableIterator[Read] = new FastqIterator(inputStream(file)) -} +end FastqParser diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidFileException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidFileException.scala index de61835..5f43076 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidFileException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidFileException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,6 +10,5 @@ import java.nio.file.Path final case class InvalidFileException(file: Path, msg: Option[String] = None) extends RuntimeException(s"$file was invalid" + msg.fold("")(txt => s": $txt")) -object InvalidFileException { +object InvalidFileException: def apply(file: Path, msg: String): InvalidFileException = new InvalidFileException(file, Option(msg)) -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidReadException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidReadException.scala index ed3bb60..68e7116 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidReadException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/InvalidReadException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceData.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceData.scala index 7244988..9b1ded7 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceData.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceData.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,14 +10,14 @@ import java.nio.file.Path import scala.util.Using -import com.github.tototoshi.csv._ +import com.github.tototoshi.csv.* import org.apache.commons.io.ByteOrderMark import org.apache.commons.io.input.BOMInputStream import org.broadinstitute.gpp.poolq3.reports.{GctDialect, PoolQ2Dialect, ReportsDialect} import org.broadinstitute.gpp.poolq3.seq.isReferenceBarcode import org.log4s.{Logger, getLogger} -class ReferenceData(val mappings: Seq[ReferenceEntry]) { +class ReferenceData(val mappings: Seq[ReferenceEntry]): require(mappings.nonEmpty, "Reference data may not be empty") ReferenceData.checkLengths(mappings) @@ -25,26 +25,24 @@ class ReferenceData(val mappings: Seq[ReferenceEntry]) { def barcodeLengths: (Int, Int) = mappings.head.barcodeLengths - def forColumnBarcodes(dialect: ReportsDialect): ReferenceData = { + def forColumnBarcodes(dialect: ReportsDialect): ReferenceData = val columnBarcodeMappings = mappings.map { m => - if (m.referenceId.isEmpty) m.copy(referenceId = ReferenceData.unlabeled(dialect)) else m + if m.referenceId.isEmpty then m.copy(referenceId = ReferenceData.unlabeled(dialect)) else m } new ReferenceData(columnBarcodeMappings) - } -} +end ReferenceData -object ReferenceData { +object ReferenceData: private[this] val log: Logger = getLogger val UnlabeledSampleBarcodes = "Unlabeled Sample Barcodes" val UnlabeledColumnBarcodes = "Unlabeled Column Barcodes" - def unlabeled(dialect: ReportsDialect): String = dialect match { + def unlabeled(dialect: ReportsDialect): String = dialect match case PoolQ2Dialect | GctDialect => UnlabeledSampleBarcodes case _ => UnlabeledColumnBarcodes - } // this is complicated because it handles the case where the DNA barcode is quoted // matches `"[ACGTacgt:;-]+ *"` or `[ACGTacgt:;-]+ *`, @@ -53,21 +51,21 @@ object ReferenceData { private[this] val DelimiterRegex = """^(?:[^,\t]+)([\t,]).+$""".r - private[parser] def guessDelimiter(br: BufferedReader): Char = { + private[parser] def guessDelimiter(br: BufferedReader): Char = br.mark(1024) val iter = br.lines().iterator() val ret = - if (iter.hasNext) { - iter.next() match { + if iter.hasNext then + iter.next() match case DelimiterRegex(d) => d.head case _ => ',' - } - } else ',' + else ',' br.reset() ret - } - def apply(file: Path, quote: Char = '"'): ReferenceData = { + end guessDelimiter + + def apply(file: Path, quote: Char = '"'): ReferenceData = Using.resource(new FileInputStream(file.toFile)) { fin => val in = BOMInputStream .builder() @@ -77,14 +75,13 @@ object ReferenceData { .get() val br = new BufferedReader(new InputStreamReader(in)) val guessedDelimiter = guessDelimiter(br) - implicit object CSVFormat extends DefaultCSVFormat { + implicit object CSVFormat extends DefaultCSVFormat: override val delimiter = guessedDelimiter override val quoteChar: Char = quote - } skipHeader(br, LineRegex) val rows = CSVReader.open(br).all() val barcodes = rows.map { case xs => - xs match { + xs match case barcodeRaw :: idRaw :: _ => // if the CSV parser leaves spaces, we should remove them val barcode = barcodeRaw.trim() @@ -94,29 +91,25 @@ object ReferenceData { // DNA string, we must accept the row. However, sometimes Excel leaves empty lines in exported CSV; as // long as *both* the barcode and ID are empty, it's safe to just skip the row. For now we'll be paranoid // and reject cases where the barcode is empty but the ID is non-empty - if (barcode.isEmpty && id.isEmpty) None - else if (isReferenceBarcode(barcode)) Some(ReferenceEntry(barcode, id)) + if barcode.isEmpty && id.isEmpty then None + else if isReferenceBarcode(barcode) then Some(ReferenceEntry(barcode, id)) else throw InvalidFileException(file, s"Invalid DNA barcode '$barcode' for ID '$id'") case _ => throw InvalidFileException( file, s"Incorrect number of columns. At least 2 required, got: ${xs.length}: $xs" ) - } } - if (barcodes.isEmpty) { - throw InvalidFileException(file, "Empty reference file") - } + if barcodes.isEmpty then throw InvalidFileException(file, "Empty reference file") new ReferenceData(barcodes.flatten) } - } - private[parser] def checkLengths(mappings: Seq[ReferenceEntry]): Unit = { + private[parser] def checkLengths(mappings: Seq[ReferenceEntry]): Unit = val barcodesByLength = mappings.groupBy(_.barcodeLength) - if (barcodesByLength.keySet.size == 1) () - else { + if barcodesByLength.keySet.size == 1 then () + else // grab the first thing in each size grouping val examples = barcodesByLength.toSeq.flatMap { case (length, barcodes) => barcodes.headOption.map(bc => length -> bc) @@ -131,9 +124,11 @@ object ReferenceData { // log the problem and throw log.error(s"Examples: ${sortedExamples.mkString(", ")}") throw new IllegalArgumentException(s"Input barcodes must all be of the same length") - } - } + + end if + + end checkLengths def truncator(newLength: Int): String => String = s => s.substring(0, newLength) -} +end ReferenceData diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntry.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntry.scala index ffc6d9a..664b42c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntry.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntry.scala @@ -1,21 +1,17 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.parser -case class ReferenceEntry(referenceBarcode: String, referenceId: String) { +case class ReferenceEntry(referenceBarcode: String, referenceId: String): val dnaBarcode: String = referenceBarcode.replaceAll("[:;-]", "").toUpperCase def barcodeLength: Int = dnaBarcode.length - def barcodeLengths: (Int, Int) = { + def barcodeLengths: (Int, Int) = val split = referenceBarcode.indexWhere(Set(';', ':', '-')) - if (split > 0) { - (split, barcodeLength - split) - } else { - (barcodeLength, 0) - } - } + if split > 0 then (split, barcodeLength - split) + else (barcodeLength, 0) -} +end ReferenceEntry diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/SamParser.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/SamParser.scala index 54032d7..15f803b 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/SamParser.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/SamParser.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,30 +11,29 @@ import htsjdk.samtools.SamReaderFactory import org.broadinstitute.gpp.poolq3.seq.reverseComplement import org.broadinstitute.gpp.poolq3.types.Read -final class SamParser(file: Path) extends CloseableIterable[Read] { +final class SamParser(file: Path) extends CloseableIterable[Read]: private[this] val readerFactory: SamReaderFactory = SamReaderFactory.makeDefault() - private[this] class SamIterator extends CloseableIterator[Read] { + private[this] class SamIterator extends CloseableIterator[Read]: private[this] val samReader = readerFactory.open(file.toFile) private[this] val samIterator = samReader.iterator() - final override def close(): Unit = { + final override def close(): Unit = samIterator.close() samReader.close() - } - final override def next(): Read = { + final override def next(): Read = val samRecord = samIterator.next() val readSequence = - if (samRecord.getReadNegativeStrandFlag) reverseComplement(samRecord.getReadString) + if samRecord.getReadNegativeStrandFlag then reverseComplement(samRecord.getReadString) else samRecord.getReadString Read(samRecord.getReadName, readSequence) - } final override def hasNext: Boolean = samIterator.hasNext - } + + end SamIterator override def iterator: CloseableIterator[Read] = new SamIterator() -} +end SamParser diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/TextParser.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/TextParser.scala index 9c446cc..ac3936f 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/TextParser.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/TextParser.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,27 +10,27 @@ import java.nio.file.Path import org.broadinstitute.gpp.poolq3.types.Read -class TextParser(file: Path) extends CloseableIterable[Read] { +class TextParser(file: Path) extends CloseableIterable[Read]: - private[parser] class TextIterator(is: InputStream) extends CloseableIterator[Read] { + private[parser] class TextIterator(is: InputStream) extends CloseableIterator[Read]: private[this] val reader = new BufferedReader(new InputStreamReader(is)) private[this] var lineNo: Int = 1 private[this] var line = reader.readLine() final override def next(): Read = - if (line == null) throw new NoSuchElementException - else { + if line == null then throw new NoSuchElementException + else val ret = Read(s"Line $lineNo", line) line = reader.readLine() lineNo += 1 ret - } final override def hasNext: Boolean = line != null final override def close(): Unit = reader.close() - } + + end TextIterator override def iterator: CloseableIterator[Read] = new TextIterator(inputStream(file)) -} +end TextParser diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/package.scala deleted file mode 100644 index e62ecab..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/package.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStream} -import java.nio.file.Path -import java.util.zip.GZIPInputStream - -import scala.util.matching.Regex - -package object parser { - - /** Attempts to guess whether a file is gzipped */ - def isGzipped(file: Path): Boolean = file.toFile.getName.toLowerCase.endsWith(".gz") - - /** Returns an appropriate `InputStream` for the file; if the file appears to be gzipped, it will return a - * GZIPInputStream that decompresses the data on the fly. - */ - def inputStream(file: Path): InputStream = { - val rawStream = new FileInputStream(file.toFile) - val bufferedStream = new BufferedInputStream(rawStream) - if (isGzipped(file)) new GZIPInputStream(bufferedStream, 8192) - else bufferedStream - } - - private[parser] def skipHeader(br: BufferedReader, re: Regex): Unit = { - br.mark(1024) - val line = br.readLine() - line match { - case re(_) => br.reset() - case _ => // do nothing - } - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/parser/parser.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/parser.scala new file mode 100644 index 0000000..f8b4a89 --- /dev/null +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/parser/parser.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.parser + +import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStream} +import java.nio.file.Path +import java.util.zip.GZIPInputStream + +import scala.util.matching.Regex + +/** Attempts to guess whether a file is gzipped */ +def isGzipped(file: Path): Boolean = file.toFile.getName.toLowerCase.endsWith(".gz") + +/** Returns an appropriate `InputStream` for the file; if the file appears to be gzipped, it will return a + * GZIPInputStream that decompresses the data on the fly. + */ +def inputStream(file: Path): InputStream = + val rawStream = new FileInputStream(file.toFile) + val bufferedStream = new BufferedInputStream(rawStream) + if isGzipped(file) then new GZIPInputStream(bufferedStream, 8192) + else bufferedStream + +private[parser] def skipHeader(br: BufferedReader, re: Regex): Unit = + br.mark(1024) + val line = br.readLine() + line match + case re(_) => br.reset() + case _ => // do nothing diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/BarcodeStats.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/BarcodeStats.scala index efd3e26..45925ce 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/BarcodeStats.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/BarcodeStats.scala @@ -1,11 +1,11 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.process -class BarcodeStats { +class BarcodeStats: private var minPos: Int = Int.MaxValue private var maxPos: Int = -1 @@ -18,17 +18,16 @@ class BarcodeStats { def notFound(totalReads: Int): Int = totalReads - found def avg: Option[Double] = - if (found < 1) None + if found < 1 then None else Some(sum / found.toDouble) - def update(pos: Int): Unit = { + def update(pos: Int): Unit = found += 1 minPos = math.min(minPos, pos) maxPos = math.max(maxPos, pos) sum += pos - } - def minPosStr = if (min == Int.MaxValue) "N/A" else min.toString - def maxPosStr = if (min < 0) "N/A" else max.toString + def minPosStr = if min == Int.MaxValue then "N/A" else min.toString + def maxPosStr = if min < 0 then "N/A" else max.toString -} +end BarcodeStats diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/Consumer.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/Consumer.scala index 878fb48..d2576a9 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/Consumer.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/Consumer.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import java.io.Closeable import org.broadinstitute.gpp.poolq3.barcode.Barcodes -trait Consumer extends Closeable { +trait Consumer extends Closeable: def start(): Unit @@ -25,4 +25,4 @@ trait Consumer extends Closeable { def state: State -} +end Consumer diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/NoOpConsumer.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/NoOpConsumer.scala index 414f77e..58e7365 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/NoOpConsumer.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/NoOpConsumer.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,15 +8,14 @@ package org.broadinstitute.gpp.poolq3.process import org.broadinstitute.gpp.poolq3.barcode.Barcodes import org.broadinstitute.gpp.poolq3.hist.{BasicShardedHistogram, OpenHashMapHistogram, TupleHistogram} -class NoOpConsumer extends Consumer { +class NoOpConsumer extends Consumer: var reads = 0 final override def start(): Unit = {} - final override def consume(parsedBarcode: Barcodes): Unit = { + final override def consume(parsedBarcode: Barcodes): Unit = reads += 1 - } override def readsProcessed: Int = reads @@ -34,4 +33,4 @@ class NoOpConsumer extends Consumer { new OpenHashMapHistogram() ) -} +end NoOpConsumer diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/PoolQProcess.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/PoolQProcess.scala index a2d7866..6bd9c8f 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/PoolQProcess.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/PoolQProcess.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -23,7 +23,7 @@ final class PoolQProcess( consumer: Consumer, queueSize: Int = 100, reportFrequency: Int = 5000000 -) { +): private[this] val log: Logger = getLogger @@ -31,42 +31,38 @@ final class PoolQProcess( @volatile private[this] var done = false - final private[this] class ConsumerThread extends Thread { + final private[this] class ConsumerThread extends Thread: - override def run(): Unit = { + override def run(): Unit = val t0 = System.currentTimeMillis() - def logProgress(n: Int): Unit = { + def logProgress(n: Int): Unit = val nd = consumer.readsProcessed.toFloat val dt = System.currentTimeMillis() - t0 val avg = nd / dt val pct = consumer.matchPercent log.info(s"Processed $n reads in $dt ms ($avg reads/ms). Match percent: $pct; queue size: ${queue.size()}") - } - while (!done || !queue.isEmpty) { // as long as we're not done OR there is still work in the queue - try { - Option(queue.poll(100, TimeUnit.MILLISECONDS)).foreach(next => consumer.consume(next)) - } catch { + while !done || !queue.isEmpty do // as long as we're not done OR there is still work in the queue + try Option(queue.poll(100, TimeUnit.MILLISECONDS)).foreach(next => consumer.consume(next)) + catch case _: InterruptedException => log.warn( s"Interrupted. Done = $done Processed ${consumer.readsProcessed} reads; queue has ${queue.size()} remaining" ) case NonFatal(e) => log.error(e)(s"Error processing read ${consumer.readsProcessed}") - } // update the log periodically val n = consumer.readsProcessed - if (n % reportFrequency == 0) { - logProgress(n) - } - } + if n % reportFrequency == 0 then logProgress(n) + end while logProgress(consumer.readsProcessed) - } - } + end run + + end ConsumerThread /** Runs the process in the calling thread and returns the final state */ - def run(): PoolQRunSummary = { + def run(): PoolQRunSummary = val consumerThread = new ConsumerThread consumerThread.setName("Consumer") @@ -87,6 +83,7 @@ final class PoolQProcess( consumer.close() PoolQRunSummary(consumer.readsProcessed, consumer.matchingReads, consumer.matchPercent, consumer.state) - } -} + end run + +end PoolQProcess diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumer.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumer.scala index 8c64a2a..5f281a0 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumer.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumer.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,7 +11,7 @@ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} import org.broadinstitute.gpp.poolq3.hist.{BasicShardedHistogram, OpenHashMapHistogram, TupleHistogram} import org.broadinstitute.gpp.poolq3.parser.BarcodeSet import org.broadinstitute.gpp.poolq3.reference.{MatchedBarcode, Reference} -import org.broadinstitute.gpp.poolq3.seq._ +import org.broadinstitute.gpp.poolq3.seq.* import org.log4s.{Logger, getLogger} final class ScoringConsumer( @@ -22,7 +22,7 @@ final class ScoringConsumer( umiReference: Option[BarcodeSet], unexpectedSequenceTrackerOpt: Option[UnexpectedSequenceTracker], pairedEndMode: Boolean -) extends Consumer { +) extends Consumer: private[this] val log: Logger = getLogger @@ -42,27 +42,23 @@ final class ScoringConsumer( ) // this thread is used to write unexpected sequences to the file cache - private[this] val unexpectedSequenceTrackerThread: Thread = new Thread { + private[this] val unexpectedSequenceTrackerThread: Thread = new Thread: - final override def run(): Unit = { + final override def run(): Unit = assert(unexpectedSequenceTrackerOpt.isDefined) val unexpectedSequenceTracker = unexpectedSequenceTrackerOpt.get - while (!done || !unexpectedSequenceQueue.isEmpty) { - try { + while !done || !unexpectedSequenceQueue.isEmpty do + try Option(unexpectedSequenceQueue.poll(100, TimeUnit.MILLISECONDS)) .foreach(unexpectedSequenceTracker.reportUnexpected) - } catch { + catch case _: InterruptedException => log.debug(s"Interrupted. Done = $done; queue length = ${unexpectedSequenceQueue.size()}") - } - } - } - } + end run - override def start(): Unit = { + override def start(): Unit = unexpectedSequenceTrackerOpt.foreach(_ => unexpectedSequenceTrackerThread.start()) - } override def close(): Unit = unexpectedSequenceTrackerOpt.foreach { _ => @@ -71,18 +67,18 @@ final class ScoringConsumer( unexpectedSequenceTrackerOpt.foreach(_.close()) } - override def consume(parsedBarcode: Barcodes): Unit = { + override def consume(parsedBarcode: Barcodes): Unit = // increment the read counter regardless state.reads += 1 - (parsedBarcode.row, parsedBarcode.revRow, parsedBarcode.col) match { + (parsedBarcode.row, parsedBarcode.revRow, parsedBarcode.col) match case (f @ Some(_), revRowOpt, None) => // a forward row barcode region was found; extract the sequence and update stats - updateRowBarcodePositionStats(f, if (pairedEndMode) revRowOpt else None) + updateRowBarcodePositionStats(f, if pairedEndMode then revRowOpt else None) case (f @ Some(parsedRow), None, Some(parsedCol)) => updateRowBarcodePositionStats(f, None) - if (pairedEndMode == false) { + if pairedEndMode == false then // a row barcode region was found; extract the sequence and update stats // match the sequence against the row reference to determine if this was a known barcode @@ -99,12 +95,9 @@ final class ScoringConsumer( // if we are tracking unexpected sequences and we matched the column barcode to the reference data but didn't // match the row barcode to the reference data, and the row barcode doesn't have an N in it, then queue the // row barcode for inclusion in the unexpected sequence report - if ( - unexpectedSequenceTrackerOpt.isDefined && colBc.nonEmpty && rowBc.isEmpty && !containsN(parsedRow.barcode) - ) { - unexpectedSequenceQueue.put((parsedRow.barcode, parsedCol.barcode)) - } - } + if unexpectedSequenceTrackerOpt.isDefined && colBc.nonEmpty && rowBc.isEmpty && !containsN(parsedRow.barcode) + then unexpectedSequenceQueue.put((parsedRow.barcode, parsedCol.barcode)) + end if case (f @ Some(parsedRow), r @ Some(parsedRevRow), Some(parsedCol)) => // a row barcode region was found; extract the sequence and update stats @@ -126,11 +119,8 @@ final class ScoringConsumer( // if we are tracking unexpected sequences and we matched the column barcode to the reference data but didn't // match the row barcode to the reference data, and the row barcode doesn't have an N in it, then queue the // row barcode for inclusion in the unexpected sequence report - if ( - unexpectedSequenceTrackerOpt.isDefined && colBc.nonEmpty && rowBc.isEmpty && !containsN(parsedRow.barcode) - ) { - unexpectedSequenceQueue.put((parsedRow.barcode, parsedCol.barcode)) - } + if unexpectedSequenceTrackerOpt.isDefined && colBc.nonEmpty && rowBc.isEmpty && !containsN(parsedRow.barcode) + then unexpectedSequenceQueue.put((parsedRow.barcode, parsedCol.barcode)) case (None, r, None) => updateRowBarcodePositionStats(None, r) @@ -144,67 +134,59 @@ final class ScoringConsumer( // in that portion of the read, the read quality is suspect (e.g., a potential primer-dimer) and should not be // counted; instead, we're counting sample barcode matches only when the rest of the read matches to the // expected structure - if (alwaysCountColumnBarcodes) { + if alwaysCountColumnBarcodes then val colBc: Seq[MatchedBarcode] = colReference.find(col.barcode) updateColumnBarcodeStats(colBc, col) - } - } - } + end match + + end consume // Process the row and column barcodes when both are found and match to reference data - private[this] def matchedRowAndCol(row: MatchedBarcode, col: MatchedBarcode, umi: Option[FoundBarcode]): Unit = { + private[this] def matchedRowAndCol(row: MatchedBarcode, col: MatchedBarcode, umi: Option[FoundBarcode]): Unit = val r = row.barcode val c = col.barcode log.debug(s"Incrementing state for ($r, $c}).") - umiReference match { + umiReference match case None => // we're not in UMI mode, so just increment the state val _ = state.known.increment(None, (r, c)) case Some(ref) => // we're in UMI mode handleUmi(umi, ref, r, c) - } state.matches += 1 - if (row.distance == 0) { - state.exactMatches += 1 - } - } + if row.distance == 0 then state.exactMatches += 1 + + end matchedRowAndCol // Process a UMI barcode if we're doing that - private[this] def handleUmi(umi: Option[FoundBarcode], ref: BarcodeSet, r: String, c: String): Unit = { - umi match { + private[this] def handleUmi(umi: Option[FoundBarcode], ref: BarcodeSet, r: String, c: String): Unit = + umi match case Some(s) => val u = new String(s.barcode) - if (ref.isDefined(u)) { + if ref.isDefined(u) then // we found a known UMI barcode, so increment val _ = state.known.increment(Some(u), (r, c)) - } else { + else // we found an unknown UMI barcode, so track it somehow val _ = state.known.increment(None, (r, c)) val _ = state.unknownUmi.increment(u) - } + end if case None => // this means we were configured for UMI but we didn't extract a UMI barcode at all state.umiBarcodeNotFound += 1 - } - } - private[this] def updateColumnBarcodeStats(colBc: Seq[MatchedBarcode], col: FoundBarcode): Unit = { + private[this] def updateColumnBarcodeStats(colBc: Seq[MatchedBarcode], col: FoundBarcode): Unit = // the if and else if branches aren't really related but they are also mutually exclusive, so if one matches // there is no reason to test the other - if (countAmbiguous || colBc.lengthCompare(1) == 0) { - colBc.foreach(mb => state.knownCol.increment(mb.barcode)) - } else if (colBc.isEmpty && !containsN(col.barcode)) { + if countAmbiguous || colBc.lengthCompare(1) == 0 then colBc.foreach(mb => state.knownCol.increment(mb.barcode)) + else if colBc.isEmpty && !containsN(col.barcode) then val _ = state.unknownCol.increment(new String(col.barcode)) - } - } - private[this] def updateRowBarcodePositionStats(row: Option[FoundBarcode], revRow: Option[FoundBarcode]): Unit = { + private[this] def updateRowBarcodePositionStats(row: Option[FoundBarcode], revRow: Option[FoundBarcode]): Unit = row.foreach(r => state.rowBarcodeStats.update(r.offset0)) revRow.foreach(r => state.revRowBarcodeStats.update(r.offset0)) - if (row.isEmpty && revRow.isEmpty) { state.neitherRowBarcodeFound += 1 } - } + if row.isEmpty && revRow.isEmpty then state.neitherRowBarcodeFound += 1 override def readsProcessed: Int = state.reads @@ -212,4 +194,4 @@ final class ScoringConsumer( override def matchPercent: Float = state.matchPercent.toFloat -} +end ScoringConsumer diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/State.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/State.scala index 4301145..98f6cb6 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/State.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/State.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,7 +12,7 @@ final class State( val knownCol: Histogram[String], val unknownCol: Histogram[String], val unknownUmi: Histogram[String] -) { +): var reads: Int = 0 var exactMatches: Int = 0 @@ -25,7 +25,7 @@ final class State( val revRowBarcodeStats: BarcodeStats = new BarcodeStats def matchPercent: Double = - if (reads < 1) 0.0 + if reads < 1 then 0.0 else 100L * matches / reads.toDouble -} +end State diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/process/UnexpectedSequenceTracker.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/process/UnexpectedSequenceTracker.scala index 9526243..49f20ca 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/process/UnexpectedSequenceTracker.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/process/UnexpectedSequenceTracker.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -14,17 +14,16 @@ import org.broadinstitute.gpp.poolq3.process.UnexpectedSequenceTracker.nameFor import org.broadinstitute.gpp.poolq3.reference.Reference import org.log4s.{Logger, getLogger} -final class UnexpectedSequenceTracker(cacheDir: Path, colReference: Reference) extends Closeable { +final class UnexpectedSequenceTracker(cacheDir: Path, colReference: Reference) extends Closeable: private[this] val log: Logger = getLogger // prep the directory and create file writers - private[this] val outputFileWriters: Map[String, BufferedWriter] = { + private[this] val outputFileWriters: Map[String, BufferedWriter] = val _ = Files.createDirectories(cacheDir) colReference.allBarcodes.map(barcode => barcode -> newWriterFor(barcode)).toMap - } - def reportUnexpected(barcodes: (Array[Char], Array[Char])): Unit = { + def reportUnexpected(barcodes: (Array[Char], Array[Char])): Unit = val (rowBarcode, columnBarcode) = barcodes val rowBc = new String(rowBarcode) @@ -33,28 +32,23 @@ final class UnexpectedSequenceTracker(cacheDir: Path, colReference: Reference) e val writer = outputFileWriters(colBc) writer.write(rowBc) writer.write("\n") - } + + end reportUnexpected override def close(): Unit = outputFileWriters.values.foreach { writer => - try { - writer.close() - } catch { - case NonFatal(e) => log.error(e)("Error closing file") - } + try writer.close() + catch case NonFatal(e) => log.error(e)("Error closing file") } - private[this] def newWriterFor(shard: String): BufferedWriter = { + private[this] def newWriterFor(shard: String): BufferedWriter = val name = cacheDir.resolve(nameFor(shard)) new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(name))) - } -} +end UnexpectedSequenceTracker -object UnexpectedSequenceTracker { +object UnexpectedSequenceTracker: private[this] val fileExtension: String = ".txt" def nameFor(shard: String) = s"unexpected-$shard$fileExtension" - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BaseReference.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BaseReference.scala index 4a7869f..8e02e08 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BaseReference.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BaseReference.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,7 +13,7 @@ abstract class BaseReference( final val allBarcodes: Seq[String], barcodeToInputBarcode: Object2ObjectMap[String, String], barcodeEntries: Object2ObjectMap[String, mutable.LinkedHashSet[String]] -) extends Reference { +) extends Reference: require(allBarcodes.nonEmpty, "Reference may not be empty") @@ -22,11 +22,10 @@ abstract class BaseReference( final def isDefined(barcode: String): Boolean = find(barcode).nonEmpty - final def idsForBarcode(barcode: String): Seq[String] = { + final def idsForBarcode(barcode: String): Seq[String] = val ids = barcodeEntries.get(barcode) - if (ids == null) Vector.empty + if ids == null then Vector.empty else ids.toVector - } final def barcodesForId(id: String): Seq[String] = barcodesForIdMap.getOrElse(id, Nil) @@ -36,24 +35,21 @@ abstract class BaseReference( * common for a single ID to be represented by several barcodes (indicating replicates). Thus, the value lists may * contain several distinct barcodes. */ - final lazy val barcodesForIdMap: Map[String, List[String]] = { + final lazy val barcodesForIdMap: Map[String, List[String]] = val m = mutable.HashMap[String, List[String]]() barcodeEntries.forEach((barcode, ids) => ids.foreach(id => m.put(id, barcode :: m.getOrElse(id, Nil)))) m.toMap - } - def referenceBarcodeForDnaBarcode(matchingBarcode: String): String = { + def referenceBarcodeForDnaBarcode(matchingBarcode: String): String = val inputBarcode = barcodeToInputBarcode.get(matchingBarcode) - if (inputBarcode == null) throw new IllegalArgumentException(s"Unknown matching barcode $matchingBarcode") + if inputBarcode == null then throw new IllegalArgumentException(s"Unknown matching barcode $matchingBarcode") else inputBarcode - } - final lazy val allIds: Seq[String] = { + final lazy val allIds: Seq[String] = val ids = mutable.LinkedHashSet[String]() allBarcodes.foreach(barcode => ids ++= barcodeEntries.get(barcode)) ids.toVector - } final val barcodeLength: Int = allBarcodes.head.length -} +end BaseReference diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BkTree.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BkTree.scala deleted file mode 100644 index 14e630c..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/BkTree.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3.reference - -import org.broadinstitute.gpp.poolq3.reference.BkTree._ - -final class BkTree[A](dist: (A, A) => Int, dictionary: Seq[A]) { - require(dictionary.nonEmpty, "Dictionary must be non-empty") - - private[this] def empty: Array[Node[A]] = Array.fill(8)(E) - - private[this] def extend(a: Array[Node[A]], minExtension: Int): Array[Node[A]] = - Array.concat(a, Array.fill(math.max(a.length, minExtension - a.length + 1))(E)) - - private[this] val root: Node[A] = { - val initial: Node[A] = N(dictionary.head, empty) - - def insert(t: Node[A], s: A): Node[A] = - t match { - case E => N(s, empty) - case n @ N(v, c) => - val d = dist(v, s) - if (c.length <= d) { - val nc = extend(c, d) - nc(d) = N(s, empty) - N(n.value, nc) - } else { - c(d) = insert(c(d), s) - n - } - } - - dictionary.drop(1).foldLeft(initial)(insert) - } - - def query(s: A, n: Int): Set[A] = { - var children: List[A] = Nil - def loop(node: Node[A]): Unit = - node match { - case E => () - case N(v, c) => - val d = dist(v, s) - val range = math.max(0, d - n) to math.min(d + n, c.length - 1) - range.foreach(x => loop(c(x))) - if (d <= n) children ::= v - () - } - - loop(root) - children.toSet - } - -} - -object BkTree { - - sealed private[BkTree] trait Node[+T] - private[BkTree] case object E extends Node[Nothing] - final private[BkTree] case class N[T](value: T, children: Array[Node[T]]) extends Node[T] - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/ExactReference.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/ExactReference.scala index 54b43f7..4b3716d 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/ExactReference.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/ExactReference.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import scala.collection.mutable import it.unimi.dsi.fastutil.objects.Object2ObjectMap import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry -import org.broadinstitute.gpp.poolq3.seq._ +import org.broadinstitute.gpp.poolq3.seq.* final class ExactReference private[ExactReference] ( allBarcodes: Seq[String], @@ -17,31 +17,29 @@ final class ExactReference private[ExactReference] ( barcodeIds: Object2ObjectMap[String, mutable.LinkedHashSet[String]], barcodeProcessor: String => String, includeAmbiguous: Boolean -) extends BaseReference(allBarcodes, barcodeToInputBarcode, barcodeIds) { +) extends BaseReference(allBarcodes, barcodeToInputBarcode, barcodeIds): // we still have variants for exact matching in case we need to deal with truncated barcodes private[this] val truncationVariants = Reference.truncationVariants(allBarcodes, barcodeProcessor, includeAmbiguous) def find(barcode: String): Seq[MatchedBarcode] = - if (containsN(barcode)) Vector.empty[MatchedBarcode] - else { + if containsN(barcode) then Vector.empty[MatchedBarcode] + else val barcodes: List[String] = truncationVariants.get(barcode) - if (barcodes == null) Vector.empty[MatchedBarcode] + if barcodes == null then Vector.empty[MatchedBarcode] else barcodes.map(MatchedBarcode(_, 0)) - } -} +end ExactReference -object ExactReference { +object ExactReference: def apply( mappings: Seq[ReferenceEntry], barcodeProcessor: String => String, includeAmbiguous: Boolean - ): ExactReference = { + ): ExactReference = val (barcodes, barcodeToInputBarcode, barcodeIds) = Reference.build(mappings) new ExactReference(barcodes.toVector, barcodeToInputBarcode, barcodeIds, barcodeProcessor, includeAmbiguous) - } -} +end ExactReference diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/MatchedBarcode.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/MatchedBarcode.scala index c1cf7f8..c8a1620 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/MatchedBarcode.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/MatchedBarcode.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/Reference.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/Reference.scala index 9f08ecf..37ce34c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/Reference.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/Reference.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,7 +12,7 @@ import org.broadinstitute.gpp.poolq3.parser.{ConflictingBarcodeException, Refere /** Represents one dimension of reference data, which is a mapping between DNA barcodes and associated identifiers. */ -trait Reference { +trait Reference: /** Finds the best matching barcode in the reference data for the barcode provided */ def find(barcode: String): Seq[MatchedBarcode] @@ -41,14 +41,28 @@ trait Reference { /** Returns the length of the barcodes represented by this reference */ def barcodeLength: Int -} +end Reference -object Reference { +object Reference: type Mappings = (Seq[String], Object2ObjectMap[String, String], Object2ObjectMap[String, mutable.LinkedHashSet[String]]) - def build(mappings: Seq[ReferenceEntry]): Mappings = { + def apply( + matcher: String, + barcodeProcessor: String => String, + includeAmbiguous: Boolean, + bs: Seq[ReferenceEntry] + ): Reference = + matcher.toLowerCase match + case "exact" => ExactReference(bs, barcodeProcessor, includeAmbiguous) + case "mismatch" => VariantReference(bs, barcodeProcessor, includeAmbiguous) + case _ => + throw new IllegalArgumentException( + s"Unknown matching function `$matcher`. Please choose either exact or mismatch." + ) + + def build(mappings: Seq[ReferenceEntry]): Mappings = // these make up the return type val barcodes = new mutable.LinkedHashSet[String] val barcodeIds = new Object2ObjectOpenHashMap[String, mutable.LinkedHashSet[String]]() @@ -59,36 +73,33 @@ object Reference { mappings.foreach { referenceEntry => barcodes += referenceEntry.dnaBarcode // check that for any barcode used in matching, only one _input_ barcode reduces to it - if (barcodeToInputBarcode.containsKey(referenceEntry.dnaBarcode)) { + if barcodeToInputBarcode.containsKey(referenceEntry.dnaBarcode) then val witness = barcodeToInputBarcode.get(referenceEntry.dnaBarcode) - if (witness != referenceEntry.referenceBarcode) { + if witness != referenceEntry.referenceBarcode then throw new ConflictingBarcodeException(referenceEntry.referenceBarcode, referenceEntry.referenceId, witness) - } - } else { - barcodeToInputBarcode.put(referenceEntry.dnaBarcode, referenceEntry.referenceBarcode) - } + else barcodeToInputBarcode.put(referenceEntry.dnaBarcode, referenceEntry.referenceBarcode) // this looks inefficient, but using `.putIfAbsent` appears to basically do the same thing, // with the exception that `.putIfAbsent` requires a new set to be constructed every time // regardless of whether it's used, since it's a Java API and Java doesn't support by-name // parameters - if (barcodeIds.containsKey(referenceEntry.dnaBarcode)) { + if barcodeIds.containsKey(referenceEntry.dnaBarcode) then barcodeIds.get(referenceEntry.dnaBarcode) += referenceEntry.referenceId - } else { + else val set = mutable.LinkedHashSet[String]() set += referenceEntry.referenceId barcodeIds.put(referenceEntry.dnaBarcode, set) - } } (barcodes.toVector, barcodeToInputBarcode, barcodeIds) - } + + end build def truncationVariants( barcodes: Seq[String], barcodeProcessor: String => String, includeAmbiguous: Boolean - ): Object2ObjectMap[String, List[String]] = { + ): Object2ObjectMap[String, List[String]] = val map = new Object2ObjectOpenHashMap[String, List[String]] map.defaultReturnValue(Nil) barcodes.foreach { barcode => @@ -98,23 +109,17 @@ object Reference { } // truncation is one of 2 ways we can end up with ambiguous matches - if (!includeAmbiguous) { - Reference.pruneAmbiguous(map) - } + if !includeAmbiguous then Reference.pruneAmbiguous(map) map - } - def pruneAmbiguous[S <: Seq[String]](barcodeVariants: Object2ObjectMap[String, S]): Unit = { + end truncationVariants + + def pruneAmbiguous[S <: Seq[String]](barcodeVariants: Object2ObjectMap[String, S]): Unit = // list ambiguous variants var ambiguousVariants: List[String] = Nil - barcodeVariants.forEach { (variant, barcodes) => - if (barcodes.size > 1) { - ambiguousVariants ::= variant - } - } + barcodeVariants.forEach((variant, barcodes) => if barcodes.size > 1 then ambiguousVariants ::= variant) // remove them from `barcodeVariants` ambiguousVariants.foreach(barcodeVariants.remove) - } -} +end Reference diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/VariantReference.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/VariantReference.scala index 25bef56..f68c1ef 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/VariantReference.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/VariantReference.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -18,19 +18,19 @@ final class VariantReference private[VariantReference] ( barcodeIds: Object2ObjectMap[String, mutable.LinkedHashSet[String]], barcodeProcessor: String => String, includeAmbiguous: Boolean -) extends BaseReference(allBarcodes, barcodeToInputBarcode, barcodeIds) { +) extends BaseReference(allBarcodes, barcodeToInputBarcode, barcodeIds): private[this] val truncationVariants = Reference.truncationVariants(allBarcodes, barcodeProcessor, includeAmbiguous) private[this] val mismatchVariants: Object2ObjectMap[String, List[String]] = generateVariants - override def find(barcode: String): Seq[MatchedBarcode] = { + override def find(barcode: String): Seq[MatchedBarcode] = val bases = barcode.toCharArray - singleNIndex(bases) match { + singleNIndex(bases) match case NoN => val exact = truncationVariants.get(barcode) - if (exact.nonEmpty) exact.map(MatchedBarcode(_, 0)) + if exact.nonEmpty then exact.map(MatchedBarcode(_, 0)) else mismatchVariants.get(barcode).map(MatchedBarcode(_, 1)) case PolyN => Seq.empty[MatchedBarcode] @@ -40,18 +40,18 @@ final class VariantReference private[VariantReference] ( val variants = posVariants(bases, n) val matchingReferenceBarcodes = variants.flatMap(truncationVariants.get(_)) - if (includeAmbiguous || matchingReferenceBarcodes.lengthCompare(1) == 0) { + if includeAmbiguous || matchingReferenceBarcodes.lengthCompare(1) == 0 then // this works because the variants have the `N` replaced, so they contain only [ACGT] - that is, they must // match _exactly_ to a barcode in the reference file; we report an edit distance of 1 because we have to // assume the `N` was a mismatch matchingReferenceBarcodes.map(bc => MatchedBarcode(bc, 1)) - } else { - Seq.empty - } - } - } + else Seq.empty + + end match + + end find - private[this] def posVariants(bases: Array[Char], nIdx: Int, orig: Char = 'N'): Seq[String] = { + private[this] def posVariants(bases: Array[Char], nIdx: Int, orig: Char = 'N'): Seq[String] = val ret = new Array[String](4) bases(nIdx) = 'A' ret(0) = new String(bases) @@ -63,19 +63,20 @@ final class VariantReference private[VariantReference] ( ret(3) = new String(bases) bases(nIdx) = orig ArraySeq.unsafeWrapArray(ret) - } - private[this] def generateVariants: Object2ObjectMap[String, List[String]] = { + end posVariants + + private[this] def generateVariants: Object2ObjectMap[String, List[String]] = val initialVariants: Seq[(String, String)] = allBarcodes.map(bc => (barcodeProcessor(bc), bc)) val mismatchVariants: Seq[(String, String)] = - for { + for (variant, barcode) <- initialVariants bases = variant.toArray i <- bases.indices v <- posVariants(bases, i, bases(i)) - } yield (v, barcode) + yield (v, barcode) // build the resulting map of variant to seq of originals val map = new Object2ObjectOpenHashMap[String, List[String]] @@ -87,24 +88,22 @@ final class VariantReference private[VariantReference] ( map.put(variant, barcode :: bcs) } - if (!includeAmbiguous) { - Reference.pruneAmbiguous(map) - } + if !includeAmbiguous then Reference.pruneAmbiguous(map) map - } -} + end generateVariants + +end VariantReference -object VariantReference { +object VariantReference: def apply( mappings: Seq[ReferenceEntry], barcodeProcessor: String => String, includeAmbiguous: Boolean - ): VariantReference = { + ): VariantReference = val (barcodes, barcodeToInputBarcodes, barcodeIds) = Reference.build(mappings) new VariantReference(barcodes.toVector, barcodeToInputBarcodes, barcodeIds, barcodeProcessor, includeAmbiguous) - } -} +end VariantReference diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reference/package.scala deleted file mode 100644 index 70c5859..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reference/package.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry - -/** Provides classes implementing reference databases as well as utility functions used by the various reference - * database implementations. - * - * ==Overview== - * [[org.broadinstitute.gpp.poolq3.reference.Reference]] defines the the basic interface for a single dimension of - * reference data. - * - * [[org.broadinstitute.gpp.poolq3.reference.ExactReference]] provides a basic implementation using exact matching. It - * is suitable for use as a column reference, although more efficient implementations may be possible. - */ -package object reference { - - def referenceFor( - matcher: String, - barcodeProcessor: String => String, - includeAmbiguous: Boolean, - bs: Seq[ReferenceEntry] - ): Reference = - matcher.toLowerCase match { - case "exact" => ExactReference(bs, barcodeProcessor, includeAmbiguous) - case "mismatch" => VariantReference(bs, barcodeProcessor, includeAmbiguous) - case _ => - throw new IllegalArgumentException( - s"Unknown matching function `$matcher`. Please choose either exact or mismatch." - ) - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsWriter.scala index e98ab7f..b1bdefb 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,12 +10,12 @@ import java.nio.file.{Files, Path} import scala.util.{Try, Using} -import cats.syntax.all._ +import cats.syntax.all.* import org.broadinstitute.gpp.poolq3.hist.{ReadOnlyHistogram, ShardedHistogram} import org.broadinstitute.gpp.poolq3.parser.BarcodeSet import org.broadinstitute.gpp.poolq3.reference.Reference -object BarcodeCountsWriter { +object BarcodeCountsWriter: def write( file: Path, @@ -26,10 +26,9 @@ object BarcodeCountsWriter { umiBarcodes: Option[BarcodeSet], dialect: ReportsDialect ): Try[Unit] = - umiBarcodes match { + umiBarcodes match case None => write(file, hist, rowReference, colReference, dialect) case Some(ub) => writeUmi(file, umiFileDir, hist, rowReference, colReference, ub, dialect) - } private[reports] def write( barcodeCountsFile: Path, @@ -62,7 +61,7 @@ object BarcodeCountsWriter { colReference: Reference, umiBarcodes: BarcodeSet, dialect: ReportsDialect - ): Try[Unit] = { + ): Try[Unit] = val parsedFilename = parseFilename(barcodeCountsFile) val umiFileDir = umiFileDirOpt.getOrElse(barcodeCountsFile.resolveSibling("umi-barcode-counts")) val basename = parsedFilename.basename @@ -85,6 +84,7 @@ object BarcodeCountsWriter { dialect ) } - } -} + end writeUmi + +end BarcodeCountsWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeFrequency.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeFrequency.scala index 8c7b2b6..293b4cf 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeFrequency.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeFrequency.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,9 +7,7 @@ package org.broadinstitute.gpp.poolq3.reports private[reports] case class BarcodeFrequency(bc: String, frequency: Int) -private[reports] object BarcodeFrequency { +private[reports] object BarcodeFrequency: implicit val ord: Ordering[BarcodeFrequency] = Ordering.by[BarcodeFrequency, (Int, String)](b => (-b.frequency, b.bc)).reverse - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileWriter.scala index e571b95..e49b0ab 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -20,7 +20,7 @@ import org.log4s.{Logger, getLogger} /** Computes and writes a condition-correlation matrix file. Compares counts for each experimental condition using * Pearson correlation. */ -object CorrelationFileWriter { +object CorrelationFileWriter: private[this] val log: Logger = getLogger @@ -29,32 +29,26 @@ object CorrelationFileWriter { normalizedCounts: Map[String, Map[ColId, Double]], rowReference: Reference, colReference: Reference - ): Try[Option[CorrelationFileType.type]] = { - if (colReference.allIds.size < 2 || rowReference.allBarcodes.size < 2) { + ): Try[Option[CorrelationFileType.type]] = + if colReference.allIds.size < 2 || rowReference.allBarcodes.size < 2 then log.warn( "Skipping correlation file for trivial dataset " + s"(${colReference.allIds.size} columns and ${rowReference.allBarcodes.size} rows)" ) Success(None) - } else { + else val cor = new PearsonsCorrelation() val countsMatrix = makeCountsMatrix(normalizedCounts, rowReference, colReference) val pearsonMatrix = cor.computeCorrelationMatrix(countsMatrix) Using(new PrintWriter(file.toFile)) { pw => printHeaders(colReference, pw) - for (i <- colReference.allIds.indices) { + for i <- colReference.allIds.indices do pw.print(colReference.allIds(i)) - for (j <- colReference.allIds.indices) { - pw.print("\t" + Decimal00Format.format(pearsonMatrix.getEntry(i, j))) - } + for j <- colReference.allIds.indices do pw.print("\t" + Decimal00Format.format(pearsonMatrix.getEntry(i, j))) pw.println() - } Some(CorrelationFileType) } - } - - } private[reports] def printHeaders(colReference: Reference, pw: PrintWriter): Unit = pw.println("\t" + colReference.allIds.mkString("\t")) @@ -63,18 +57,17 @@ object CorrelationFileWriter { counts: Map[String, Map[ColId, Double]], rowReference: Reference, colReference: Reference - ): Array[Array[Double]] = { + ): Array[Array[Double]] = val matrix = Array.ofDim[Double](rowReference.allBarcodes.size, colReference.allIds.size) - for (i <- rowReference.allBarcodes.indices) { + for i <- rowReference.allBarcodes.indices do val row = rowReference.allBarcodes(i) val rowCounts = counts(row) - for (j <- colReference.allIds.indices) - matrix(i)(j) = rowCounts(colReference.allIds(j)) - } + for j <- colReference.allIds.indices do matrix(i)(j) = rowCounts(colReference.allIds(j)) matrix - } -} + end makeCountsMatrix + +end CorrelationFileWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CountsWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CountsWriter.scala index 888f5cc..3fa04d1 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CountsWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/CountsWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,12 +10,12 @@ import java.nio.file.{Files, Path} import scala.util.{Try, Using} -import cats.syntax.all._ +import cats.syntax.all.* import org.broadinstitute.gpp.poolq3.hist.{ReadOnlyHistogram, ShardedHistogram} import org.broadinstitute.gpp.poolq3.parser.BarcodeSet import org.broadinstitute.gpp.poolq3.reference.Reference -object CountsWriter { +object CountsWriter: def write( countsFile: Path, @@ -26,10 +26,9 @@ object CountsWriter { umiBarcodes: Option[BarcodeSet], dialect: ReportsDialect ): Try[Unit] = - umiBarcodes match { + umiBarcodes match case None => write(countsFile, hist, rowReference, colReference, dialect) case Some(ub) => writeUmi(countsFile, umiFileDir, hist, rowReference, colReference, ub, dialect) - } private[reports] def write( countsFile: Path, @@ -64,7 +63,7 @@ object CountsWriter { colReference: Reference, umiBarcodes: BarcodeSet, dialect: ReportsDialect - ): Try[Unit] = { + ): Try[Unit] = val parsedFilename = parseFilename(countsFile) val umiFileDir = umiFileDirOpt.getOrElse(countsFile.resolveSibling("umi-counts")) val basename = parsedFilename.basename @@ -87,6 +86,7 @@ object CountsWriter { dialect ) } - } -} + end writeUmi + +end CountsWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/LogNormalizedCountsWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/LogNormalizedCountsWriter.scala index b737420..9281445 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/LogNormalizedCountsWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/LogNormalizedCountsWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -14,7 +14,7 @@ import org.broadinstitute.gpp.poolq3.hist.ReadOnlyHistogram import org.broadinstitute.gpp.poolq3.numeric.logNormalize import org.broadinstitute.gpp.poolq3.reference.Reference -object LogNormalizedCountsWriter { +object LogNormalizedCountsWriter: type ColId = String @@ -44,7 +44,7 @@ object LogNormalizedCountsWriter { hist: ReadOnlyHistogram[(String, String)], rowReference: Reference, colReference: Reference - ): Map[String, Map[ColId, Double]] = { + ): Map[String, Map[ColId, Double]] = val columnReadCounts: Map[String, Int] = getColumnReadCounts(rowReference, colReference, hist) rowReference.allBarcodes.map { row => @@ -60,6 +60,7 @@ object LogNormalizedCountsWriter { row -> columns }.toMap - } -} + end logNormalizedCounts + +end LogNormalizedCountsWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ParsedFilename.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ParsedFilename.scala index 68cca4f..9d28159 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ParsedFilename.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ParsedFilename.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala index ff050ab..f9cbb6f 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -14,26 +14,23 @@ import org.broadinstitute.gpp.poolq3.numeric.{Decimal000Format, Decimal00Format, import org.broadinstitute.gpp.poolq3.process.State import org.broadinstitute.gpp.poolq3.reference.Reference -object QualityWriter { +object QualityWriter: - class TeeWriter(w1: PrintWriter, w2: PrintWriter) { + class TeeWriter(w1: PrintWriter, w2: PrintWriter): - def print(s: String): Unit = { + def print(s: String): Unit = w1.print(s) w2.print(s) - } - def println(s: String): Unit = { + def println(s: String): Unit = w1.println(s) w2.println(s) - } - def println(): Unit = { + def println(): Unit = w1.println() w2.println() - } - } + end TeeWriter def write( qualityFile: Path, @@ -47,7 +44,7 @@ object QualityWriter { Using.resources(new PrintWriter(qualityFile.toFile), new PrintWriter(conditionBarcodeCountsSummaryFile.toFile)) { case (qualityWriter, cbcsWriter) => val barcodeLocationStats = - if (isPairedEnd) { + if isPairedEnd then s"""Reads with no construct barcode: ${state.rowBarcodeNotFound + state.revRowBarcodeNotFound - state.neitherRowBarcodeFound} | |Reads with no forward construct barcode: ${state.rowBarcodeNotFound} @@ -59,13 +56,10 @@ object QualityWriter { |Max reverse construct barcode index: ${state.revRowBarcodeStats.maxPosStr} |Min reverse construct barcode index: ${state.revRowBarcodeStats.minPosStr} |Avg reverse construct barcode index: ${decOptFmt(state.revRowBarcodeStats.avg)}""".stripMargin - - } else { - s"""Reads with no construct barcode: ${state.rowBarcodeNotFound} + else s"""Reads with no construct barcode: ${state.rowBarcodeNotFound} |Max construct barcode index: ${state.rowBarcodeStats.maxPosStr} |Min construct barcode index: ${state.rowBarcodeStats.minPosStr} |Avg construct barcode index: ${decOptFmt(state.rowBarcodeStats.avg)}""".stripMargin - } val header = s"""Total reads: ${state.reads} @@ -109,7 +103,7 @@ object QualityWriter { rowReference: Reference, colReference: Reference, colBarcode: String - ): Seq[String] = { + ): Seq[String] = val conditions = colReference.idsForBarcode(colBarcode).mkString(",") val matchedRowAndCol: Int = rowReference.allBarcodes.map(rowBarcode => state.known.count((rowBarcode, colBarcode))).sum @@ -125,6 +119,7 @@ object QualityWriter { Decimal00Format.format(pct), Decimal000Format.format(logNormalize(matchedRowAndCol, state.reads)) ) - } -} + end perBarcodeQualityData + +end QualityWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReferenceCollisionWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReferenceCollisionWriter.scala index 33658dc..58fa01e 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReferenceCollisionWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReferenceCollisionWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,7 +13,7 @@ import scala.util.{Try, Using} import org.broadinstitute.gpp.poolq3.reference.Reference // TODO: this is not currently used -object ReferenceCollisionWriter { +object ReferenceCollisionWriter: def write(file: Path, reference: Reference, compat: Boolean): Try[Unit] = Using(new PrintWriter(file.toFile)) { writer => @@ -22,9 +22,9 @@ object ReferenceCollisionWriter { .map(bc => (bc, reference.idsForBarcode(bc))) .filter { case (_, ids) => ids.lengthCompare(1) > 1 } .foreach { case (bc, ids) => - if (compat) writer.println(s"$bc:\t${ids.mkString("\t")}") + if compat then writer.println(s"$bc:\t${ids.mkString("\t")}") else writer.println(s"$bc\t${ids.mkString(",")}") } } -} +end ReferenceCollisionWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReportsDialect.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReportsDialect.scala index db0fc11..bf38f95 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReportsDialect.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/ReportsDialect.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriter.scala index ebaa23c..87aa89e 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,7 +12,7 @@ import scala.util.{Try, Using} import org.broadinstitute.gpp.poolq3.{BuildInfo, PoolQConfig} -object RunInfoWriter { +object RunInfoWriter: def write(file: Path, config: PoolQConfig): Try[Unit] = Using(new PrintWriter(file.toFile)) { writer => @@ -30,4 +30,4 @@ object RunInfoWriter { writer.println() } -} +end RunInfoWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityWriter.scala index 8387348..9d5c06c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,9 +12,9 @@ import scala.util.{Try, Using} import org.broadinstitute.gpp.poolq3.process.State -object UmiQualityWriter { +object UmiQualityWriter: - def write(file: Path, state: State): Try[Unit] = { + def write(file: Path, state: State): Try[Unit] = // aggregate report data val umiBarcodeFrequencies = state.known.shards.toList.sorted.map { shard => val hist = state.known.forShard(Some(shard)) @@ -34,6 +34,7 @@ object UmiQualityWriter { topNUnexpectedBarcodeFrequencies.foreach { case BarcodeFrequency(bc, count) => pw.println(s"$bc\t$count") } pw.println() } - } -} + end write + +end UmiQualityWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala index a47aaaf..2ae2746 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -17,7 +17,7 @@ import org.broadinstitute.gpp.poolq3.process.UnexpectedSequenceTracker.nameFor import org.broadinstitute.gpp.poolq3.reference.Reference import org.log4s.{Logger, getLogger} -object UnexpectedSequenceWriter { +object UnexpectedSequenceWriter: private[this] val log: Logger = getLogger @@ -28,82 +28,72 @@ object UnexpectedSequenceWriter { colReference: Reference, globalReference: Option[Reference], maxMapSize: Int = 10_000_000 - ): Try[Unit] = { + ): Try[Unit] = // build a "reference set" - a set of unexpected barcodes that we will track exact counts for (read `samplePct`% of each shard) // load the whole cache, tracking only sequences in the reference set val (h, r) = loadCache(unexpectedSequenceCacheDir, colReference, nSequencesToReport, maxMapSize) Using(new PrintWriter(outputFile.toFile))(pw => printUnexpectedCounts(colReference, globalReference, h, r, pw)) - } - def removeCache(unexpectedSequenceCacheDir: Path): Unit = { + end write + + def removeCache(unexpectedSequenceCacheDir: Path): Unit = // swallow non-fatal exceptions def tryDelete(p: Path): Unit = - try - Files.delete(p) - catch { - case NonFatal(_) => log.warn(s"Unable to delete ${p.toAbsolutePath}") - } + try Files.delete(p) + catch case NonFatal(_) => log.warn(s"Unable to delete ${p.toAbsolutePath}") // run for side effects Files.list(unexpectedSequenceCacheDir).forEach(tryDelete) tryDelete(unexpectedSequenceCacheDir) - } + + end removeCache // defining this as a trait is not useful in the main codebase but it makes testing easier - private[reports] trait CachedBarcodes extends Iterator[String] with Closeable { + private[reports] trait CachedBarcodes extends Iterator[String] with Closeable: def colBc: String - } - final private[reports] class SourceCachedBarcodes(val colBc: String, source: Source) extends CachedBarcodes { + final private[reports] class SourceCachedBarcodes(val colBc: String, source: Source) extends CachedBarcodes: private val iter = source.getLines() def close(): Unit = source.close() def hasNext: Boolean = iter.hasNext def next(): String = iter.next() - } final private[reports] class BreadthFirstIterator(readers: mutable.Queue[CachedBarcodes]) - extends Iterator[(String, String)] { + extends Iterator[(String, String)]: def hasNext: Boolean = readers.nonEmpty - def next(): (String, String) = { + def next(): (String, String) = val reader = readers.dequeue() val ret = (reader.next(), reader.colBc) - if (reader.hasNext) { - readers.enqueue(reader) - } else { - reader.close() - } + if reader.hasNext then readers.enqueue(reader) + else reader.close() ret - } - } + end BreadthFirstIterator def loadCache( cacheDir: Path, colReference: Reference, nSequencesToReport: Int, maxMapSize: Int - ): (Map[String, Map[String, Int]], Vector[String]) = { + ): (Map[String, Map[String, Int]], Vector[String]) = val rowColBarcodeCounts = new mutable.HashMap[String, mutable.Map[String, Int]]() val allRowBarcodeCounts = new mutable.HashMap[String, Int]() // create & populate the list of readers val readers = mutable.Queue[CachedBarcodes]() - try { + try colReference.allBarcodes.foreach { colBc => val file = cacheDir.resolve(nameFor(colBc)) - if (Files.exists(file)) { + if Files.exists(file) then val cbc = new SourceCachedBarcodes(colBc, Source.fromFile(file.toFile)) - if (cbc.hasNext) { - readers.enqueue(cbc) - } - } + if cbc.hasNext then readers.enqueue(cbc) } val iterator = new BreadthFirstIterator(readers) - while (rowColBarcodeCounts.size < maxMapSize && iterator.hasNext) { + while rowColBarcodeCounts.size < maxMapSize && iterator.hasNext do val (rowBc, colBc) = iterator.next() val colBarcodeMap = rowColBarcodeCounts.getOrElseUpdate(rowBc, mutable.HashMap()) val _ = colBarcodeMap.updateWith(colBc) { @@ -114,8 +104,7 @@ object UnexpectedSequenceWriter { case None => Some(1) case Some(c) => Some(c + 1) } - } - + end while // at this point, we either exhausted the readers or we filled the map; go through the remaining data // and tally things up, but don't add new keys to the outer map readers.foreach { rdr => @@ -150,10 +139,11 @@ object UnexpectedSequenceWriter { .toMap, mostCommonRowBarcodesRanked ) - } finally { - readers.foreach(_.close()) - } - } + finally readers.foreach(_.close()) + + end try + + end loadCache private[reports] def printUnexpectedCounts( colReference: Reference, @@ -161,7 +151,7 @@ object UnexpectedSequenceWriter { h: Map[String, Map[String, Int]], rows: Vector[String], pw: PrintWriter - ): Unit = { + ): Unit = val colBarcodes = colReference.allBarcodes.map(colReference.referenceBarcodeForDnaBarcode) pw.println(headerText(colBarcodes)) @@ -174,9 +164,10 @@ object UnexpectedSequenceWriter { val total = counts.sum pw.println(s"$rowBc\t$total\t${counts.mkString("\t")}\t$possibleIds") } - } + + end printUnexpectedCounts private[reports] def headerText(colBarcodes: Seq[String]): String = s"Sequence\tTotal\t${colBarcodes.mkString("\t")}\tPotential IDs" -} +end UnexpectedSequenceWriter diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/package.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/package.scala deleted file mode 100644 index 1a0e8b2..0000000 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/package.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import java.io.PrintWriter -import java.nio.file.{Path, Paths} - -import scala.collection.mutable - -import org.broadinstitute.gpp.poolq3.hist.ReadOnlyHistogram -import org.broadinstitute.gpp.poolq3.reference.Reference - -package object reports { - - def writeRowIdentifiers(rowReference: Reference, rowBc: String, pw: PrintWriter): Unit = { - // write row identifiers - val rowBarcodeIds = rowReference.idsForBarcode(rowBc).mkString(",") - val rowInputBarcode = rowReference.referenceBarcodeForDnaBarcode(rowBc) - pw.print(s"$rowInputBarcode\t$rowBarcodeIds\t") - } - - def countsHeaderText(dialect: ReportsDialect, colHeadings: String, nRows: Int, nCols: Int): String = - dialect match { - case PoolQ3Dialect => s"Row Barcode\tRow Barcode IDs\t$colHeadings" - case PoolQ2Dialect => s"Construct Barcode\tConstruct IDs\t$colHeadings" - case GctDialect => - s"""#1.2 - |$nRows\t$nCols - |NAME\tDescription\t$colHeadings""".stripMargin - } - - /** Returns a map from column ID to the total read count for that column */ - def getColumnReadCounts( - rowReference: Reference, - colReference: Reference, - hist: ReadOnlyHistogram[(String, String)] - ): Map[String, Int] = - colReference.allIds.map { colId => - val readCount = - (for { - colBarcodeLong <- colReference.barcodesForId(colId) - rowBarcodeLong <- rowReference.allBarcodes - } yield hist.count((rowBarcodeLong, colBarcodeLong))).sum - - colId -> readCount - }.toMap - - def parseFilename(p: Path): ParsedFilename = { - val nameStr = p.getFileName.toString - val lastDotIdx = nameStr.lastIndexOf('.') - val (base, ext) = - if (lastDotIdx == -1) (nameStr, None) - else { - val (b, e) = nameStr.splitAt(lastDotIdx) - (b, Some(e)) - } - ParsedFilename(Option(p.getParent).getOrElse(Paths.get(".")), base, ext) - } - - /** Returns the maximum `n` `A`s in `xs`, in descending order - * @param xs - * a sequence of `A` - * @param n - * the number to return - * @param ord - * the ordering over `A` - * @tparam A - * the type of elements - * @return - */ - def topN[A](xs: Seq[A], n: Int)(implicit ord: Ordering[A]): List[A] = { - // split xs to take the first `n` elements blindly - val (firstN, rest) = xs.splitAt(n) - - // mutable.PriorityQueue is a max heap; we want a min heap of the largest `n` `A`'s seen so far, initialized with - // the first `n` elements of `xs` - val minHeap = mutable.PriorityQueue[A](firstN: _*)(ord.reverse) - - // for the rest, find the smallest element, call it `y`, and put the larger of `x` and `y` back in the min heap - rest.foreach { x => - val y = minHeap.dequeue() - minHeap.enqueue(ord.max(x, y)) - } - minHeap.dequeueAll.reverse.toList - } - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/reports.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/reports.scala new file mode 100644 index 0000000..9c553e4 --- /dev/null +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/reports.scala @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.reports + +import java.io.PrintWriter +import java.nio.file.{Path, Paths} + +import scala.collection.mutable + +import org.broadinstitute.gpp.poolq3.hist.ReadOnlyHistogram +import org.broadinstitute.gpp.poolq3.reference.Reference + +def writeRowIdentifiers(rowReference: Reference, rowBc: String, pw: PrintWriter): Unit = + // write row identifiers + val rowBarcodeIds = rowReference.idsForBarcode(rowBc).mkString(",") + val rowInputBarcode = rowReference.referenceBarcodeForDnaBarcode(rowBc) + pw.print(s"$rowInputBarcode\t$rowBarcodeIds\t") + +def countsHeaderText(dialect: ReportsDialect, colHeadings: String, nRows: Int, nCols: Int): String = + dialect match + case PoolQ3Dialect => s"Row Barcode\tRow Barcode IDs\t$colHeadings" + case PoolQ2Dialect => s"Construct Barcode\tConstruct IDs\t$colHeadings" + case GctDialect => + s"""#1.2 + |$nRows\t$nCols + |NAME\tDescription\t$colHeadings""".stripMargin + +/** Returns a map from column ID to the total read count for that column */ +def getColumnReadCounts( + rowReference: Reference, + colReference: Reference, + hist: ReadOnlyHistogram[(String, String)] +): Map[String, Int] = + colReference.allIds.map { colId => + val readCount = + (for + colBarcodeLong <- colReference.barcodesForId(colId) + rowBarcodeLong <- rowReference.allBarcodes + yield hist.count((rowBarcodeLong, colBarcodeLong))).sum + + colId -> readCount + }.toMap + +def parseFilename(p: Path): ParsedFilename = + val nameStr = p.getFileName.toString + val lastDotIdx = nameStr.lastIndexOf('.') + val (base, ext) = + if lastDotIdx == -1 then (nameStr, None) + else + val (b, e) = nameStr.splitAt(lastDotIdx) + (b, Some(e)) + ParsedFilename(Option(p.getParent).getOrElse(Paths.get(".")), base, ext) + +end parseFilename + +/** Returns the maximum `n` `A`s in `xs`, in descending order + * @param xs + * a sequence of `A` + * @param n + * the number to return + * @param ord + * the ordering over `A` + * @tparam A + * the type of elements + * @return + */ +def topN[A](xs: Seq[A], n: Int)(implicit ord: Ordering[A]): List[A] = + // split xs to take the first `n` elements blindly + val (firstN, rest) = xs.splitAt(n) + + // mutable.PriorityQueue is a max heap; we want a min heap of the largest `n` `A`'s seen so far, initialized with + // the first `n` elements of `xs` + val minHeap = mutable.PriorityQueue[A](firstN*)(ord.reverse) + + // for the rest, find the smallest element, call it `y`, and put the larger of `x` and `y` back in the min heap + rest.foreach { x => + val y = minHeap.dequeue() + minHeap.enqueue(ord.max(x, y)) + } + minHeap.dequeueAll.reverse.toList + +end topN diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/seq/seq.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/seq/seq.scala index adf214b..8dad342 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/seq/seq.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/seq/seq.scala @@ -1,60 +1,56 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ -package org.broadinstitute.gpp.poolq3 +package org.broadinstitute.gpp.poolq3.seq import scala.collection.mutable -package object seq { +val Bases: Seq[Char] = Vector('A', 'C', 'G', 'T') - val Bases: Seq[Char] = Vector('A', 'C', 'G', 'T') +val Complements: Map[Char, Char] = + Map('A' -> 'T', 'C' -> 'G', 'G' -> 'C', 'T' -> 'A', 'N' -> 'N') - val Complements: Map[Char, Char] = - Map('A' -> 'T', 'C' -> 'G', 'G' -> 'C', 'T' -> 'A', 'N' -> 'N') +final def complement(seq: String): String = + val length = seq.length - final def complement(seq: String): String = { - val length = seq.length + val bldr = new mutable.StringBuilder() + var i = 0 + while i < length do + bldr.append(Complements(seq.charAt(i))) + i += 1 + bldr.toString() - val bldr = new mutable.StringBuilder() - var i = 0 - while (i < length) { - bldr.append(Complements(seq.charAt(i))) - i += 1 - } - bldr.toString() - } +end complement - final def reverseComplement(seq: String): String = { - val bldr = new mutable.StringBuilder() +final def reverseComplement(seq: String): String = + val bldr = new mutable.StringBuilder() - var i = seq.length - 1 - while (i > -1) { - bldr.append(Complements(seq.charAt(i))) - i -= 1 - } - bldr.toString() - } + var i = seq.length - 1 + while i > -1 do + bldr.append(Complements(seq.charAt(i))) + i -= 1 + bldr.toString() + +end reverseComplement - /** Returns {{true}} iff the provided string consists only of DNA bases plus N */ - final def isDna(seq: String): Boolean = { +/** Returns {{true}} iff the provided string consists only of DNA bases plus N */ +final def isDna(seq: String): Boolean = + var i = seq.length - 1 + while i > -1 do + val b = seq.charAt(i) + if b != 'A' && b != 'C' && b != 'G' && b != 'T' && b != 'N' then return false + i -= 1 + true + +/** Returns {{true}} if the provided string consists only of [ACGTactg;:-] */ +final def isReferenceBarcode(seq: String): Boolean = + if seq.isEmpty then false + else { var i = seq.length - 1 - while (i > -1) { + while i > -1 do { val b = seq.charAt(i) - if (b != 'A' && b != 'C' && b != 'G' && b != 'T' && b != 'N') return false - i -= 1 - } - true - } - - /** Returns {{true}} if the provided string consists only of [ACGTactg;:-] */ - final def isReferenceBarcode(seq: String): Boolean = - if (seq.isEmpty) false - else { - var i = seq.length - 1 - while (i > -1) { - val b = seq.charAt(i) // format: off if (b != 'A' && b != 'a' && b != 'C' && b != 'c' && @@ -62,103 +58,89 @@ package object seq { b != 'T' && b != 't' && b != ':' && b != ';' && b != '-') return false // format: on - i -= 1 - } - true - } - - /** Returns the number of Ns in a given DNA sequence */ - final def nCount(seq: String): Int = { - var n = 0 - var i = seq.length - 1 - while (i > -1) { - if (seq.charAt(i) == 'N') n += 1 i -= 1 } - n - } - - final val NoN: Int = -1 - final val PolyN: Int = -2 - - // yes this is terrible and I hate it - // if seq contains no Ns, returns -1 - // if seq contains precisely 1 N, returns the index of the N - // if seq contains more than 1 N, returns -2 - final def singleNIndex(seq: Array[Char]): Int = { - var n = NoN - var i = seq.length - 1 - while (i > -1) { - if (seq(i) == 'N') { - if (n > -1) return PolyN - else n = i - } - i -= 1 - } - n - } - - /** Returns the number of Ns in a given DNA sequence up to a maximum */ - final def nCount(seq: String, max: Int): Int = { - var n = 0 - var i = seq.length - 1 - while (i > -1) { - if (seq.charAt(i) == 'N') n += 1 - if (n > 0 && n >= max) return n - i -= 1 - } - n - } - - /** Returns the number of Ns in a give DNA sequence */ - final def nCount(seq: Array[Char]): Int = { - var i = seq.length - 1 - var n = 0 - while (i > -1) { - if (seq(i) == 'N') n += 1 - i -= 1 - } - n - } - - /** Returns the number of Ns in a give DNA sequence up to a maximum */ - final def nCount(seq: Array[Char], max: Int): Int = { - var i = seq.length - 1 - var n = 0 - while (i > -1) { - if (seq(i) == 'N') n += 1 - if (n > 0 && n >= max) return n - i -= 1 - } - n - } - - /** Returns true if the provided DNA sequence contains an N */ - final def containsN(seq: Array[Char]): Boolean = { - var i = seq.length - 1 - while (i > -1) { - if (seq(i) == 'N') return true - i -= 1 - } - false - } - - final def containsN(seq: String): Boolean = { - var i = seq.length - 1 - while (i > -1) { - if (seq.charAt(i) == 'N') return true - i -= 1 - } - false - } - - final def countMismatches(s1: CharSequence, s2: CharSequence): Int = { - require(s1.length == s2.length, "Strings must be of the same length") - var distance = 0 - for (i <- 0 until s1.length) { - if (s1.charAt(i) != s2.charAt(i)) distance += 1 - } - distance + true } -} +/** Returns the number of Ns in a given DNA sequence */ +final def nCount(seq: String): Int = + var n = 0 + var i = seq.length - 1 + while i > -1 do + if seq.charAt(i) == 'N' then n += 1 + i -= 1 + n + +final val NoN: Int = -1 +final val PolyN: Int = -2 + +// yes this is terrible and I hate it +// if seq contains no Ns, returns -1 +// if seq contains precisely 1 N, returns the index of the N +// if seq contains more than 1 N, returns -2 +final def singleNIndex(seq: Array[Char]): Int = + var n = NoN + var i = seq.length - 1 + while i > -1 do + if seq(i) == 'N' then + if n > -1 then return PolyN + else n = i + i -= 1 + n + +end singleNIndex + +/** Returns the number of Ns in a given DNA sequence up to a maximum */ +final def nCount(seq: String, max: Int): Int = + var n = 0 + var i = seq.length - 1 + while i > -1 do + if seq.charAt(i) == 'N' then n += 1 + if n > 0 && n >= max then return n + i -= 1 + n + +end nCount + +/** Returns the number of Ns in a give DNA sequence */ +final def nCount(seq: Array[Char]): Int = + var i = seq.length - 1 + var n = 0 + while i > -1 do + if seq(i) == 'N' then n += 1 + i -= 1 + n + +/** Returns the number of Ns in a give DNA sequence up to a maximum */ +final def nCount(seq: Array[Char], max: Int): Int = + var i = seq.length - 1 + var n = 0 + while i > -1 do + if seq(i) == 'N' then n += 1 + if n > 0 && n >= max then return n + i -= 1 + n + +end nCount + +/** Returns true if the provided DNA sequence contains an N */ +final def containsN(seq: Array[Char]): Boolean = + var i = seq.length - 1 + while i > -1 do + if seq(i) == 'N' then return true + i -= 1 + false + +final def containsN(seq: String): Boolean = + var i = seq.length - 1 + while i > -1 do + if seq.charAt(i) == 'N' then return true + i -= 1 + false + +final def countMismatches(s1: CharSequence, s2: CharSequence): Int = + require(s1.length == s2.length, "Strings must be of the same length") + var distance = 0 + for i <- 0 until s1.length do if s1.charAt(i) != s2.charAt(i) then distance += 1 + distance diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/LongTuple.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/LongTuple.scala index 8f21bef..269a8be 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/LongTuple.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/LongTuple.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,26 +7,22 @@ package org.broadinstitute.gpp.poolq3.types import it.unimi.dsi.fastutil.HashCommon -final class LongTuple(val _1: Long, val _2: Long) { +final class LongTuple(val _1: Long, val _2: Long): override def equals(other: Any): Boolean = - other match { + other match case that: LongTuple if this._1 == that._1 && this._2 == that._2 => true case _ => false - } - override def hashCode(): Int = { + override def hashCode(): Int = val h1 = HashCommon.murmurHash3(_1).toInt val h2 = HashCommon.murmurHash3(_2).toInt (h1 & 0x7fffffff) * 16661 + (h2 & 0x7fffffff) - } -} +end LongTuple -object LongTuple { +object LongTuple: def apply(l1: Long, l2: Long): LongTuple = new LongTuple(l1, l2) def unapply(lt: LongTuple): Option[(Long, Long)] = Some((lt._1, lt._2)) - -} diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala index 3dfccec..8c27f0c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQException.scala index 173a118..d3a0eff 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQRunSummary.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQRunSummary.scala index fa500fb..d296fbd 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQRunSummary.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQRunSummary.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala index 33fad18..a465d6c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/Read.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/Read.scala index a40a662..fe2e2d3 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/Read.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/Read.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicy.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicy.scala index f355c06..fe451b6 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicy.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicy.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,65 +7,58 @@ package org.broadinstitute.gpp.poolq3.types import scala.annotation.tailrec -sealed trait ReadIdCheckPolicy { +sealed trait ReadIdCheckPolicy: def check(r1: Read, r2: Read): Unit def name: String -} -object ReadIdCheckPolicy { +object ReadIdCheckPolicy: - def forName(s: String): ReadIdCheckPolicy = s.toLowerCase() match { + def forName(s: String): ReadIdCheckPolicy = s.toLowerCase() match case "lax" => ReadIdCheckPolicy.Lax case "strict" => ReadIdCheckPolicy.Strict case "illumina" => ReadIdCheckPolicy.Illumina case _ => throw new IllegalArgumentException(s"$s is not a read ID check policy") - } - case object Lax extends ReadIdCheckPolicy { + case object Lax extends ReadIdCheckPolicy: def check(r1: Read, r2: Read): Unit = () val name: String = "lax" - } - case object Strict extends ReadIdCheckPolicy { + case object Strict extends ReadIdCheckPolicy: def check(r1: Read, r2: Read): Unit = - if (r1.id != r2.id) throw UncoordinatedReadsException(r1.id, r2.id) + if r1.id != r2.id then throw UncoordinatedReadsException(r1.id, r2.id) else () val name: String = "strict" - } - - case object Illumina extends ReadIdCheckPolicy { + case object Illumina extends ReadIdCheckPolicy: // we check all the characters in the ID up to the first space we find; the pattern we are matching is: // @::::::: ::: // for details, see // https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm - def check(r1: Read, r2: Read): Unit = { + def check(r1: Read, r2: Read): Unit = val rid1 = r1.id val rid2 = r2.id val end = math.min(rid1.length(), rid2.length()) @tailrec - def loop(i: Int): Unit = { - if (i >= end) () - else { + def loop(i: Int): Unit = + if i >= end then () + else val c1 = rid1.charAt(i) val c2 = rid2.charAt(i) - if (c1 == ' ' && c2 == ' ') () - else { - if (c1 != c2) throw UncoordinatedReadsException(rid1, rid2) + if c1 == ' ' && c2 == ' ' then () + else + if c1 != c2 then throw UncoordinatedReadsException(rid1, rid2) loop(i + 1) - } - } - } loop(1) // first char is assumed to be `@` - } + + end check val name = "illumina" - } + end Illumina -} +end ReadIdCheckPolicy diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadsFileType.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadsFileType.scala index 4f01aef..e772c75 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadsFileType.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/ReadsFileType.scala @@ -1,37 +1,32 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.types -sealed trait ReadsFileType extends Product with Serializable { +sealed trait ReadsFileType extends Product with Serializable: def displayName: String -} -case object FastqType extends ReadsFileType { +case object FastqType extends ReadsFileType: override val displayName: String = "FASTQ" -} -case object SamType extends ReadsFileType { +case object SamType extends ReadsFileType: override val displayName: String = "SAM" -} -case object BamType extends ReadsFileType { +case object BamType extends ReadsFileType: override val displayName: String = "BAM" -} -case object TextType extends ReadsFileType { +case object TextType extends ReadsFileType: override val displayName: String = "text" -} -object ReadsFileType { +object ReadsFileType: def fromFilename(n: String): Option[ReadsFileType] = - if (n.endsWith(".fastq") || n.endsWith(".fastq.gz")) Some(FastqType) - else if (n.endsWith(".sam")) Some(SamType) - else if (n.endsWith(".bam")) Some(BamType) - else if (n.endsWith(".txt") || n.endsWith(".txt.gz")) Some(TextType) + if n.endsWith(".fastq") || n.endsWith(".fastq.gz") then Some(FastqType) + else if n.endsWith(".sam") then Some(SamType) + else if n.endsWith(".bam") then Some(BamType) + else if n.endsWith(".txt") || n.endsWith(".txt.gz") then Some(TextType) else None -} +end ReadsFileType diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/UncoordinatedReadsException.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/UncoordinatedReadsException.scala index a79f6ae..eb5d5f8 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/UncoordinatedReadsException.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/UncoordinatedReadsException.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/PoolQTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/PoolQTest.scala index dc5988f..748f406 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/PoolQTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/PoolQTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,7 +10,7 @@ import java.nio.file.Paths import munit.FunSuite import org.broadinstitute.gpp.poolq3.parser.{ReferenceData, ReferenceEntry} -class PoolQTest extends FunSuite { +class PoolQTest extends FunSuite: test("makeRowBarcodePolicy works for paired end cases") { val rd = new ReferenceData(List(ReferenceEntry("AAAA;CCCC", "a sea"))) @@ -34,4 +34,4 @@ class PoolQTest extends FunSuite { assert(revPolOpt.isEmpty) } -} +end PoolQTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/TestResources.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/TestResources.scala index 2d8be1e..48f64d5 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/TestResources.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/TestResources.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,18 +7,16 @@ package org.broadinstitute.gpp.poolq3 import java.nio.file.{Path, Paths} -trait TestResources { +trait TestResources: - def resourcePath(clazz: Class[_], name: String): Path = { + def resourcePath(clazz: Class[?], name: String): Path = val r = clazz.getResource(name) require(r != null, s"Can't find resource at path $name for ${getClass.getName}") Paths.get(r.getPath) - } - def resourcePath(name: String): Path = { + def resourcePath(name: String): Path = val r = this.getClass.getResource(name) require(r != null, s"Can't find resource at path $name for ${getClass.getName}") Paths.get(r.getPath) - } -} +end TestResources diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePackageTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePackageTest.scala index 3bd5975..6465000 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePackageTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePackageTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,9 +8,9 @@ package org.broadinstitute.gpp.poolq3.barcode import munit.FunSuite import org.broadinstitute.gpp.poolq3.parser.CloseableIterator -class BarcodePackageTest extends FunSuite { +class BarcodePackageTest extends FunSuite: - private class TestIterator(n: Int) extends CloseableIterator[Int] { + private class TestIterator(n: Int) extends CloseableIterator[Int]: private var closed = false private val internal = Range(0, n).iterator @@ -22,18 +22,18 @@ class BarcodePackageTest extends FunSuite { override def hasNext: Boolean = internal.hasNext override def next(): Int = internal.next() - } + + end TestIterator test("parserFor") { // we need to keep references to the iterators we make var iters: List[TestIterator] = Nil val iterable = parserFor[Int, Int]( List(2, 2, 3), - x => { + x => val ret = new TestIterator(x) iters ::= ret ret - } ) // get the whole iterator, convert it to a list, and close it (closes the last iterator) @@ -51,11 +51,10 @@ class BarcodePackageTest extends FunSuite { var iters: List[TestIterator] = Nil val iterable = parserFor[Int, Int]( List(2, 2, 3), - x => { + x => val ret = new TestIterator(x) iters ::= ret ret - } ) // get the whole iterator, convert it to a list, and close it (closes the last iterator) @@ -73,11 +72,10 @@ class BarcodePackageTest extends FunSuite { var iters: List[TestIterator] = Nil val iterable = parserFor[Int, Int]( Nil, - x => { + x => val ret = new TestIterator(x) iters ::= ret ret - } ) // get the whole iterator, convert it to a list, and close it (closes the last iterator) @@ -90,4 +88,4 @@ class BarcodePackageTest extends FunSuite { assert(iters.forall(_.isClosed)) } -} +end BarcodePackageTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicyTest.scala index 21abbfa..815cdf7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/BarcodePolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.barcode import munit.FunSuite -class BarcodePolicyTest extends FunSuite { +class BarcodePolicyTest extends FunSuite: test("fixed barcode policy") { assertEquals(BarcodePolicy("FIXED@0", 8, false), FixedOffsetPolicy(0, 8, false)) @@ -56,4 +56,4 @@ class BarcodePolicyTest extends FunSuite { ) } -} +end BarcodePolicyTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSourceTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSourceTest.scala index adbe37a..d93a87d 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSourceTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedBarcodeSourceTest.scala @@ -1,16 +1,16 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.parser.DmuxedIterable import org.broadinstitute.gpp.poolq3.types.Read -class DmuxedBarcodeSourceTest extends FunSuite { +class DmuxedBarcodeSourceTest extends FunSuite: private[this] val rowPolicy = BarcodePolicy("FIXED@0", 10, skipShortReads = true) @@ -64,4 +64,4 @@ class DmuxedBarcodeSourceTest extends FunSuite { assertEquals(src.toList, Nil) } -} +end DmuxedBarcodeSourceTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSourceTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSourceTest.scala index 19e405c..e9e443f 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSourceTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedPairedEndBarcodeSourceTest.scala @@ -1,16 +1,16 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.parser.DmuxedIterable import org.broadinstitute.gpp.poolq3.types.{Read, ReadIdCheckPolicy} -class DmuxedPairedEndBarcodeSourceTest extends FunSuite { +class DmuxedPairedEndBarcodeSourceTest extends FunSuite: private[this] val rowPolicy = BarcodePolicy("FIXED@0", 4, skipShortReads = true) private[this] val revRowPolicy = BarcodePolicy("FIXED@0", 3, skipShortReads = true) @@ -74,4 +74,4 @@ class DmuxedPairedEndBarcodeSourceTest extends FunSuite { assertEquals(src.toList, Nil) } -} +end DmuxedPairedEndBarcodeSourceTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedTest.scala index 9f3a902..43f5207 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/DmuxedTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.barcode import munit.FunSuite -class DmuxedTest extends FunSuite { +class DmuxedTest extends FunSuite: test("extracting a barcode with Ns from an illumina read") { assertEquals( @@ -31,4 +31,4 @@ class DmuxedTest extends FunSuite { assertEquals(Dmuxed.barcodeFromId(8)("@A01379:680:HC37HDRX3:1:2101:3224:1000 1:N:0:TGCGAGG"), None) } -} +end DmuxedTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/FixedOffsetPolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/FixedOffsetPolicyTest.scala index df22e1a..81757cf 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/FixedOffsetPolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/FixedOffsetPolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,10 +8,10 @@ package org.broadinstitute.gpp.poolq3.barcode import org.broadinstitute.gpp.poolq3.gen.{acgtn, dnaSeq} import org.broadinstitute.gpp.poolq3.types.Read import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class FixedOffsetPolicyTest extends AnyFlatSpec { +class FixedOffsetPolicyTest extends AnyFlatSpec: "find" should "find the barcode in the read" in { forAll(dnaSeq(acgtn), dnaSeq(acgtn), dnaSeq(acgtn)) { (a: String, b: String, c: String) => @@ -37,4 +37,4 @@ class FixedOffsetPolicyTest extends AnyFlatSpec { } } -} +end FixedOffsetPolicyTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/IndexOfKnownPrefixPolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/IndexOfKnownPrefixPolicyTest.scala index 2654819..8293c64 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/IndexOfKnownPrefixPolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/IndexOfKnownPrefixPolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,10 +9,10 @@ import org.broadinstitute.gpp.poolq3.gen.{acgt, acgtn, dnaSeqMaxN, dnaSeqOfN} import org.broadinstitute.gpp.poolq3.types.Read import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class IndexOfKnownPrefixPolicyTest extends AnyFlatSpec { +class IndexOfKnownPrefixPolicyTest extends AnyFlatSpec: val fixed = "NNNNNNNNNNNN" @@ -55,4 +55,4 @@ class IndexOfKnownPrefixPolicyTest extends AnyFlatSpec { } } -} +end IndexOfKnownPrefixPolicyTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest.scala index 7a7aa03..7a471f6 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest.scala @@ -1,14 +1,14 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class KeyMaskTest extends AnyFlatSpec { +class KeyMaskTest extends AnyFlatSpec: "KeyMask.apply" should "construct the correct key mask from a pattern" in { val km0 = KeyMask("NNNNNNNNNNNNNNNNN") @@ -77,4 +77,4 @@ class KeyMaskTest extends AnyFlatSpec { KeyMask.parsePatternRanges("nNNnNNn") should be(List(KeyRange(1, 2), KeyRange(4, 5))) } -} +end KeyMaskTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest2.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest2.scala index c58d8a7..27ba115 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest2.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyMaskTest2.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,7 +11,7 @@ import munit.FunSuite * its primary test class were lifted. We will try not to modify the copied test classes to make subsequent updates * from FISHR easier. Instead, new PoolQ-specific tests will live here. */ -class KeyMaskTest2 extends FunSuite { +class KeyMaskTest2 extends FunSuite: test("construct the correct key mask from a pattern") { // 0 1 2 @@ -23,4 +23,4 @@ class KeyMaskTest2 extends FunSuite { assertEquals(km0.keyLengthInBases, 10) } -} +end KeyMaskTest2 diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRangeTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRangeTest.scala index 3d10286..076af28 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRangeTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KeyRangeTest.scala @@ -1,17 +1,17 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* /** @author * Broad Institute Genetic Perturbation Platform */ -class KeyRangeTest extends AnyFlatSpec { +class KeyRangeTest extends AnyFlatSpec: "KeyRange" should "enforce well-formedness" in { val _ = noException should be thrownBy KeyRange(3, 4) @@ -43,4 +43,4 @@ class KeyRangeTest extends AnyFlatSpec { an[IllegalArgumentException] should be thrownBy KeyRange("6-5") } -} +end KeyRangeTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KmpKnownPrefixPolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KmpKnownPrefixPolicyTest.scala index 5764fc2..0da54af 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KmpKnownPrefixPolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KmpKnownPrefixPolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,10 +9,10 @@ import org.broadinstitute.gpp.poolq3.gen.{acgt, acgtn, dnaSeqMaxN, dnaSeqOfN} import org.broadinstitute.gpp.poolq3.types.Read import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class KmpKnownPrefixPolicyTest extends AnyFlatSpec { +class KmpKnownPrefixPolicyTest extends AnyFlatSpec: val fixed = "NNNNNNNNNNNN" @@ -69,4 +69,4 @@ class KmpKnownPrefixPolicyTest extends AnyFlatSpec { } } -} +end KmpKnownPrefixPolicyTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnownPrefixPolicyBenchmark.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnownPrefixPolicyBenchmark.scala index bb38c98..5a438c5 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnownPrefixPolicyBenchmark.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnownPrefixPolicyBenchmark.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,7 +8,7 @@ package org.broadinstitute.gpp.poolq3.barcode import org.broadinstitute.gpp.poolq3.types.Read import org.scalatest.flatspec.AnyFlatSpec -class KnownPrefixPolicyBenchmark extends AnyFlatSpec { +class KnownPrefixPolicyBenchmark extends AnyFlatSpec: ignore should "compare times" in { val seqs = Seq( @@ -45,4 +45,4 @@ class KnownPrefixPolicyBenchmark extends AnyFlatSpec { } -} +end KnownPrefixPolicyBenchmark diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPrattTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPrattTest.scala index 30446ef..0f6cc17 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPrattTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/KnuthMorrisPrattTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,10 +8,10 @@ package org.broadinstitute.gpp.poolq3.barcode import org.broadinstitute.gpp.poolq3.gen.{acgtn, dnaSeq} import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class KnuthMorrisPrattTest extends AnyFlatSpec { +class KnuthMorrisPrattTest extends AnyFlatSpec: private[this] val prefixGen: Gen[String] = dnaSeq(acgtn).suchThat(!_.contains("CACCG")) @@ -23,4 +23,4 @@ class KnuthMorrisPrattTest extends AnyFlatSpec { } } -} +end KnuthMorrisPrattTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/SplitBarcodePolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/SplitBarcodePolicyTest.scala index eedeab0..e9e3041 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/SplitBarcodePolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/SplitBarcodePolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TemplatePolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TemplatePolicyTest.scala index 5818936..167438a 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TemplatePolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TemplatePolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,8 +9,8 @@ import org.broadinstitute.gpp.poolq3.gen.{acgt, acgtn, dnaSeqMaxN, dnaSeqOfN} import org.broadinstitute.gpp.poolq3.tools.nanoTimed import org.broadinstitute.gpp.poolq3.types.Read import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* class TemplatePolicyTest extends AnyFlatSpec { diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSourceTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSourceTest.scala index a9912bc..eb5b7f7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSourceTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/barcode/TwoFileBarcodeSourceTest.scala @@ -1,16 +1,16 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.barcode -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.parser.CloseableIterable import org.broadinstitute.gpp.poolq3.types.{Read, ReadIdCheckPolicy} -class TwoFileBarcodeSourceTest extends FunSuite { +class TwoFileBarcodeSourceTest extends FunSuite: private[this] val rowPolicy = BarcodePolicy("FIXED@0", 10, skipShortReads = true) private[this] val colPolicy = BarcodePolicy("FIXED@0", 4, skipShortReads = true) @@ -25,13 +25,12 @@ class TwoFileBarcodeSourceTest extends FunSuite { val src = new TwoFileBarcodeSource(rowReads, colReads, rowPolicy, colPolicy, umiPolicy, ReadIdCheckPolicy.Strict) - src.iterator.toList match { + src.iterator.toList match case r :: Nil => assertEquals(r.col.map(b => new String(b.barcode)), Some("GGGG")) assertEquals(r.row.map(b => new String(b.barcode)), Some("AAAAAAAAAA")) assertEquals(r.umi.map(b => new String(b.barcode)), Some("TTT")) case _ => fail("This should not happen") - } } -} +end TwoFileBarcodeSourceTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/collection/CollectionPackageTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/collection/CollectionPackageTest.scala index 1dc326e..54d2b7f 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/collection/CollectionPackageTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/collection/CollectionPackageTest.scala @@ -1,18 +1,16 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.collection import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class CollectionPackageTest extends AnyFlatSpec { +class CollectionPackageTest extends AnyFlatSpec: "zipWithIndex1" should "produce indexes starting with 1" in { val input = Seq("a", "b", "c") input.iterator.zipWithIndex1.toSeq should be(Seq(("a", 1), ("b", 2), ("c", 3))) } - -} diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/gen/gen.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/gen/gen.scala new file mode 100644 index 0000000..934baf5 --- /dev/null +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/gen/gen.scala @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.gen + +import org.broadinstitute.gpp.poolq3.seq.Bases +import org.scalacheck.Gen + +type Base = Char + +val acgt: Gen[Base] = Gen.oneOf(Bases) + +val acgtn: Gen[Base] = Gen.oneOf(Bases :+ 'N') + +def nonEmptyDnaSeq(bases: Gen[Base]): Gen[String] = + Gen.nonEmptyListOf(bases).flatMap(_.mkString) + +def dnaSeq(bases: Gen[Base]): Gen[String] = Gen.listOf(bases).flatMap(_.mkString) + +def dnaSeqOfN(bases: Gen[Base], n: Int): Gen[String] = + Gen.listOfN(n, bases).flatMap(_.mkString) + +def dnaSeqMaxN(bases: Gen[Base], n: Int): Gen[String] = Gen.sized(size => dnaSeqOfN(bases, math.abs(size) % n)) + +val barcode: Gen[String] = dnaSeqOfN(acgt, 20) + +val barcodeN: Gen[String] = dnaSeqOfN(acgtn, 20) diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/gen/package.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/gen/package.scala deleted file mode 100644 index 2a16911..0000000 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/gen/package.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import org.broadinstitute.gpp.poolq3.seq.Bases -import org.scalacheck.Gen - -package object gen { - - type Base = Char - - val acgt: Gen[Base] = Gen.oneOf(Bases) - - val acgtn: Gen[Base] = Gen.oneOf(Bases :+ 'N') - - def nonEmptyDnaSeq(bases: Gen[Base]): Gen[String] = - Gen.nonEmptyListOf(bases).flatMap(_.mkString) - - def dnaSeq(bases: Gen[Base]): Gen[String] = Gen.listOf(bases).flatMap(_.mkString) - - def dnaSeqOfN(bases: Gen[Base], n: Int): Gen[String] = - Gen.listOfN(n, bases).flatMap(_.mkString) - - def dnaSeqMaxN(bases: Gen[Base], n: Int): Gen[String] = Gen.sized(size => dnaSeqOfN(bases, math.abs(size) % n)) - - val barcode: Gen[String] = dnaSeqOfN(acgt, 20) - - val barcodeN: Gen[String] = dnaSeqOfN(acgtn, 20) - -} diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogramTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogramTest.scala index b8ba3fd..30646b5 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogramTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/OpenHashMapHistogramTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,7 +8,7 @@ package org.broadinstitute.gpp.poolq3.hist import munit.{FunSuite, ScalaCheckSuite} import org.scalacheck.Prop.forAll -class OpenHashMapHistogramTest extends FunSuite with ScalaCheckSuite { +class OpenHashMapHistogramTest extends FunSuite with ScalaCheckSuite: test("OpenHashMapHistogram should track frequencies") { val h = new OpenHashMapHistogram[String] @@ -33,4 +33,4 @@ class OpenHashMapHistogramTest extends FunSuite with ScalaCheckSuite { } } -} +end OpenHashMapHistogramTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogramTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogramTest.scala index ee96ae8..aec33ce 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogramTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/ShardedHistogramTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,7 +8,7 @@ package org.broadinstitute.gpp.poolq3.hist import munit.{FunSuite, ScalaCheckSuite} import org.scalacheck.Prop.forAll -class ShardedHistogramTest extends FunSuite with ScalaCheckSuite { +class ShardedHistogramTest extends FunSuite with ScalaCheckSuite: test("basic operations") { val h = new BasicShardedHistogram[String, String](new OpenHashMapHistogram) @@ -33,7 +33,7 @@ class ShardedHistogramTest extends FunSuite with ScalaCheckSuite { } property("track frequencies for arbitrary data") { - def key(x: Int): Option[Int] = if (x < 0) None else Some(x) + def key(x: Int): Option[Int] = if x < 0 then None else Some(x) forAll { (data: List[(Int, Int)]) => val actualHistogram = new BasicShardedHistogram[Int, Int](new OpenHashMapHistogram) data.foreach { case (shard, value) => @@ -54,4 +54,4 @@ class ShardedHistogramTest extends FunSuite with ScalaCheckSuite { } } -} +end ShardedHistogramTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogramTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogramTest.scala index cb6629b..1f8e9e7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogramTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/hist/TupleHistogramTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,7 +8,7 @@ package org.broadinstitute.gpp.poolq3.hist import munit.{FunSuite, ScalaCheckSuite} import org.scalacheck.Prop.forAll -class TupleHistogramTest extends FunSuite with ScalaCheckSuite { +class TupleHistogramTest extends FunSuite with ScalaCheckSuite: test("OpenHashMapHistogram should track frequencies") { val h = new TupleHistogram[String] @@ -35,4 +35,4 @@ class TupleHistogramTest extends FunSuite with ScalaCheckSuite { } } -} +end TupleHistogramTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/AmbiguousMatchTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/AmbiguousMatchTest.scala index a21c2ae..0f1b3a7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/AmbiguousMatchTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/AmbiguousMatchTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,9 +11,9 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class AmbiguousMatchTest extends AnyFlatSpec { +class AmbiguousMatchTest extends AnyFlatSpec: private val rowReferenceBarcodes = List( "AAAAAAAAAAAAAAAAAAAA", @@ -102,4 +102,4 @@ class AmbiguousMatchTest extends AnyFlatSpec { hist.count(("AATGTGAAAATGTGATGAAT", "CCCC")) should be(0) } -} +end AmbiguousMatchTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/LongBarcodeMatchTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/LongBarcodeMatchTest.scala index af2b173..8467312 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/LongBarcodeMatchTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/LongBarcodeMatchTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,9 +11,9 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class LongBarcodeMatchTest extends AnyFlatSpec { +class LongBarcodeMatchTest extends AnyFlatSpec: private val rowReferenceBarcodes = List( ReferenceEntry( @@ -127,4 +127,4 @@ class LongBarcodeMatchTest extends AnyFlatSpec { ) should be(1) } -} +end LongBarcodeMatchTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/PairedEndMatchTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/PairedEndMatchTest.scala index ca3af76..a103df5 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/PairedEndMatchTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/PairedEndMatchTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -14,7 +14,7 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.ExactReference -class PairedEndMatchTest extends FunSuite { +class PairedEndMatchTest extends FunSuite: // row barcodes private val r1 = "TTTCCC" @@ -84,15 +84,13 @@ class PairedEndMatchTest extends FunSuite { assertEquals(state.exactMatches, 102) val hist = state.known - for { + for row <- rowReference.allBarcodes col <- colReference.allBarcodes tuple = (Some(row), Some(col)) expectedTupleCount = expectedCounts.getOrElse(tuple, 0) - } { - assertEquals(hist.forShard(None).count((row, col)), expectedTupleCount) - } + do assertEquals(hist.forShard(None).count((row, col)), expectedTupleCount) } -} +end PairedEndMatchTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UmiMatchTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UmiMatchTest.scala index 8760a10..2d35f38 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UmiMatchTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UmiMatchTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.integration import scala.util.Random -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.PoolQ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} @@ -15,7 +15,7 @@ import org.broadinstitute.gpp.poolq3.parser.{BarcodeSet, CloseableIterable, Refe import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.ExactReference -class UmiMatchTest extends FunSuite { +class UmiMatchTest extends FunSuite: // row barcodes private[this] val brdn01 = "AAAAAAAAAAAAAAAAAAAA" @@ -86,16 +86,14 @@ class UmiMatchTest extends FunSuite { assertEquals(state.exactMatches, 56) val hist = state.known - for { + for row <- rowReference.allBarcodes col <- colReference.allBarcodes umi <- umiBarcodes.barcodes tuple = (row.some, col.some, umi.some) expectedTupleCount = expectedCounts.getOrElse(tuple, 0) - } { - assertEquals(hist.forShard(umi.some).count((row, col)), expectedTupleCount) - } + do assertEquals(hist.forShard(umi.some).count((row, col)), expectedTupleCount) } -} +end UmiMatchTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousMatchTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousMatchTest.scala index 9a835d9..b732f27 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousMatchTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousMatchTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,9 +11,9 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class UnambiguousMatchTest extends AnyFlatSpec { +class UnambiguousMatchTest extends AnyFlatSpec: private val rowReferenceBarcodes = List( "AAAAAAAAAAAAAAAAAAAA", @@ -102,4 +102,4 @@ class UnambiguousMatchTest extends AnyFlatSpec { hist.count(("AATGTGAAAATGTGATGAAT", "CCCC")) should be(0) } -} +end UnambiguousMatchTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousVariantTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousVariantTest.scala index 611a1f6..95dbb1b 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousVariantTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnambiguousVariantTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,9 +11,9 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class UnambiguousVariantTest extends AnyFlatSpec { +class UnambiguousVariantTest extends AnyFlatSpec: private val rowReferenceBarcodes = List("AAAAAAAAAAAAAAAAAAAA", "AAAAAAAAAAAAAAAAAAAC").map(b => ReferenceEntry(b, b)) @@ -46,13 +46,12 @@ class UnambiguousVariantTest extends AnyFlatSpec { val hist = state.known val _ = hist.count(("AAAAAAAAAAAAAAAAAAAA", "AAAA")) should be(1) - for { + for row <- rowReferenceBarcodes.map(_.dnaBarcode) col <- colReferenceBarcodes.map(_.dnaBarcode) - } { - val expected = if (row.forall(_ == 'A') && col.forall(_ == 'A')) 1 else 0 + do + val expected = if row.forall(_ == 'A') && col.forall(_ == 'A') then 1 else 0 hist.count((row, col)) should be(expected) - } } -} +end UnambiguousVariantTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala index 12c6938..d8112ee 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala @@ -1,11 +1,11 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.integration -import java.nio.file.{Path => JPath} +import java.nio.file.Path as JPath import cats.effect.{IO, Resource} import fs2.io.file.{Files, Path} @@ -15,10 +15,10 @@ import org.broadinstitute.gpp.poolq3.reports.PoolQ2Dialect import org.broadinstitute.gpp.poolq3.testutil.tempFile import org.broadinstitute.gpp.poolq3.{PoolQ, PoolQConfig, PoolQInput, PoolQOutput, TestResources} -class UnlabeledConditionsTest extends CatsEffectSuite with TestResources { +class UnlabeledConditionsTest extends CatsEffectSuite with TestResources: val outputFilesResources: Resource[IO, PoolQOutput] = - for { + for countsFile <- tempFile[IO]("counts", ".txt") barcodeCountsFile <- tempFile[IO]("barcode-counts", ".txt") normalizedCountsFile <- tempFile[IO]("normcounts", ".txt") @@ -27,18 +27,16 @@ class UnlabeledConditionsTest extends CatsEffectSuite with TestResources { correlationFile <- tempFile[IO]("correlation", ".txt") unexpectedSequencesFile <- tempFile[IO]("unexpected", ".txt") runInfoFile <- tempFile[IO]("runinfo", ".txt") - } yield { - PoolQOutput( - countsFile = countsFile, - normalizedCountsFile = normalizedCountsFile, - barcodeCountsFile = barcodeCountsFile, - qualityFile = qualityFile, - conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile, - correlationFile = correlationFile, - unexpectedSequencesFile = unexpectedSequencesFile, - runInfoFile = runInfoFile - ) - } + yield PoolQOutput( + countsFile = countsFile, + normalizedCountsFile = normalizedCountsFile, + barcodeCountsFile = barcodeCountsFile, + qualityFile = qualityFile, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile, + correlationFile = correlationFile, + unexpectedSequencesFile = unexpectedSequencesFile, + runInfoFile = runInfoFile + ) test("Unlabeled sample barcodes aggregate together") { @@ -62,11 +60,10 @@ class UnlabeledConditionsTest extends CatsEffectSuite with TestResources { } - def filesSame(expected: JPath, actual: JPath)(implicit loc: munit.Location): IO[Unit] = { + def filesSame(expected: JPath, actual: JPath)(implicit loc: munit.Location): IO[Unit] = val ef: Stream[IO, String] = Files[IO].readAll(Path.fromNioPath(expected)).through(text.utf8.decode).foldMonoid val af: Stream[IO, String] = Files[IO].readAll(Path.fromNioPath(actual)).through(text.utf8.decode).foldMonoid ef.zip(af).map { case (e, a) => assertEquals(e, a) }.compile.drain - } -} +end UnlabeledConditionsTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala index 2333ec3..b9bf142 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,18 +7,17 @@ package org.broadinstitute.gpp.poolq3.integration.legacy import java.nio.file.Path -import better.files._ +import better.files.* import org.broadinstitute.gpp.poolq3.reports.PoolQ2Dialect import org.broadinstitute.gpp.poolq3.testutil.contents import org.broadinstitute.gpp.poolq3.{PoolQ, PoolQConfig, PoolQInput, PoolQOutput, TestResources} import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class LegacyIntegrationTest extends AnyFlatSpec with TestResources { +class LegacyIntegrationTest extends AnyFlatSpec with TestResources: - private[this] def filesSame(actual: Path, expected: Path): Unit = { + private[this] def filesSame(actual: Path, expected: Path): Unit = val _ = contents(actual) should be(contents(expected)) - } /** Tests PoolQ end-to-end, using 10000 reads, 8 constructs, and 42 conditions. Compares the results to expected * results. @@ -28,7 +27,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { * NOTE: the correctness of the results was verified by hand. */ "PoolQ" should "testPoolQReads10000Reference8Conditions42" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") @@ -38,7 +37,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("reference-8.csv"), @@ -68,11 +67,10 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(barcodeCountsFile.path, resourcePath("barcode-counts-10000-8-42.txt")) filesSame(qualityFile.path, resourcePath("quality-10000-8-42.txt")) unexpectedSequenceCacheDir.exists should be(false) - } } "PoolQ" should "optionally not remove the unexpected sequence cache" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") @@ -82,7 +80,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("reference-8.csv"), @@ -113,7 +111,6 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(barcodeCountsFile.path, resourcePath("barcode-counts-10000-8-42.txt")) filesSame(qualityFile.path, resourcePath("quality-10000-8-42.txt")) unexpectedSequenceCacheDir.exists should be(true) - } } /** Tests PoolQ end-to-end, using 42 base reads. The longer reads allow for comparison with the whole construct, and @@ -125,7 +122,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { * the conditions file one successful read each for the first 10 conditions */ it should "testPoolQLongerReads" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") @@ -135,7 +132,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("reference-8.csv"), @@ -164,14 +161,13 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(normalizedCountsFile.toJava.toPath, resourcePath("long-reads-lognorm.txt")) filesSame(barcodeCountsFile.toJava.toPath, resourcePath("long-reads-barcode-counts.txt")) filesSame(qualityFile.toJava.toPath, resourcePath("long-reads-quality.txt")) - } } /** Tests PoolQ end-to-end, using 42-base reads. The longer reads allow for comparison with the whole construct, and * also test the fact that PoolQ ignores any bases in the read that follow the construct. */ it should "testPoolQMultipleBarcodesPerCondition" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") @@ -181,7 +177,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("reference-8.csv"), @@ -211,14 +207,13 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(barcodeCountsFile.toJava.toPath, resourcePath("overlapping-barcode-barcode-counts.txt")) filesSame(correlationFile.toJava.toPath, resourcePath("overlapping-barcode-correlation.txt")) filesSame(qualityFile.toJava.toPath, resourcePath("overlapping-barcode-quality.txt")) - } } /** Tests the case of multiplexed reads, where the reads are long enough to contain the entire construct barcode * sequence. */ it should "testPoolQMultiplexedReads" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") @@ -228,7 +223,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("next500-reference.txt"), @@ -256,11 +251,10 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(countsFile.toJava.toPath, resourcePath("next500-counts.txt")) filesSame(qualityFile.toJava.toPath, resourcePath("next500-quality.txt")) - } } it should "testPoolQMultiplexedShortReads" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") @@ -270,7 +264,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("next500-reference.txt"), @@ -298,7 +292,6 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(countsFile.toJava.toPath, resourcePath("next500-counts.txt")) filesSame(qualityFile.toJava.toPath, resourcePath("next500-quality.txt")) - } } /** Tests PoolQ end-to-end, using 42 base reads. The longer reads allow for comparison with the whole construct, and @@ -306,7 +299,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { * barcode maps to multiple construct IDs. */ it should "testPoolQDuplicateConstructs" in { - for { + for countsFile <- File.temporaryFile("counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") @@ -316,7 +309,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") runInfoFile <- File.temporaryFile("runinfo", ".txt") - } { + do val config = PoolQConfig( input = PoolQInput( rowReference = resourcePath("reference-9.csv"), @@ -345,7 +338,6 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { filesSame(normalizedCountsFile.toJava.toPath, resourcePath("duplicate-hp-lognorm.txt")) filesSame(barcodeCountsFile.toJava.toPath, resourcePath("duplicate-hp-barcode-counts.txt")) filesSame(qualityFile.toJava.toPath, resourcePath("duplicate-hp-quality.txt")) - } } -} +end LegacyIntegrationTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/numeric/NumericPackageTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/numeric/NumericPackageTest.scala index 7c74a0d..69dcf3d 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/numeric/NumericPackageTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/numeric/NumericPackageTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,10 +7,10 @@ package org.broadinstitute.gpp.poolq3.numeric import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class NumericPackageTest extends AnyFlatSpec { +class NumericPackageTest extends AnyFlatSpec: "log2" should "take the log base 2" in { val smallNonNeg = Gen.chooseNum(0.0, 48.0) @@ -30,4 +30,4 @@ class NumericPackageTest extends AnyFlatSpec { } } -} +end NumericPackageTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSetParserTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSetParserTest.scala index eed8ee0..30725f7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSetParserTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/BarcodeSetParserTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,7 +12,7 @@ import org.broadinstitute.gpp.poolq3.TestResources import org.broadinstitute.gpp.poolq3.gen.{acgt, nonEmptyDnaSeq} import org.scalacheck.Prop.forAll -class BarcodeSetParserTest extends FunSuite with ScalaCheckSuite with TestResources { +class BarcodeSetParserTest extends FunSuite with ScalaCheckSuite with TestResources: property("parseBarcode") { forAll(nonEmptyDnaSeq(acgt)) { bc => @@ -30,4 +30,4 @@ class BarcodeSetParserTest extends FunSuite with ScalaCheckSuite with TestResour intercept[InvalidFileException](BarcodeSet(resourcePath("bad-umi.txt"))) } -} +end BarcodeSetParserTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/FastqParserTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/FastqParserTest.scala index caae53f..33e6668 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/FastqParserTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/FastqParserTest.scala @@ -1,15 +1,15 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ package org.broadinstitute.gpp.poolq3.parser -import better.files._ +import better.files.* import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class FastqParserTest extends AnyFlatSpec { +class FastqParserTest extends AnyFlatSpec: "FastqParser" should "reject a malformed FASTQ file" in { val data = @@ -22,15 +22,13 @@ class FastqParserTest extends AnyFlatSpec { |+""".stripMargin val file: File = File.newTemporaryFile("FastqParserTest", ".fastq") - try { + try file.overwrite(data) val fqp = new FastqParser(file.path) intercept[InvalidFileException] { fqp.toList } - } finally { - file.delete() - } + finally file.delete() } "FastqParser" should "reject a misaligned FASTQ file" in { @@ -42,15 +40,13 @@ class FastqParserTest extends AnyFlatSpec { |+""".stripMargin val file: File = File.newTemporaryFile("FastqParserTest", ".fastq") - try { + try file.overwrite(data) val fqp = new FastqParser(file.path) intercept[InvalidFileException] { fqp.toList } - } finally { - file.delete() - } + finally file.delete() } it should "parse complete records" in { @@ -64,15 +60,13 @@ class FastqParserTest extends AnyFlatSpec { |+ |=@975@<7""".stripMargin val file: File = File.newTemporaryFile("FastqParserTest", ".fastq") - try { + try file.overwrite(data) val fqp = new FastqParser(file.path) val fqi = fqp.iterator val _ = (fqi.toList should have).length(2) fqi.close() - } finally { - file.delete() - } + finally file.delete() } -} +end FastqParserTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ParserPackageTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ParserPackageTest.scala index 3568097..8a1c504 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ParserPackageTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ParserPackageTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import java.io.{BufferedReader, StringReader} import munit.FunSuite -class ParserPackageTest extends FunSuite { +class ParserPackageTest extends FunSuite: test("skipHeader should skip a header") { val source = @@ -61,4 +61,4 @@ class ParserPackageTest extends FunSuite { assertEquals(r.readLine(), "\"TTTTTTTT;TTTTTTTTT\"\t\"\"") } -} +end ParserPackageTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceDataTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceDataTest.scala index 368d023..2c06690 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceDataTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceDataTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,9 +10,9 @@ import java.io.{BufferedReader, StringReader} import org.broadinstitute.gpp.poolq3.TestResources import org.broadinstitute.gpp.poolq3.reports.PoolQ2Dialect import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class ReferenceDataTest extends AnyFlatSpec with TestResources { +class ReferenceDataTest extends AnyFlatSpec with TestResources: "truncateMapping" should "truncate the barcode in a mapping" in { val actual = ReferenceData.truncator(15)("AAAAACCCCCGGGGGTTTTT") @@ -102,4 +102,4 @@ class ReferenceDataTest extends AnyFlatSpec with TestResources { e.msg.exists(_.contains("Here's an ID with no barcode!")) should be(true) } -} +end ReferenceDataTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntryTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntryTest.scala index 4b28638..dfece06 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntryTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/ReferenceEntryTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.parser import munit.FunSuite -class ReferenceEntryTest extends FunSuite { +class ReferenceEntryTest extends FunSuite: test("barcodeLengths detects splits") { val cs = "C" * 10 @@ -17,4 +17,4 @@ class ReferenceEntryTest extends FunSuite { assertEquals(re.barcodeLengths, (10, 8)) } -} +end ReferenceEntryTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/SamParserTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/SamParserTest.scala index 208e775..6192449 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/SamParserTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/SamParserTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,7 +10,7 @@ import scala.util.Using import munit.FunSuite import org.broadinstitute.gpp.poolq3.TestResources -class SamParserTest extends FunSuite with TestResources { +class SamParserTest extends FunSuite with TestResources: test("SamParser") { val file = resourcePath("sample.bam") @@ -21,4 +21,4 @@ class SamParserTest extends FunSuite with TestResources { } } -} +end SamParserTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/TextParserTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/TextParserTest.scala index 980e511..52140c3 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/parser/TextParserTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/parser/TextParserTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,7 +13,7 @@ import scala.util.Using import munit.FunSuite import org.broadinstitute.gpp.poolq3.types.Read -class TextParserTest extends FunSuite { +class TextParserTest extends FunSuite: test("it should parse a few sequences from a file") { val data = @@ -22,13 +22,12 @@ class TextParserTest extends FunSuite { |TTTTTTTT""".stripMargin val file: Path = Files.createTempFile("TextParserTest", ".txt") - try { + try Using.resource(new BufferedWriter(new FileWriter(file.toFile)))(bw => bw.write(data)) val fqp = new TextParser(file) assertEquals(fqp.toList, List(Read("Line 1", "AACTCACG"), Read("Line 2", "TTGAGTGC"), Read("Line 3", "TTTTTTTT"))) - } finally { + finally val _ = Files.deleteIfExists(file) - } } -} +end TextParserTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumerTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumerTest.scala index c05cdd1..1e83af7 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumerTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/process/ScoringConsumerTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,7 +10,7 @@ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} import org.broadinstitute.gpp.poolq3.parser.{BarcodeSet, ReferenceEntry} import org.broadinstitute.gpp.poolq3.reference.{ExactReference, Reference} -class ScoringConsumerTest extends FunSuite { +class ScoringConsumerTest extends FunSuite: val rowReference: Reference = ExactReference(Seq(ReferenceEntry("AAAAAAAAAA", "Barcode1")), identity, includeAmbiguous = false) @@ -65,10 +65,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 1) assertEquals(state.reads, 1) @@ -107,10 +107,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 0) assertEquals(state.reads, 1) @@ -128,10 +128,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 0) assertEquals(state.reads, 1) @@ -149,10 +149,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 1) assertEquals(state.reads, 1) @@ -170,10 +170,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 0) assertEquals(state.reads, 1) @@ -191,10 +191,10 @@ class ScoringConsumerTest extends FunSuite { consumer.consume(barcodes) val state = consumer.state - for { + for r <- rowReference.allBarcodes c <- colReference.allBarcodes - } assertEquals(state.known.count((r, c)), 0) + do assertEquals(state.known.count((r, c)), 0) assertEquals(state.knownCol.count("AAA"), 0) assertEquals(state.reads, 1) @@ -332,4 +332,4 @@ class ScoringConsumerTest extends FunSuite { assertEquals(state.revRowBarcodeStats.max, -1) } -} +end ScoringConsumerTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/BkTreeTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reference/BkTreeTest.scala deleted file mode 100644 index 9954f57..0000000 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/BkTreeTest.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3.reference - -import org.apache.commons.text.similarity.LevenshteinDistance -import org.broadinstitute.gpp.poolq3.gen.barcode -import org.scalacheck.Gen -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ - -class BkTreeTest extends AnyFlatSpec { - import Gen.listOfN - - val referenceGen: Gen[List[String]] = listOfN(1000, barcode) - - def levenshtein(x: String, y: String): Int = LevenshteinDistance.getDefaultInstance.apply(x, y) - - "BKTree" should "represent a small dictionary" in { - val t = - new BkTree(levenshtein, Seq("book", "books", "cake", "boo", "boon", "cook", "cape", "cart")) - - val _ = t.query("book", 1) should be(Set("book", "books", "boon", "boo", "cook")) - t.query("booky", 1) should be(Set("book", "books")) - } - - it should "support queries of arbitrary dictionaries" in { - forAll(referenceGen) { (reference: List[String]) => - val tree = new BkTree(levenshtein, reference) - reference.headOption.foreach { query => - (0 until query.length).foreach { i => - val array = query.toCharArray - array(i) = 'N' - val results = tree.query(array.mkString, 1) - results should contain(query) - } - } - } - } - -} diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/ExactReferenceTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reference/ExactReferenceTest.scala index 81dcd74..012a60b 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/ExactReferenceTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reference/ExactReferenceTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,10 +9,10 @@ import org.broadinstitute.gpp.poolq3.gen.barcode import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class ExactReferenceTest extends AnyFlatSpec { +class ExactReferenceTest extends AnyFlatSpec: val referenceGen: Gen[List[String]] = Gen.listOfN(1000, barcode) "ExactReference" should "find matches for a given barcode" in { @@ -42,4 +42,4 @@ class ExactReferenceTest extends AnyFlatSpec { reference.idsForBarcode("AAAAAAAAAAAAAAAAAAAA") should be(Seq("One")) } -} +end ExactReferenceTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/VariantReferenceTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reference/VariantReferenceTest.scala index e9e6fa0..852380a 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reference/VariantReferenceTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reference/VariantReferenceTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -12,10 +12,10 @@ import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry import org.broadinstitute.gpp.poolq3.tools.withNs import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class VariantReferenceTest extends AnyFlatSpec { +class VariantReferenceTest extends AnyFlatSpec: private[this] val referenceGen: Gen[List[String]] = Gen.listOfN(1000, barcode) @@ -74,4 +74,4 @@ class VariantReferenceTest extends AnyFlatSpec { reference2.find("AAAAAAAAAAAAAAAAAAN").sorted should be(barcodes.map(bc => MatchedBarcode(bc.dnaBarcode, 1)).sorted) } -} +end VariantReferenceTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsTest.scala index 8d6368c..6dfb1d0 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/BarcodeCountsTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import java.nio.file.{Files, Path} import java.util.function.Predicate import java.util.stream.Collectors -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.PoolQ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} @@ -19,7 +19,7 @@ import org.broadinstitute.gpp.poolq3.process.{ScoringConsumer, State} import org.broadinstitute.gpp.poolq3.reference.ExactReference import org.broadinstitute.gpp.poolq3.testutil.contents -class BarcodeCountsTest extends FunSuite { +class BarcodeCountsTest extends FunSuite: private[this] val Condition1 = "DMSO" private[this] val Condition2 = "ITMFA" @@ -88,7 +88,7 @@ class BarcodeCountsTest extends FunSuite { test("BarcodeCountsWriter should write a correct counts file") { val outputFile = Files.createTempFile("barcode-counts-file-test", ".txt") - try { + try val consumer = new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -105,14 +105,14 @@ class BarcodeCountsTest extends FunSuite { |""".stripMargin assertEquals(contents(outputFile), expected) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("BarcodeCountsWriter should write a GCT file") { val outputFile = Files.createTempFile("barcode-counts-file-test", ".gct") - try { + try val consumer = new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -131,9 +131,9 @@ class BarcodeCountsTest extends FunSuite { |""".stripMargin assertEquals(contents(outputFile), expected) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("CountsWriter should write UMI files") { @@ -194,12 +194,11 @@ class BarcodeCountsTest extends FunSuite { f.getFileName != aggregateOutputFile.getFileName val bcre = "barcode-counts-file-test-umi-.+-([ACGT]{5}|UNMATCHED-UMI).txt".r - def umiBarcodeFor(f: Path): Option[String] = f.getFileName.toString match { + def umiBarcodeFor(f: Path): Option[String] = f.getFileName.toString match case bcre(umi) => umi.some case _ => None - } - try { + try val _ = BarcodeCountsWriter.write( aggregateOutputFile, aggregateOutputFile.getParent.some, @@ -225,7 +224,7 @@ class BarcodeCountsTest extends FunSuite { umiFiles.forEach { umiFile => val qualifier = umiBarcodeFor(umiFile) qualifier.foreach(q => umiFileQualifiers ::= q) - qualifier match { + qualifier match case None => fail(s"no UMI barcode found for $umiFile") case Some("UNMATCHED-UMI") => val expected = @@ -269,11 +268,11 @@ class BarcodeCountsTest extends FunSuite { assertEquals(contents(umiFile), expected) case Some(bc) => fail(s"Unexpected UMI barcode $bc") - } + end match } // make sure we got everything assertEquals(umiFileQualifiers.toSet, umiBarcodes.barcodes + "UNMATCHED-UMI") - } finally { + finally // delete the named output file val _ = Files.deleteIfExists(aggregateOutputFile) @@ -281,7 +280,7 @@ class BarcodeCountsTest extends FunSuite { Files.list(aggregateOutputFile.getParent).filter(outputFileFilter).forEach { f => val _ = Files.deleteIfExists(f) } - } + end try } -} +end BarcodeCountsTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala index b0e4340..efcd669 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,7 +13,7 @@ import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry import org.broadinstitute.gpp.poolq3.process.State import org.broadinstitute.gpp.poolq3.reference.ExactReference -class ConditionBarcodeCountsSummaryTest extends CatsEffectSuite { +class ConditionBarcodeCountsSummaryTest extends CatsEffectSuite: private val Condition1 = "DMSO" private val Condition2 = "ITMFA" @@ -101,4 +101,4 @@ class ConditionBarcodeCountsSummaryTest extends CatsEffectSuite { } -} +end ConditionBarcodeCountsSummaryTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileTest.scala index b53b9d4..7f27cc9 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CorrelationFileTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -16,9 +16,9 @@ import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.ScoringConsumer import org.broadinstitute.gpp.poolq3.reference.ExactReference import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* -class CorrelationFileTest extends AnyFlatSpec { +class CorrelationFileTest extends AnyFlatSpec: private[this] val Condition1 = "DMSO" private[this] val Condition2 = "ITMFA" @@ -73,7 +73,7 @@ class CorrelationFileTest extends AnyFlatSpec { "CorrelationFileWriter" should "write a correct correlation file" in { val outputFile = Files.createTempFile("correlation-file-test", ".txt") - try { + try val consumer = new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -93,9 +93,9 @@ class CorrelationFileTest extends AnyFlatSpec { // now check the contents contents.mkString should be(expected) } - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } it should "not write a correlation file for a run with a single condition" in { @@ -106,7 +106,7 @@ class CorrelationFileTest extends AnyFlatSpec { includeAmbiguous = false ) val outputFile = Files.createTempFile("correlation-file-test", "txt") - try { + try val consumer = new ScoringConsumer(rowReference, singleCondRef, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -122,9 +122,9 @@ class CorrelationFileTest extends AnyFlatSpec { // be sure also that we didn't actually write anything to the file Using(Source.fromFile(outputFile.toFile))(src => src.getLines().mkString("\n") should be("")) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } it should "not write a correlation file for a run with a single row barcode" in { @@ -137,7 +137,7 @@ class CorrelationFileTest extends AnyFlatSpec { includeAmbiguous = false ) val outputFile = Files.createTempFile("correlation-file-test", "txt") - try { + try val consumer = new ScoringConsumer(rowReference, singleCondRef, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -153,9 +153,9 @@ class CorrelationFileTest extends AnyFlatSpec { // be sure also that we didn't actually write anything to the file Using(Source.fromFile(outputFile.toFile))(src => src.getLines().mkString("\n") should be("")) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } -} +end CorrelationFileTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CountsTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CountsTest.scala index 951f408..a8b734f 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CountsTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/CountsTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,7 +9,7 @@ import java.nio.file.{Files, Path} import java.util.function.Predicate import java.util.stream.Collectors -import cats.syntax.all._ +import cats.syntax.all.* import munit.FunSuite import org.broadinstitute.gpp.poolq3.PoolQ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} @@ -19,7 +19,7 @@ import org.broadinstitute.gpp.poolq3.process.{ScoringConsumer, State} import org.broadinstitute.gpp.poolq3.reference.ExactReference import org.broadinstitute.gpp.poolq3.testutil.contents -class CountsTest extends FunSuite { +class CountsTest extends FunSuite: private[this] val Condition1 = "DMSO" private[this] val Condition2 = "ITMFA" @@ -93,7 +93,7 @@ class CountsTest extends FunSuite { test("CountsWriter write a correct counts file") { val outputFile = Files.createTempFile("counts-file-test", ".txt") - try { + try val consumer = new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -110,9 +110,9 @@ class CountsTest extends FunSuite { |""".stripMargin assertEquals(contents(outputFile), expected) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("CountsWriter should preserve input barcodes") { @@ -123,7 +123,7 @@ class CountsTest extends FunSuite { new OpenHashMapHistogram ) val outputFile = Files.createTempFile("counts-file-test-orig", ".txt") - try { + try val rowReference2 = ExactReference( Seq( ReferenceEntry("CTC:GAG", "Stem Loop"), @@ -143,14 +143,14 @@ class CountsTest extends FunSuite { |""".stripMargin assertEquals(contents(outputFile), expected) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("CountsWriter should write a GCT file") { val outputFile = Files.createTempFile("counts-file-test", ".gct") - try { + try val consumer = new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, None, false) val ret = PoolQ.runProcess(barcodes, consumer) @@ -169,9 +169,9 @@ class CountsTest extends FunSuite { |""".stripMargin assertEquals(contents(outputFile), expected) - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("CountsWriter should write UMI files") { @@ -232,12 +232,11 @@ class CountsTest extends FunSuite { f.getFileName != aggregateOutputFile.getFileName val bcre = "counts-file-test-umi-.+-([ACGT]{5}|UNMATCHED-UMI).txt".r - def umiBarcodeFor(f: Path): Option[String] = f.getFileName.toString match { + def umiBarcodeFor(f: Path): Option[String] = f.getFileName.toString match case bcre(umi) => umi.some case _ => None - } - try { + try val _ = CountsWriter.write( aggregateOutputFile, aggregateOutputFile.getParent.some, @@ -263,7 +262,7 @@ class CountsTest extends FunSuite { umiFiles.forEach { umiFile => val qualifier = umiBarcodeFor(umiFile) qualifier.foreach(q => umiFileQualifiers ::= q) - qualifier match { + qualifier match case None => fail(s"no UMI barcode found for $umiFile") case Some("UNMATCHED-UMI") => val expected = @@ -307,11 +306,11 @@ class CountsTest extends FunSuite { assertEquals(contents(umiFile), expected) case Some(bc) => fail(s"Unexpected UMI barcode $bc") - } + end match } // make sure we got everything assertEquals(umiFileQualifiers.toSet, umiBarcodes.barcodes + "UNMATCHED-UMI") - } finally { + finally // delete the named output file val _ = Files.deleteIfExists(aggregateOutputFile) @@ -322,7 +321,7 @@ class CountsTest extends FunSuite { .forEach { f => val _ = Files.deleteIfExists(f) } - } + end try } -} +end CountsTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ReportsTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ReportsTest.scala index 35f15b9..ddc4768 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ReportsTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ReportsTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -14,7 +14,7 @@ import org.broadinstitute.gpp.poolq3.reference.{ExactReference, Reference} import org.scalacheck.Gen import org.scalacheck.Prop.forAll -class ReportsTest extends FunSuite with ScalaCheckSuite { +class ReportsTest extends FunSuite with ScalaCheckSuite: test("writeRowIdentifiers maps barcodes back to their original form") { val reference: Reference = @@ -42,4 +42,4 @@ class ReportsTest extends FunSuite with ScalaCheckSuite { } } -} +end ReportsTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriterTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriterTest.scala index b0dc995..25cb131 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriterTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/RunInfoWriterTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,11 +13,11 @@ import scala.util.Using import munit.FunSuite import org.broadinstitute.gpp.poolq3.{BuildInfo, PoolQConfig, PoolQInput, PoolQOutput} -class RunInfoWriterTest extends FunSuite { +class RunInfoWriterTest extends FunSuite: test("runinfo") { val outputFile = Files.createTempFile("runinfo", ".txt") - try { + try val config = PoolQConfig( input = PoolQInput( rowReference = Paths.get("/gpp/reference/reference_20191115.csv"), @@ -71,10 +71,9 @@ class RunInfoWriterTest extends FunSuite { val actual = Using.resource(Source.fromFile(outputFile.toFile))(_.getLines().mkString("\n")) assertEquals(actual, expected) - } finally { - Files.delete(outputFile) - } + finally Files.delete(outputFile) + end try } -} +end RunInfoWriterTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityTest.scala index 8b884bd..e6930c6 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UmiQualityTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -13,7 +13,7 @@ import org.broadinstitute.gpp.poolq3.hist.{BasicShardedHistogram, OpenHashMapHis import org.broadinstitute.gpp.poolq3.process.State import org.broadinstitute.gpp.poolq3.testutil.contents -class UmiQualityTest extends FunSuite with TestResources { +class UmiQualityTest extends FunSuite with TestResources: test("umi quality report") { val state = new State( @@ -24,14 +24,14 @@ class UmiQualityTest extends FunSuite with TestResources { ) val umi: LazyList[String] = - for { + for b1 <- LazyList('A', 'C', 'G', 'T') b2 <- LazyList('A', 'C', 'G', 'T') b3 <- LazyList('A', 'C', 'G', 'T') b4 <- LazyList('A', 'C', 'G', 'T') b5 <- LazyList('A', 'C', 'G', 'T') b6 <- LazyList('A', 'C', 'G', 'T') - } yield s"$b1$b2$b3$b4$b5$b6" + yield s"$b1$b2$b3$b4$b5$b6" val (expectedUmiLL, rest) = umi.splitAt(96) val expectedUmi = expectedUmiLL.toList.zipWithIndex.map { case (x, i) => (x, i + 1) } @@ -48,7 +48,7 @@ class UmiQualityTest extends FunSuite with TestResources { } val file = Files.createTempFile("umi-quality-", ".txt") - try { + try val _ = UmiQualityWriter.write(file, state) // read the file, split into lines, drop the 1st header @@ -62,10 +62,10 @@ class UmiQualityTest extends FunSuite with TestResources { rest.slice(2, 102).zip(unexpectedUmi.reverse.take(100)).foreach { case (a, e) => assertEquals(a, s"${e._1}\t${e._2}") } - } finally { + finally val _ = Files.deleteIfExists(file) - } + end try } -} +end UmiQualityTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala index 1c9bddb..4be6f06 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,7 +10,7 @@ import java.nio.file.Files import scala.io.Source import scala.util.{Random, Using} -import better.files._ +import better.files.* import munit.{FunSuite, Location} import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} @@ -18,7 +18,7 @@ import org.broadinstitute.gpp.poolq3.process.{ScoringConsumer, UnexpectedSequenc import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} import org.broadinstitute.gpp.poolq3.{PoolQ, TestResources} -class UnexpectedSequencesTest extends FunSuite with TestResources { +class UnexpectedSequencesTest extends FunSuite with TestResources: private[this] val rowReferenceBarcodes = List("AAAAAAAAAAAAAAAAAAAA", "AAAAAAAAAAAAAAAAAAAC", "AAAAAAAAAAAAAAAAAAAG", "AAAAAAAAAAAAAAAAAAAT").map(b => @@ -94,7 +94,7 @@ class UnexpectedSequencesTest extends FunSuite with TestResources { test("read unexpected sequence cache") { val cachePath = resourcePath("unexpected-sequences") val outputFile = Files.createTempFile("unexpected", ".txt") - try { + try val unexpectedReadCount = 9 UnexpectedSequenceWriter @@ -112,31 +112,28 @@ class UnexpectedSequencesTest extends FunSuite with TestResources { val actual = contents.mkString assertEquals(actual, expected) } - } finally { + finally val _ = Files.deleteIfExists(outputFile) - } + end try } test("breadth-first iterator") { import scala.collection.mutable - class TestCachedBarcodes(val colBc: String, iter: Iterator[String]) - extends UnexpectedSequenceWriter.CachedBarcodes { + class TestCachedBarcodes(val colBc: String, iter: Iterator[String]) extends UnexpectedSequenceWriter.CachedBarcodes: var closed = false override def hasNext: Boolean = iter.hasNext override def next(): String = iter.next() override def close(): Unit = closed = true - } val i1 = new TestCachedBarcodes("AAA", Iterator("AAAAAA")) val i2 = new TestCachedBarcodes("CCC", Iterator("AAAAAA", "AAAAAT", "AAAAAA")) val i3 = new TestCachedBarcodes("GGG", Iterator("AAAAAA")) val i4 = new TestCachedBarcodes("TTT", Iterator("AAAAAA")) - val circularBuffer: UnexpectedSequenceWriter.BreadthFirstIterator = { + val circularBuffer: UnexpectedSequenceWriter.BreadthFirstIterator = val readers = mutable.Queue[UnexpectedSequenceWriter.CachedBarcodes](i1, i2, i3, i4) new UnexpectedSequenceWriter.BreadthFirstIterator(readers) - } val barcodes = circularBuffer.toList assertEquals( @@ -158,12 +155,12 @@ class UnexpectedSequencesTest extends FunSuite with TestResources { private def testIt(underlyingBarcodes: List[(String, String)], unexpectedReadCount: Int, maxMapSize: Int)(implicit loc: Location - ): Unit = { + ): Unit = val barcodes = CloseableIterable.ofList(underlyingBarcodes.map { case (row, col) => Barcodes(Some(FoundBarcode(row.toCharArray, 0)), None, Some(FoundBarcode(col.toCharArray, 0)), None) }) val tmpPath = Files.createTempDirectory("unexpected-sequences-test") - try { + try val outputFile = tmpPath.resolve("unexpected-sequences.txt") val cachePath = tmpPath.resolve("cache") val ust = new UnexpectedSequenceTracker(cachePath, colReference) @@ -188,9 +185,11 @@ class UnexpectedSequencesTest extends FunSuite with TestResources { val actual = contents.mkString assertEquals(actual, expected) } - } finally { + finally val _ = tmpPath.toFile.toScala.delete() - } - } -} + end try + + end testIt + +end UnexpectedSequencesTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/seq/SeqPackageTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/seq/SeqPackageTest.scala index 4367b1a..7183b67 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/seq/SeqPackageTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/seq/SeqPackageTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -8,10 +8,10 @@ package org.broadinstitute.gpp.poolq3.seq import org.broadinstitute.gpp.poolq3.gen.{acgtn, barcode, dnaSeq, nonEmptyDnaSeq} import org.scalacheck.Gen import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers._ -import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ +import org.scalatest.matchers.should.Matchers.* +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks.* -class SeqPackageTest extends AnyFlatSpec { +class SeqPackageTest extends AnyFlatSpec: "complement" should "complement a string of DNA" in { val _ = complement("") should be("") @@ -40,11 +40,11 @@ class SeqPackageTest extends AnyFlatSpec { it should "be 0 iff and only iff x = y" in { forAll(barcode, barcode) { (x, y) => val xydist = countMismatches(x, y) - if (x != y) { + if x != y then val _ = xydist should be > 0 val _ = countMismatches(x, x) should be(0) countMismatches(y, y) should be(0) - } else xydist should be(0) + else xydist should be(0) } } @@ -90,4 +90,4 @@ class SeqPackageTest extends AnyFlatSpec { } } -} +end SeqPackageTest diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/package.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/package.scala deleted file mode 100644 index 9a9273a..0000000 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/package.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import java.nio.file.{Path => JPath} - -import scala.io.Source -import scala.util.Using - -import cats.effect.Resource -import fs2.io.file.Files - -package object testutil { - - def contents(p: JPath): String = Using.resource(Source.fromFile(p.toFile))(_.mkString) - - def tempFile[F[_]: Files](prefix: String, suffix: String): Resource[F, JPath] = - Files[F].tempFile(None, prefix, suffix, None).map(_.toNioPath) - -} diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/testutil.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/testutil.scala new file mode 100644 index 0000000..c74ccc8 --- /dev/null +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/testutil/testutil.scala @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.testutil + +import java.nio.file.Path as JPath + +import scala.io.Source +import scala.util.Using + +import cats.effect.Resource +import fs2.io.file.Files + +def contents(p: JPath): String = Using.resource(Source.fromFile(p.toFile))(_.mkString) + +def tempFile[F[_]: Files](prefix: String, suffix: String): Resource[F, JPath] = + Files[F].tempFile(None, prefix, suffix, None).map(_.toNioPath) diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/tools/package.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/tools/package.scala deleted file mode 100644 index 085e700..0000000 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/tools/package.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ -package org.broadinstitute.gpp.poolq3 - -import scala.util.Random - -import org.broadinstitute.gpp.poolq3.seq.nCount - -package object tools { - - def timed(times: Int)(f: Unit => Unit): Long = { - val range = 1 to times - val t0 = System.currentTimeMillis() - range.foreach(_ => f(())) - val t1 = System.currentTimeMillis() - t1 - t0 - } - - def nanoTimed[A](times: Int)(f: Unit => A): (A, Long) = { - val range = 1 to times - var a: A = f(()) - val t0 = System.nanoTime() - range.foreach(_ => a = f(())) - val t1 = System.nanoTime() - (a, t1 - t0) - } - - def withNs(barcode: String, n: Int): String = { - require(n <= barcode.length) - val bases = barcode.toCharArray - while (nCount(bases) < n) { - bases(Random.nextInt(bases.length)) = 'N' - } - new String(bases) - } - -} diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/tools/tools.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/tools/tools.scala new file mode 100644 index 0000000..190ffc8 --- /dev/null +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/tools/tools.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.tools + +import scala.util.Random + +import org.broadinstitute.gpp.poolq3.seq.nCount + +def timed(times: Int)(f: Unit => Unit): Long = + val range = 1 to times + val t0 = System.currentTimeMillis() + range.foreach(_ => f(())) + val t1 = System.currentTimeMillis() + t1 - t0 + +def nanoTimed[A](times: Int)(f: Unit => A): (A, Long) = + val range = 1 to times + var a: A = f(()) + val t0 = System.nanoTime() + range.foreach(_ => a = f(())) + val t1 = System.nanoTime() + (a, t1 - t0) + +def withNs(barcode: String, n: Int): String = + require(n <= barcode.length) + val bases = barcode.toCharArray + while nCount(bases) < n do bases(Random.nextInt(bases.length)) = 'N' + new String(bases) diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicyTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicyTest.scala index 129890f..a382a02 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicyTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/types/ReadIdCheckPolicyTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. + * Copyright (c) 2024 The Broad Institute, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,7 +7,7 @@ package org.broadinstitute.gpp.poolq3.types import munit.FunSuite -class ReadIdCheckPolicyTest extends FunSuite { +class ReadIdCheckPolicyTest extends FunSuite: val pe1 = Read("@SL-HDG:HL3FLBCX3210428:HL3FLBCX3:1:1101:10000:10930 1:N:0:", "") val pe2 = Read("@SL-HDG:HL3FLBCX3210428:HL3FLBCX3:1:1101:10000:10930 2:N:0:", "") @@ -17,10 +17,10 @@ class ReadIdCheckPolicyTest extends FunSuite { val pes = List(pe1, pe2, pe3) val pairedEndTuples = - for { + for a <- pes b <- pes if a != b - } yield (a, b) + yield (a, b) test("Illumina policy checks up to the first space") { pairedEndTuples.foreach { case (a, b) => ReadIdCheckPolicy.Illumina.check(a, b) } @@ -40,4 +40,4 @@ class ReadIdCheckPolicyTest extends FunSuite { } } -} +end ReadIdCheckPolicyTest diff --git a/version.sbt b/version.sbt index 7956062..d5fa47a 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.11.1-SNAPSHOT" +ThisBuild / version := "3.12.0-SNAPSHOT"