Skip to content

Commit

Permalink
hotfix(MergedIntervalTree): Improve performance of MergedIntervalTree…
Browse files Browse the repository at this point in the history
… and reliant code paths. (#18)
  • Loading branch information
Kentalot authored Nov 29, 2022
1 parent f02a43f commit e32af48
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 121 deletions.
2 changes: 2 additions & 0 deletions Ilmn.Das.App.Wittyer.sln
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ReleaseNotes", "ReleaseNote
docs\release-notes\v0.3.2.md = docs\release-notes\v0.3.2.md
docs\release-notes\v0.3.3.md = docs\release-notes\v0.3.3.md
docs\release-notes\v0.3.4.md = docs\release-notes\v0.3.4.md
docs\release-notes\v0.3.5.md = docs\release-notes\v0.3.5.md
docs\release-notes\v0.3.5.1.md = docs\release-notes\v0.3.5.1.md
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{3CCBABA0-BF50-4691-86B3-B2AC662A4F30}"
Expand Down
6 changes: 3 additions & 3 deletions Ilmn.Das.App.Wittyer/Ilmn.Das.App.Wittyer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
<Version>0.3.5.0</Version>
<AssemblyVersion>0.3.5.0</AssemblyVersion>
<FileVersion>0.3.5.0</FileVersion>
<Version>0.3.5.1</Version>
<AssemblyVersion>0.3.5.1</AssemblyVersion>
<FileVersion>0.3.5.1</FileVersion>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
Expand Down
53 changes: 26 additions & 27 deletions Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
Expand All @@ -19,7 +19,6 @@
using Ilmn.Das.Std.BioinformaticUtils.Contigs;
using Ilmn.Das.Std.BioinformaticUtils.GenomicFeatures;
using Ilmn.Das.Std.VariantUtils.SimpleVariants;
using JetBrains.Annotations;

namespace Ilmn.Das.App.Wittyer.Infrastructure
{
Expand All @@ -39,10 +38,9 @@ private static readonly IReadOnlyList<MatchEnum> GenotypeMatchTypes
/// <param name="isGenotypeEvaluated">If set to <c>true</c>, the genotype is evaluated.</param>
/// <param name="inputSpecs">The input specs.</param>
/// <returns></returns>
[NotNull]
public static SampleMetrics GenerateSampleStats([NotNull] IWittyerResult truth,
[NotNull] IWittyerResult query, bool isGenotypeEvaluated,
[NotNull] IReadOnlyDictionary<WittyerType, InputSpec> inputSpecs)
public static SampleMetrics GenerateSampleStats(IWittyerResult truth,
IWittyerResult query, bool isGenotypeEvaluated,
IReadOnlyDictionary<WittyerType, InputSpec> inputSpecs)
{
var perTypeBinnedDictionary =
inputSpecs.ToDictionary(kvp => kvp.Key,
Expand Down Expand Up @@ -105,29 +103,29 @@ public static SampleMetrics GenerateSampleStats([NotNull] IWittyerResult truth,

private static (IBasicStatsCount overallBaseStats,
IReadOnlyDictionary<WittyerType, IBasicStatsCount> perTypeOverallBaseStats)
GenerateStats([NotNull] IReadOnlyDictionary<WittyerType, InputSpec> inputSpecs,
[NotNull] IDictionary<WittyerType, BinnedDictionary> perTypeBinnedDictionary,
[NotNull] IWittyerResult result, bool isGenotypeEvaluated, WitDecision falseDecision,
[NotNull] Func<IMutableStats, IMutableEventStatsCount> eventsStatsSelector,
[NotNull] Func<MutableEventAndBasesStats, IMutableBaseStatsCount> baseStatsSelector)
GenerateStats(IReadOnlyDictionary<WittyerType, InputSpec> inputSpecs,
IDictionary<WittyerType, BinnedDictionary> perTypeBinnedDictionary,
IWittyerResult result, bool isGenotypeEvaluated, WitDecision falseDecision,
Func<IMutableStats, IMutableEventStatsCount> eventsStatsSelector,
Func<MutableEventAndBasesStats, IMutableBaseStatsCount> baseStatsSelector)
{
// tracks the summary OverallStats total base stats
var grandTotalDictionary = new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>();
var grandTotalTpDictionary = new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>();
var grandTotalDictionary = new ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>();
var grandTotalTpDictionary = new ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>();

// tracks the per type OverallStats for bases
var perTypeTotalDictionary = new ConcurrentDictionary<WittyerType,
ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>();
ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>();
var perTypeTotalTpDictionary = new ConcurrentDictionary<WittyerType,
ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>();
ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>();

// tracks the Per Type Per bin base stats
var perTypePerBinTotalDictionary =
new ConcurrentDictionary<WittyerType, ConcurrentDictionary<uint,
ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>>();
ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>>();
var perTypePerBinTpDictionary =
new ConcurrentDictionary<WittyerType, ConcurrentDictionary<uint,
ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>>();
ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>>();

foreach (var (type, variants) in result.Variants
.Select(kvp => (kvp.Key, kvp.Value.AsEnumerable<IWittyerSimpleVariant>()))
Expand All @@ -137,13 +135,13 @@ private static (IBasicStatsCount overallBaseStats,
var bedRegion = inputSpecs[type].IncludedRegions?.IntervalTree;
var statsBinnedDictionary = perTypeBinnedDictionary[type];
var perBinTotalDictionary = perTypePerBinTotalDictionary.GetOrAdd(type,
_ => new ConcurrentDictionary<uint, ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>());
_ => new ConcurrentDictionary<uint, ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>());
var perBinTpDictionary = perTypePerBinTpDictionary.GetOrAdd(type,
_ => new ConcurrentDictionary<uint, ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>());
_ => new ConcurrentDictionary<uint, ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>());
var typeTotalTrees = perTypeTotalDictionary.GetOrAdd(type,
_ => new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>());
_ => new ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>());
var typeTotalTpTrees = perTypeTotalTpDictionary.GetOrAdd(type,
_ => new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>());
_ => new ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>());

foreach (var binGroup in variants
.GroupBy(v => v.Win.Start))
Expand Down Expand Up @@ -249,8 +247,9 @@ private static (IBasicStatsCount overallBaseStats,
var totalTree = perTypePerBinTotalTrees[chr];
if (stats == null)
stats = baseStatsSelector((MutableEventAndBasesStats) mutableStats);
if (perTypePerBinTpTrees.TryGetValue(chr, out var tpTree))
if (perTypePerBinTpTrees.TryGetValue(chr, out var tpTreeUnmerged))
{
var tpTree = tpTreeUnmerged.ToMergedIntervalTree();
foreach (var wholeInterval in totalTree)
{
var overlaps = tpTree.Search(wholeInterval).ToList();
Expand Down Expand Up @@ -289,15 +288,15 @@ private static (IBasicStatsCount overallBaseStats,
// Keys that have no base-level stats must be filtered out, so that output (Json stats etc.) can cleanly omit them.
typedOverBases);

// Local helper: returns the per-contig interval collection stored in dict under variant.Contig,
// adding an empty collection for that contig first when none exists yet.
// NOTE(review): this is a diff view, so the signature and the body each appear twice; presumably the
// MergedIntervalTree<uint> lines are the removed version and the List<IInterval<uint>> lines the added one.
MergedIntervalTree<uint> GetOrAddTree(in ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>> dict,
List<IInterval<uint>> GetOrAddTree(in ConcurrentDictionary<IContigInfo, List<IInterval<uint>>> dict,
in IWittyerSimpleVariant variant)
=> dict.GetOrAdd(variant.Contig, _ => MergedIntervalTree.Create<uint>());
=> dict.GetOrAdd(variant.Contig, _ => new List<IInterval<uint>>());

// Local helper: returns the per-contig dictionary stored in perBinTpDictionary under binGroup.Key,
// adding an empty ConcurrentDictionary for that key first when none exists yet.
// NOTE(review): this is a diff view, so the signature and the factory lambda each appear twice; presumably
// the MergedIntervalTree<uint> lines are the removed version and the List<IInterval<uint>> lines the added one.
ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>> GetOrAddGenomeTree<T>(
in ConcurrentDictionary<T, ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>>
ConcurrentDictionary<IContigInfo, List<IInterval<uint>>> GetOrAddGenomeTree<T>(
in ConcurrentDictionary<T, ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>>
perBinTpDictionary, in IGrouping<T, IWittyerSimpleVariant> binGroup)
=> perBinTpDictionary.GetOrAdd(binGroup.Key,
_ => new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>());
_ => new ConcurrentDictionary<IContigInfo, List<IInterval<uint>>>());
}
}
}
33 changes: 13 additions & 20 deletions Ilmn.Das.App.Wittyer/Input/IncludeBedFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,17 @@ public class IncludeBedFile
/// <summary>
/// The IntervalTree from this bed file.
/// </summary>
[NotNull]
public GenomeIntervalTree<IContigAndInterval> IntervalTree => _intervalTree.Value;
[NotNull] private readonly Lazy<GenomeIntervalTree<IContigAndInterval>> _intervalTree;
private readonly Lazy<GenomeIntervalTree<IContigAndInterval>> _intervalTree;

/// <summary>
/// The bed file associated with this instance. If created from <see cref="CreateFromContigIntervals"/>, this will write out a bed file.
/// </summary>
[NotNull] public FileInfo BedFile => _fileSource.Value;
public FileInfo BedFile => _fileSource.Value;
private readonly Lazy<FileInfo> _fileSource;

private IncludeBedFile([NotNull] Lazy<GenomeIntervalTree<IContigAndInterval>> tree,
[NotNull] Lazy<FileInfo> fileSource)
private IncludeBedFile(Lazy<GenomeIntervalTree<IContigAndInterval>> tree,
Lazy<FileInfo> fileSource)
{
_intervalTree = tree;
_fileSource = fileSource;
Expand All @@ -49,10 +48,9 @@ private IncludeBedFile([NotNull] Lazy<GenomeIntervalTree<IContigAndInterval>> tr
/// <param name="pathToWriteBedFile">The path to write the bed file to, should you need one
/// (nothing is written until you access <see cref="BedFile"/>).
/// <c>WARNING:</c> This will overwrite the file!</param>
[NotNull]
[Pure]
public static IncludeBedFile CreateFromContigIntervals(
[NotNull] IEnumerable<IContigAndInterval> contigIntervals, [NotNull] FileInfo pathToWriteBedFile)
IEnumerable<IContigAndInterval> contigIntervals, FileInfo pathToWriteBedFile)
// ReSharper disable PossibleMultipleEnumeration // TypeCache doesn't enumerate.
=> TypeCache<IEnumerable<IContigAndInterval>, IncludeBedFile>.GetOrAdd(contigIntervals, () =>
{
Expand Down Expand Up @@ -88,18 +86,17 @@ Lazy<FileInfo> CreateBedFileLazy(
});
// ReSharper restore PossibleMultipleEnumeration

[NotNull]
private static GenomeIntervalTree<IContigAndInterval> CreateGenomeIntervalTree(
[NotNull] IEnumerable<IContigAndInterval> contigIntervals)
IEnumerable<IContigAndInterval> contigIntervals)
{
var dictionary = new Dictionary<IContigInfo, MergedIntervalTree<uint>>();
var dictionary = new Dictionary<IContigInfo, List<IInterval<uint>>>();
var listOrder = new List<IContigInfo>();
foreach (var contigInterval in contigIntervals)
{
var contig = contigInterval.Contig;
if (!dictionary.TryGetValue(contig, out var tree))
{
tree = MergedIntervalTree<uint>.Create(null);
tree = new List<IInterval<uint>>();
listOrder.Add(contig);
dictionary.Add(contig, tree);
}
Expand All @@ -109,9 +106,8 @@ private static GenomeIntervalTree<IContigAndInterval> CreateGenomeIntervalTree(
var ret = GenomeIntervalTree<IContigAndInterval>.Create();
foreach (var contig in listOrder)
{
ret.AddRange(dictionary[contig]
.Select(i => i as IContigAndInterval
?? ContigAndInterval.Create(contig, i.Start, i.Stop)));
ret.AddRange(dictionary[contig].ToMergedIntervalTree()
.Select(i => i as IContigAndInterval ?? ContigAndInterval.Create(contig, i.Start, i.Stop)));
var other = contig.ToUcscStyle();
if (other.Name == contig.Name)
other = contig.ToGrchStyle();
Expand All @@ -128,9 +124,8 @@ private static GenomeIntervalTree<IContigAndInterval> CreateGenomeIntervalTree(
/// Assumes the bed file is valid; otherwise, this might crash.
/// </summary>
/// <param name="bedFile">The source bed file</param>
[NotNull]
[Pure]
public static IncludeBedFile CreateFromBedFile([NotNull] FileInfo bedFile)
public static IncludeBedFile CreateFromBedFile(FileInfo bedFile)
=> bedFile.ExistsNow()
? CreateFromBedReader(BedReader.Create(bedFile))
: TypeCache<string, IncludeBedFile>.GetOrAdd(bedFile.FullName,
Expand All @@ -140,16 +135,14 @@ public static IncludeBedFile CreateFromBedFile([NotNull] FileInfo bedFile)
/// Creates a new instance of <see cref="IncludeBedFile"/> from a <see cref="BedReader"/>.
/// </summary>
/// <param name="bedReader">The source bed reader</param>
/// <returns>The <see cref="IncludeBedFile"/> obtained from <c>TypeCache</c> under the reader's complete real path.</returns>
/// <remarks>
/// The interval tree and the file info are both wrapped in <see cref="Lazy{T}"/>, so
/// <c>CreateGenomeIntervalTree</c> is not run on the reader until <see cref="IntervalTree"/> is first accessed.
/// </remarks>
// NOTE(review): presumably TypeCache.GetOrAdd returns the already-cached instance when the same path
// was seen before, in which case the factory lambda (and this bedReader) is not used — confirm.
[NotNull]
[Pure]
public static IncludeBedFile CreateFromBedReader([NotNull] BedReader bedReader)
public static IncludeBedFile CreateFromBedReader(BedReader bedReader)
=> TypeCache<string, IncludeBedFile>.GetOrAdd(bedReader.FileSource.GetCompleteRealPath().FullName, () =>
new IncludeBedFile(new Lazy<GenomeIntervalTree<IContigAndInterval>>(
() => CreateGenomeIntervalTree(bedReader)),
new Lazy<FileInfo>(() => bedReader.FileSource)));

/// <inheritdoc/>
// Returns the full name of BedFile. Reading BedFile evaluates the lazy file source; per the BedFile
// documentation this can write out a bed file when the instance came from CreateFromContigIntervals.
[NotNull]
public override string ToString() => BedFile.FullName;
}
}
}
22 changes: 9 additions & 13 deletions Ilmn.Das.App.Wittyer/Stats/Counts/MutableBaseStatsCount.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@
using Ilmn.Das.Std.AppUtils.Comparers;
using Ilmn.Das.Std.AppUtils.Intervals;
using Ilmn.Das.Std.BioinformaticUtils.Contigs;
using JetBrains.Annotations;

namespace Ilmn.Das.App.Wittyer.Stats.Counts
{
/// <summary>
/// A mutable accumulator of per-contig interval trees, kept as two separate sets: a "true" count and a "false" count.
/// </summary>
// NOTE(review): this is a diff view, so every member appears twice; presumably the lines carrying
// [NotNull] are the removed version and the lines without it the added one.
internal interface IMutableBaseStatsCount : IEquatable<IMutableBaseStatsCount>
{
/// <summary>Read-only view of the true-count interval trees, keyed by contig.</summary>
[NotNull] IReadOnlyDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> TrueCount { get; }
IReadOnlyDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> TrueCount { get; }

/// <summary>Read-only view of the false-count interval trees, keyed by contig.</summary>
[NotNull] IReadOnlyDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> FalseCount { get; }
IReadOnlyDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> FalseCount { get; }

/// <summary>Records <paramref name="interval"/> under <paramref name="contig"/> in the true count.</summary>
void AddTrueCount([NotNull] IContigInfo contig, [NotNull] IInterval<uint> interval);
void AddTrueCount(IContigInfo contig, IInterval<uint> interval);

/// <summary>Records <paramref name="interval"/> under <paramref name="contig"/> in the false count.</summary>
void AddFalseCount([NotNull] IContigInfo contig, [NotNull] IInterval<uint> interval);
void AddFalseCount(IContigInfo contig, IInterval<uint> interval);

}

Expand All @@ -30,17 +29,15 @@ private MutableBaseStatsCount(ConcurrentDictionary<IContigInfo, IIntervalTree<ui
_falseCount = falseCount;
}

/// <summary>
/// Creates an instance by passing the supplied true-count and false-count dictionaries to the private constructor.
/// </summary>
// NOTE(review): the dictionaries are handed over as-is (no copy is made here), so the caller presumably
// shares them with the new instance — confirm against the constructor.
[NotNull]
internal static IMutableBaseStatsCount Create(ConcurrentDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> trueCount,
ConcurrentDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> falseCount)
=> new MutableBaseStatsCount(trueCount, falseCount);

/// <summary>
/// Creates an instance backed by two new, empty <c>ConcurrentDictionary</c> instances
/// (one for the true count, one for the false count).
/// </summary>
[NotNull]
public static IMutableBaseStatsCount Create()
=> Create(new ConcurrentDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>>(),
new ConcurrentDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>>());

/// <summary>
/// Compares this instance with <paramref name="other"/> by calling <c>Equals</c> on the
/// <see cref="TrueCount"/> dictionaries and on the <see cref="FalseCount"/> dictionaries.
/// </summary>
// NOTE(review): the backing fields are ConcurrentDictionary instances, which do not override Equals,
// so this presumably amounts to reference equality of the dictionaries — confirm that is intended.
// NOTE(review): other is dereferenced without a null check.
public bool Equals([NotNull] IMutableBaseStatsCount other)
public bool Equals(IMutableBaseStatsCount other)
=> TrueCount.Equals(other.TrueCount) && FalseCount.Equals(other.FalseCount);

private readonly ConcurrentDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> _trueCount;
Expand All @@ -52,11 +49,10 @@ public bool Equals([NotNull] IMutableBaseStatsCount other)
public IReadOnlyDictionary<IContigInfo, IIntervalTree<uint, IInterval<uint>>> FalseCount => _falseCount;

/// <summary>
/// Adds <paramref name="interval"/> to the interval tree stored for <paramref name="contig"/> in the
/// true-count dictionary, creating the tree first when the contig has none yet.
/// </summary>
// NOTE(review): this is a diff view, so the body appears twice; presumably the MergedIntervalTree.Create<uint>()
// body is the removed version and the new IntervalTree<uint>() body the added one.
public void AddTrueCount(IContigInfo contig, IInterval<uint> interval)
=>
_trueCount.GetOrAdd(contig, _ => MergedIntervalTree.Create<uint>()).Add(interval);
=> _trueCount.GetOrAdd(contig, _ => new IntervalTree<uint>()).Add(interval);

/// <summary>
/// Adds <paramref name="interval"/> to the interval tree stored for <paramref name="contig"/> in the
/// false-count dictionary, creating the tree first when the contig has none yet.
/// </summary>
// NOTE(review): this is a diff view, so the method appears twice; presumably the MergedIntervalTree.Create<uint>()
// version is the removed one and the new IntervalTree<uint>() version the added one.
public void AddFalseCount(IContigInfo contig, IInterval<uint> interval) =>
_falseCount.GetOrAdd(contig, _ => MergedIntervalTree.Create<uint>()).Add(interval);
public void AddFalseCount(IContigInfo contig, IInterval<uint> interval)
=> _falseCount.GetOrAdd(contig, _ => new IntervalTree<uint>()).Add(interval);

public override int GetHashCode()
{
Expand All @@ -66,4 +62,4 @@ public override int GetHashCode()
return hashCode;
}
}
}
}
Loading

0 comments on commit e32af48

Please sign in to comment.