From 96430929246cd8e42a8bc2e8d766b74c1480dbc1 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 5 Aug 2024 16:59:06 +0300 Subject: [PATCH 01/14] --no-edit-distance added to FlowFeatureMap --- .../featuremapping/FlowFeatureMapper.java | 7 ++-- .../FlowFeatureMapperArgumentCollection.java | 6 ++++ .../walkers/featuremapping/SNVMapper.java | 3 +- .../FlowFeatureMapperIntegrationTest.java | 34 +++++++++++++++++++ ..._feature_mapper_no_edit_disance_output.vcf | 3 ++ ...ture_mapper_no_edit_disance_output.vcf.idx | 3 ++ 6 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 19783e1984f..1e387c8204a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -34,6 +34,7 @@ import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.util.*; +import java.util.function.Supplier; /** @@ -197,7 +198,7 @@ protected static class MappedFeature implements Comparable { int filteredCount; int nonIdentMBasesOnRead; int featuresOnRead; - int refEditDistance; + Supplier refEditDistance; int index; int smqLeft; int smqRight; @@ -703,7 +704,9 @@ private void emitFeature(final MappedFeature fr) { vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead); vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); - vcb.attribute(VCF_EDIST, fr.refEditDistance); + if ( !fmArgs.noEditDistance && fr.refEditDistance != null ) { + vcb.attribute(VCF_EDIST, fr.refEditDistance.get()); + } vcb.attribute(VCF_INDEX, fr.index); // median/mean quality on? diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 03660746d2b..2e257ee97c3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -89,6 +89,12 @@ enum MappingFeatureEnum { @Argument(fullName = "keep-supplementary-alignments", doc = "keep supplementary alignments ?", optional = true) public boolean keepSupplementaryAlignments = false; + /** + * no edit distance (compute intensive)? + **/ + @Argument(fullName = "no-edit-distance", doc = "do not emit edit-distance metrics (compute intensive) ?", optional = true) + public boolean noEditDistance = false; + /** * debug negatives? **/ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java index e67f5ee570e..e299d594883 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java @@ -70,7 +70,6 @@ public void forEachOnRead(GATKRead read, ReferenceContext referenceContext, Cons } else { basesString = new String(Arrays.copyOfRange(bases, startSoftClip, bases.length - endSoftClip)); } - int refEditDistance = levDistance.apply(basesString, new String(ref)); // count bases delta on M cigar elements int nonIdentMBases = 0; @@ -144,7 +143,7 @@ public void forEachOnRead(GATKRead read, ReferenceContext referenceContext, Cons if ( (fmArgs.reportAllAlts || fmArgs.tagBasesWithAdjacentRefDiff) && !surrounded ) feature.adjacentRefDiff = true; feature.nonIdentMBasesOnRead = nonIdentMBases; - feature.refEditDistance = refEditDistance; + feature.refEditDistance = () -> levDistance.apply(basesString, new String(ref)); if ( !read.isReverseStrand() ) feature.index = readOfs; else diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 19d8cb8fc3b..ecf9c6f6767 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -220,4 +220,38 @@ public void testTagBasesWithAdjacentRefDiff() throws IOException { } } + @Test + public void testNoEditDistance() throws IOException { + + final File outputDir = createTempDir("testFlowFeatureMapperTest"); + final File expectedFile = new File(testDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); + final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); + + final String[] args = new String[] { + "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", + "-O", outputFile.getAbsolutePath(), + "-I", testDir + "/snv_feature_mapper_input.bam", + "--copy-attr", "RG", + "--copy-attr", "AS,Integer,AS attribute, as copied", + "--copy-attr", "rq,Float", + "--limit-score", "100", + "--min-score", "0", + "--snv-identical-bases", "10", + "--debug-negatives", "false", + "--debug-read-name", "150451-BC94-0645901755", + "--no-edit-distance" + }; + + // run the tool + runCommandLine(args); // no assert, just make sure we don't throw + + // make sure we've generated the otuput file + Assert.assertTrue(outputFile.exists()); + + // walk the output and expected files, compare non-comment lines + if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); + } + } + } diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf new file mode 100644 index 00000000000..0155467d7f0 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07689d2722ddcbc8d5471f1fa51bbeba60e62ce8754d6ebb9795e64fe4270ed4 +size 5476289 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx new file mode 100644 index 00000000000..87c01405d17 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e18ecfc4a3ff82d796f6f80852a945d1a586acf052dd7224cb55ee6efeb468c +size 120129 From d961443f493d32220d0f693262ce5fa4e07b45e3 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Tue, 6 Aug 2024 00:18:42 +0300 Subject: [PATCH 02/14] --use-mapper-thread added --- .../featuremapping/FlowFeatureMapper.java | 80 +++++++++++++++++-- .../FlowFeatureMapperArgumentCollection.java | 6 ++ .../FlowFeatureMapperIntegrationTest.java | 33 ++++++++ .../snv_feature_mapper_input.cram | 3 + .../snv_feature_mapper_input.cram.crai | 3 + 5 files changed, 119 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_input.cram create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_input.cram.crai diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 1e387c8204a..bac75505286 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -17,23 +17,30 @@ import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.FlowBasedProgramGroup; -import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; +import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; -import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; -import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import java.util.function.Supplier; @@ -82,7 +89,9 @@ @DocumentedFeature @ExperimentalFeature -public final class FlowFeatureMapper extends ReadWalker { +public final class FlowFeatureMapper extends ReadWalker implements Runnable { + + private static final int MESSAGE_QUEUE_CAPACITY = 5000; static class CopyAttrInfo { public final String name; @@ -127,6 +136,14 @@ public CopyAttrInfo(final String spec) { final private List copyAttrInfo = new LinkedList<>(); + static class Message { + GATKRead read; + ReferenceContext referenceContext; + } + + private BlockingQueue downstreamQueue; + private BlockingQueue upstreamQueue; + // order here is according to SequenceUtil.VALID_BASES_UPPER final private String scoreForBaseNames[] = new String[SequenceUtil.VALID_BASES_UPPER.length]; @@ -277,11 +294,27 @@ public void onTraversalStart() { final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary(); vcfWriter = makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs); vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, getDefaultToolVCFHeaderLines())); + + // start threading? + if ( fmArgs.useMapperThread ) { + downstreamQueue = new LinkedBlockingQueue<>(MESSAGE_QUEUE_CAPACITY); + upstreamQueue = new LinkedBlockingQueue<>(); + (new Thread(this)).start(); + } } @Override public void closeTool() { - flushQueue(null, null); + if ( downstreamQueue != null ) { + downstreamQueue.add(new Message()); + try { + upstreamQueue.take(); + } catch (Throwable e) { + throw new GATKException("", e); + } + } else { + flushQueue(null, null); + } super.closeTool(); if ( vcfWriter != null ) { vcfWriter.close(); @@ -394,6 +427,22 @@ public void apply(final GATKRead read, final ReferenceContext referenceContext, return; } + // process the read + if ( downstreamQueue != null ) { + Message message = new Message(); + message.read = read; + message.referenceContext = referenceContext; + try { + downstreamQueue.put(message); + } catch (Throwable e) { + throw new GATKException("", e); + } + } else { + processRead(read, referenceContext); + } + } + + private void processRead(final GATKRead read, final ReferenceContext referenceContext) { // flush qeues up to this read flushQueue(read, referenceContext); @@ -762,5 +811,24 @@ private FeatureMapper buildMapper() { throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature); } } + + @Override + public void run() { + try { + boolean done = false; + while (!done) { + Message message = downstreamQueue.take(); + if (message.read != null) { + processRead(message.read, message.referenceContext); + } else { + flushQueue(null, null); + upstreamQueue.put(message); + done = true; + } + } + } catch (Throwable e) { + throw new GATKException("", e); + } + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 2e257ee97c3..333cbbe897e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -95,6 +95,12 @@ enum MappingFeatureEnum { @Argument(fullName = "no-edit-distance", doc = "do not emit edit-distance metrics (compute intensive) ?", optional = true) public boolean noEditDistance = false; + /** + * use a separate thread for the mapper + **/ + @Argument(fullName = "use-mapper-thread", doc = "use a separate thread for the mapper ?", optional = true) + public boolean useMapperThread = false; + /** * debug negatives? **/ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index ecf9c6f6767..5ca9fa6f0cd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -254,4 +254,37 @@ public void testNoEditDistance() throws IOException { } } + @Test + public void testMapperThread() throws IOException { + + final File outputDir = createTempDir("testFlowFeatureMapperTest"); + final File expectedFile = new File(testDir + "/snv_feature_mapper_output.vcf"); + final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_output.vcf"); + + final String[] args = new String[] { + "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", + "-O", outputFile.getAbsolutePath(), + "-I", testDir + "/snv_feature_mapper_input.cram", + "--copy-attr", "RG", + "--copy-attr", "AS,Integer,AS attribute, as copied", + "--copy-attr", "rq,Float", + "--limit-score", "100", + "--min-score", "0", + "--snv-identical-bases", "10", + "--debug-negatives", "false", + "--debug-read-name", "150451-BC94-0645901755", + "--use-mapper-thread" + }; + + // run the tool + runCommandLine(args); // no assert, just make sure we don't throw + + // make sure we've generated the otuput file + Assert.assertTrue(outputFile.exists()); + + // walk the output and expected files, compare non-comment lines + if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); + } + } } diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram b/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram new file mode 100644 index 00000000000..fcc9a60c592 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44edf4cfe7bf4c3897a2626dcc69f5a16c0a95b74ef101fbfc33e42204b723f9 +size 17140176 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram.crai b/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram.crai new file mode 100644 index 00000000000..df440c58981 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_input.cram.crai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fcaf3c2e53ad43c9db324f1573f94ee59cf52b8f2c05246026e17ede9e477ae +size 453 From 57b52d7730b187037b60c2289da70d55a87d5ae6 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Tue, 6 Aug 2024 19:12:14 +0300 Subject: [PATCH 03/14] --threaded-walker refactoring --- .../featuremapping/FlowFeatureMapper.java | 91 +++------------ .../FlowFeatureMapperArgumentCollection.java | 6 - .../featuremapping/ThreadedReadWalker.java | 106 ++++++++++++++++++ .../FlowFeatureMapperIntegrationTest.java | 2 +- 4 files changed, 122 insertions(+), 83 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index bac75505286..9d20c5c8b1b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -17,30 +17,23 @@ import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.FlowBasedProgramGroup; -import org.broadinstitute.hellbender.engine.FeatureContext; -import org.broadinstitute.hellbender.engine.GATKPath; -import org.broadinstitute.hellbender.engine.ReadWalker; -import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; -import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; +import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; +import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.util.*; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; import java.util.function.Supplier; @@ -89,9 +82,7 @@ @DocumentedFeature @ExperimentalFeature -public final class FlowFeatureMapper extends ReadWalker implements Runnable { - - private static final int MESSAGE_QUEUE_CAPACITY = 5000; +public final class FlowFeatureMapper extends ThreadedReadWalker { static class CopyAttrInfo { public final String name; @@ -136,14 +127,6 @@ public CopyAttrInfo(final String spec) { final private List copyAttrInfo = new LinkedList<>(); - static class Message { - GATKRead read; - ReferenceContext referenceContext; - } - - private BlockingQueue downstreamQueue; - private BlockingQueue upstreamQueue; - // order here is according to SequenceUtil.VALID_BASES_UPPER final private String scoreForBaseNames[] = new String[SequenceUtil.VALID_BASES_UPPER.length]; @@ -294,28 +277,12 @@ public void onTraversalStart() { final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary(); vcfWriter = makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs); vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, getDefaultToolVCFHeaderLines())); - - // start threading? - if ( fmArgs.useMapperThread ) { - downstreamQueue = new LinkedBlockingQueue<>(MESSAGE_QUEUE_CAPACITY); - upstreamQueue = new LinkedBlockingQueue<>(); - (new Thread(this)).start(); - } } @Override public void closeTool() { - if ( downstreamQueue != null ) { - downstreamQueue.add(new Message()); - try { - upstreamQueue.take(); - } catch (Throwable e) { - throw new GATKException("", e); - } - } else { - flushQueue(null, null); - } super.closeTool(); + flushQueue(null, null); if ( vcfWriter != null ) { vcfWriter.close(); } @@ -410,39 +377,30 @@ private String scoreNameForBase(int baseIndex) { } @Override - public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { + public boolean acceptRead(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext) { // include dups? if ( read.isDuplicate() && !fmArgs.includeDupReads ) { - return; + return false; } // include supplementary alignments? if ( read.isSupplementaryAlignment() && !fmArgs.keepSupplementaryAlignments ) { - return; + return false; } // include qc-failed reads? if ( ((read.getFlags() & VENDOR_QUALITY_CHECK_FLAG) != 0) && !includeQcFailedReads ) { - return; + return false; } - // process the read - if ( downstreamQueue != null ) { - Message message = new Message(); - message.read = read; - message.referenceContext = referenceContext; - try { - downstreamQueue.put(message); - } catch (Throwable e) { - throw new GATKException("", e); - } - } else { - processRead(read, referenceContext); - } + return true; } - private void processRead(final GATKRead read, final ReferenceContext referenceContext) { + + @Override + public void applyPossiblyThreaded(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { + // flush qeues up to this read flushQueue(read, referenceContext); @@ -811,24 +769,5 @@ private FeatureMapper buildMapper() { throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature); } } - - @Override - public void run() { - try { - boolean done = false; - while (!done) { - Message message = downstreamQueue.take(); - if (message.read != null) { - processRead(message.read, message.referenceContext); - } else { - flushQueue(null, null); - upstreamQueue.put(message); - done = true; - } - } - } catch (Throwable e) { - throw new GATKException("", e); - } - } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 333cbbe897e..2e257ee97c3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -95,12 +95,6 @@ enum MappingFeatureEnum { @Argument(fullName = "no-edit-distance", doc = "do not emit edit-distance metrics (compute intensive) ?", optional = true) public boolean noEditDistance = false; - /** - * use a separate thread for the mapper - **/ - @Argument(fullName = "use-mapper-thread", doc = "use a separate thread for the mapper ?", optional = true) - public boolean useMapperThread = false; - /** * debug negatives? **/ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java new file mode 100644 index 00000000000..5577be7020f --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java @@ -0,0 +1,106 @@ +package org.broadinstitute.hellbender.tools.walkers.featuremapping; + +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.util.concurrent.LinkedBlockingQueue; + +public abstract class ThreadedReadWalker extends ReadWalker implements Runnable { + + public static final int CAPACITY = 5000; + + // apply() message and queue - carrying read/referenceContext tuples + static class ApplyMessage { + GATKRead read; + ReferenceContext referenceContext; + FeatureContext featureContext; + } + + private LinkedBlockingQueue applyQueue; + private Thread worker; + + /** + * turn threaded walker on + **/ + @Argument(fullName = "threaded-walker", doc = "turn threaded walker on?", optional = true) + public boolean threadedWalker = false; + + + @Override + public void onTraversalStart() { + + // if running in threaded mode, start worker thread + if ( threadedWalker ) { + applyQueue = new LinkedBlockingQueue<>(CAPACITY); + worker = new Thread(this); + worker.start(); + } + } + + @Override + public void closeTool() { + + // if running in threaded mode, signal end of input and wait for thread to complete + if ( threadedWalker ) { + try { + applyQueue.put(new ApplyMessage()); + worker.join(); + } catch (InterruptedException e) { + throw new GATKException("", e ); + } + } + } + + @Override + final public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { + + if ( !acceptRead(read, referenceContext, featureContext) ) { + return; + } + + if ( threadedWalker ) { + // redirect message into queue + ApplyMessage message = new ApplyMessage(); + message.read = read; + message.referenceContext = referenceContext; + message.featureContext = featureContext; + try { + applyQueue.put(message); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + } else { + applyPossiblyThreaded(read, referenceContext, featureContext); + } + } + + @Override + public void run() { + + // loop on messages + ApplyMessage message; + while ( true ) { + // take next message off the queue + try { + message = applyQueue.take(); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + + // end of stream? + if ( message.read == null ) { + break; + } + + // process message + applyPossiblyThreaded(message.read, message.referenceContext, message.featureContext); + } + } + + public abstract boolean acceptRead(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext); + public abstract void applyPossiblyThreaded(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext); +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 5ca9fa6f0cd..3279bbb4c6a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -273,7 +273,7 @@ public void testMapperThread() throws IOException { "--snv-identical-bases", "10", "--debug-negatives", "false", "--debug-read-name", "150451-BC94-0645901755", - "--use-mapper-thread" + "--threaded-walker" }; // run the tool From 6642c68d7ad04f43542ff7e92e5267e436cd407c Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Tue, 6 Aug 2024 19:50:30 +0300 Subject: [PATCH 04/14] --threaded-writer added --- .../featuremapping/FlowFeatureMapper.java | 67 ++++++++++++++++++- .../FlowFeatureMapperIntegrationTest.java | 34 ++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 9d20c5c8b1b..395aa923ec9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -34,6 +34,7 @@ import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.util.*; +import java.util.concurrent.LinkedBlockingQueue; import java.util.function.Supplier; @@ -84,6 +85,8 @@ @ExperimentalFeature public final class FlowFeatureMapper extends ThreadedReadWalker { + public static final int CAPACITY1 = 5000; + static class CopyAttrInfo { public final String name; public final VCFHeaderLineType type; @@ -164,6 +167,15 @@ public CopyAttrInfo(final String spec) { @ArgumentCollection public FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection(); + // emit() message and queue + static class WriterMessage { + MappedFeature mappedFeature; + } + private LinkedBlockingQueue writerQueue; + private Thread writerWorker; + @Argument(fullName = "threaded-writer", doc = "turn threaded writer on?", optional = true) + public boolean threadedWriter = false; + protected static class ReadContext implements Comparable { final GATKRead read; final ReferenceContext referenceContext; @@ -277,12 +289,43 @@ public void onTraversalStart() { final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary(); vcfWriter = makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs); vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, getDefaultToolVCFHeaderLines())); + + // threaded writer? + if ( threadedWriter ) { + writerQueue = new LinkedBlockingQueue<>(CAPACITY1); + writerWorker = new Thread(new Runnable() { + @Override + public void run() { + WriterMessage message; + while ( true ) { + try { + message = writerQueue.take(); + if ( message.mappedFeature == null ) { + break; + } + emitFeature(message.mappedFeature); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + } + } + }); + writerWorker.start();; + } } @Override public void closeTool() { super.closeTool(); flushQueue(null, null); + if ( threadedWriter ) { + try { + writerQueue.put(new WriterMessage()); + writerWorker.join(); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + } if ( vcfWriter != null ) { vcfWriter.close(); } @@ -441,7 +484,17 @@ private void flushQueue(final GATKRead read, final ReferenceContext referenceCon while ( featureQueue.size() != 0 ) { final MappedFeature fr = featureQueue.poll(); enrichFeature(fr); - emitFeature(fr); + if ( !threadedWriter ) { + emitFeature(fr); + } else { + try { + WriterMessage message = new WriterMessage(); + message.mappedFeature = fr; + writerQueue.put(message); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + } } } else { // enter read into the queue @@ -454,7 +507,17 @@ private void flushQueue(final GATKRead read, final ReferenceContext referenceCon || (fr.start < read.getStart()) ) { fr = featureQueue.poll(); enrichFeature(fr); - emitFeature(fr); + if ( !threadedWriter ) { + emitFeature(fr); + } else { + try { + WriterMessage message = new WriterMessage(); + message.mappedFeature = fr; + writerQueue.put(message); + } catch (InterruptedException e) { + throw new GATKException("", e); + } + } } else { break; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 3279bbb4c6a..7fcdfe91199 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -287,4 +287,38 @@ public void testMapperThread() throws IOException { IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); } } + + @Test + public void testMapperWriterThreaded() throws IOException { + + final File outputDir = createTempDir("testFlowFeatureMapperTest"); + final File expectedFile = new File(testDir + "/snv_feature_mapper_output.vcf"); + final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_output.vcf"); + + final String[] args = new String[] { + "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", + "-O", outputFile.getAbsolutePath(), + "-I", testDir + "/snv_feature_mapper_input.cram", + "--copy-attr", "RG", + "--copy-attr", "AS,Integer,AS attribute, as copied", + "--copy-attr", "rq,Float", + "--limit-score", "100", + "--min-score", "0", + "--snv-identical-bases", "10", + "--debug-negatives", "false", + "--debug-read-name", "150451-BC94-0645901755", + "--threaded-writer" + }; + + // run the tool + runCommandLine(args); // no assert, just make sure we don't throw + + // make sure we've generated the otuput file + Assert.assertTrue(outputFile.exists()); + + // walk the output and expected files, compare non-comment lines + if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); + } + } } From 8e2e68624a45b710f524962f56d4c4844642b646 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Wed, 7 Aug 2024 12:07:24 +0300 Subject: [PATCH 05/14] --nm-edit-distance added --- .../featuremapping/FlowFeatureMapper.java | 27 +++++++++++++------ .../FlowFeatureMapperArgumentCollection.java | 6 ++--- .../walkers/featuremapping/SNVMapper.java | 2 +- .../FlowFeatureMapperIntegrationTest.java | 8 +++--- ..._feature_mapper_nm_edit_disance_output.vcf | 3 +++ ...ture_mapper_nm_edit_disance_output.vcf.idx | 3 +++ ..._feature_mapper_no_edit_disance_output.vcf | 3 --- ...ture_mapper_no_edit_disance_output.vcf.idx | 3 --- 8 files changed, 33 insertions(+), 22 deletions(-) create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx delete mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf delete mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 395aa923ec9..bec149ffb1d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -17,21 +17,25 @@ import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.FlowBasedProgramGroup; -import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; +import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; -import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; -import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.util.*; import java.util.concurrent.LinkedBlockingQueue; @@ -199,6 +203,7 @@ public int compareTo(ReadContext o) { protected static class MappedFeature implements Comparable { GATKRead read; + ReferenceContext referenceContext; FlowFeatureMapperArgumentCollection.MappingFeatureEnum type; byte[] readBases; byte[] refBases; @@ -220,9 +225,10 @@ protected static class MappedFeature implements Comparable { double scoreForBase[]; boolean adjacentRefDiff; - public MappedFeature(GATKRead read, FlowFeatureMapperArgumentCollection.MappingFeatureEnum type, byte[] readBases, + public MappedFeature(GATKRead read, ReferenceContext referenceContext, FlowFeatureMapperArgumentCollection.MappingFeatureEnum type, byte[] readBases, byte[] refBases, int readBasesOffset, int start, int offsetDelta) { this.read = read; + this.referenceContext = referenceContext; this.type = type; this.readBases = readBases; this.refBases = refBases; @@ -231,11 +237,12 @@ public MappedFeature(GATKRead read, FlowFeatureMapperArgumentCollection.MappingF this.offsetDelta = offsetDelta; } - static MappedFeature makeSNV(GATKRead read, int offset, byte refBase, int start, int offsetDelta) { + static MappedFeature makeSNV(GATKRead read, int offset, byte refBase, int start, int offsetDelta, ReferenceContext referenceContext) { byte[] readBases = {read.getBasesNoCopy()[offset]}; byte[] refBases = {refBase}; return new MappedFeature( read, + referenceContext, FlowFeatureMapperArgumentCollection.MappingFeatureEnum.SNV, readBases, refBases, @@ -774,8 +781,12 @@ private void emitFeature(final MappedFeature fr) { vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead); vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); - if ( !fmArgs.noEditDistance && fr.refEditDistance != null ) { - vcb.attribute(VCF_EDIST, fr.refEditDistance.get()); + if ( fmArgs.nmEditDistance ) { + int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); + vcb.attribute(VCF_EDIST, nmScore); + } else if ( fr.refEditDistance != null ) { + int editDisance = fr.refEditDistance.get(); // this actually computes the distance + vcb.attribute(VCF_EDIST, editDisance); } vcb.attribute(VCF_INDEX, fr.index); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 2e257ee97c3..7cb0747a8b8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -90,10 +90,10 @@ enum MappingFeatureEnum { public boolean keepSupplementaryAlignments = false; /** - * no edit distance (compute intensive)? + * edit distance should come from computed NM **/ - @Argument(fullName = "no-edit-distance", doc = "do not emit edit-distance metrics (compute intensive) ?", optional = true) - public boolean noEditDistance = false; + @Argument(fullName = "nm-edit-distance", doc = "edit distance should come from computed NM ?", optional = true) + public boolean nmEditDistance = false; /** * debug negatives? diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java index e299d594883..8da3ae82c79 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/SNVMapper.java @@ -139,7 +139,7 @@ public void forEachOnRead(GATKRead read, ReferenceContext referenceContext, Cons } // add this feature - FlowFeatureMapper.MappedFeature feature = FlowFeatureMapper.MappedFeature.makeSNV(read, readOfs, ref[refOfs], referenceContext.getStart() + refOfs, readOfs - refOfs); + FlowFeatureMapper.MappedFeature feature = FlowFeatureMapper.MappedFeature.makeSNV(read, readOfs, ref[refOfs], referenceContext.getStart() + refOfs, readOfs - refOfs, referenceContext); if ( (fmArgs.reportAllAlts || fmArgs.tagBasesWithAdjacentRefDiff) && !surrounded ) feature.adjacentRefDiff = true; feature.nonIdentMBasesOnRead = nonIdentMBases; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 7fcdfe91199..7cbbe9b6ffd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -221,11 +221,11 @@ public void testTagBasesWithAdjacentRefDiff() throws IOException { } @Test - public void testNoEditDistance() throws IOException { + public void testNmEditDistance() throws IOException { final File outputDir = createTempDir("testFlowFeatureMapperTest"); - final File expectedFile = new File(testDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); - final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); + final File expectedFile = new File(testDir + "/snv_feature_mapper_nm_edit_disance_output.vcf"); + final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_nm_edit_disance_output.vcf"); final String[] args = new String[] { "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", @@ -239,7 +239,7 @@ public void testNoEditDistance() throws IOException { "--snv-identical-bases", "10", "--debug-negatives", "false", "--debug-read-name", "150451-BC94-0645901755", - "--no-edit-distance" + "--nm-edit-distance" }; // run the tool diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf new file mode 100644 index 00000000000..83f8e0c0375 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f385f11035635b35543e32f26239c403115c6ea9fb9fe52f1c22b8170daa3e8c +size 5727533 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx new file mode 100644 index 00000000000..fbf73b124cc --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8068b45d5aa83080c41dad8b251012a9cf4287834dcf746e3b6f325fa4ebe +size 120129 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf deleted file mode 100644 index 0155467d7f0..00000000000 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07689d2722ddcbc8d5471f1fa51bbeba60e62ce8754d6ebb9795e64fe4270ed4 -size 5476289 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx deleted file mode 100644 index 87c01405d17..00000000000 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4e18ecfc4a3ff82d796f6f80852a945d1a586acf052dd7224cb55ee6efeb468c -size 120129 From 62089a7fd2fbc52bf5225bf7e64c38a08cf68822 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Wed, 7 Aug 2024 12:56:49 +0300 Subject: [PATCH 06/14] --no-edit-distance is back --- .../featuremapping/FlowFeatureMapper.java | 14 ++++---- .../FlowFeatureMapperArgumentCollection.java | 6 ++++ .../FlowFeatureMapperIntegrationTest.java | 34 +++++++++++++++++++ ..._feature_mapper_no_edit_disance_output.vcf | 3 ++ ...ture_mapper_no_edit_disance_output.vcf.idx | 3 ++ 5 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf create mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index bec149ffb1d..ade6a3b6c53 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -781,12 +781,14 @@ private void emitFeature(final MappedFeature fr) { vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead); vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); - if ( fmArgs.nmEditDistance ) { - int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); - vcb.attribute(VCF_EDIST, nmScore); - } else if ( fr.refEditDistance != null ) { - int editDisance = fr.refEditDistance.get(); // this actually computes the distance - vcb.attribute(VCF_EDIST, editDisance); + if ( !fmArgs.noEditDistance) { + if (fmArgs.nmEditDistance) { + int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); + vcb.attribute(VCF_EDIST, nmScore); + } else if (fr.refEditDistance != null) { + int editDisance = fr.refEditDistance.get(); // this actually computes the distance + vcb.attribute(VCF_EDIST, editDisance); + } } vcb.attribute(VCF_INDEX, fr.index); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 7cb0747a8b8..8175c9d9f4b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -89,6 +89,12 @@ enum MappingFeatureEnum { @Argument(fullName = "keep-supplementary-alignments", doc = "keep supplementary alignments ?", optional = true) public boolean keepSupplementaryAlignments = false; + /** + * do not emit edit distance at all + **/ + @Argument(fullName = "no-edit-distance", doc = "do not emit edit distance at all ?", optional = true) + public boolean noEditDistance = false; + /** * edit distance should come from computed NM **/ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 7cbbe9b6ffd..785fd924c2c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -254,6 +254,40 @@ public void testNmEditDistance() throws IOException { } } + @Test + public void testNoEditDistance() throws IOException { + + final File outputDir = createTempDir("testFlowFeatureMapperTest"); + final File expectedFile = new File(testDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); + final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); + + final String[] args = new String[] { + "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", + "-O", outputFile.getAbsolutePath(), + "-I", testDir + "/snv_feature_mapper_input.bam", + "--copy-attr", "RG", + "--copy-attr", "AS,Integer,AS attribute, as copied", + "--copy-attr", "rq,Float", + "--limit-score", "100", + "--min-score", "0", + "--snv-identical-bases", "10", + "--debug-negatives", "false", + "--debug-read-name", "150451-BC94-0645901755", + "--no-edit-distance" + }; + + // run the tool + runCommandLine(args); // no assert, just make sure we don't throw + + // make sure we've generated the otuput file + Assert.assertTrue(outputFile.exists()); + + // walk the output and expected files, compare non-comment lines + if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); + } + } + @Test public void testMapperThread() throws IOException { diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf new file mode 100644 index 00000000000..8349913ee6b --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f107859a15da37e78211eed16f70ba592d09f49fa71680ec85bd4af6dd6c8c53 +size 5476362 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx new file mode 100644 index 00000000000..667e2f555d2 --- /dev/null +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49b4bafdd1730b04296a814d6902547666f07b73778fbd12243e9b69eeb49c5 +size 120129 From a014717a0b9d859c9df51c6846c1a43462183105 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Sun, 11 Aug 2024 15:31:03 +0300 Subject: [PATCH 07/14] calc score on shorter flow reads --- .../featuremapping/FlowFeatureMapper.java | 77 ++++++++++++++++--- 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index ade6a3b6c53..15f5cbc0fa6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -32,6 +32,7 @@ import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; @@ -90,6 +91,7 @@ public final class FlowFeatureMapper extends ThreadedReadWalker { public static final int CAPACITY1 = 5000; + private static final int HAPLOTYPE_OPTIMIZED_SIZE = 10; static class CopyAttrInfo { public final String name; @@ -180,6 +182,11 @@ static class WriterMessage { @Argument(fullName = "threaded-writer", doc = "turn threaded writer on?", optional = true) public boolean threadedWriter = false; + static class TrimInfo { + int trimFrom; + int trimTo; + } + protected static class ReadContext implements Comparable { final GATKRead read; final ReferenceContext referenceContext; @@ -564,19 +571,22 @@ private void enrichFeature(final MappedFeature fr) { private double scoreFeature(final MappedFeature fr) { return scoreFeature(fr, (byte)0); } + private double scoreFeature(final MappedFeature fr, byte altBase) { // build haplotypes final FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(getHeaderForReads(), fr.read); - final FlowBasedHaplotype[] haplotypes = buildHaplotypes(fr, rgInfo.flowOrder, altBase); - - // create flow read - final FlowBasedRead flowRead = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs); - - final int diffLeft = haplotypes[0].getStart() - flowRead.getStart() + fr.offsetDelta; - final int diffRight = flowRead.getEnd() - haplotypes[0].getEnd(); - flowRead.applyBaseClipping(Math.max(0, diffLeft), Math.max(diffRight, 0), false); + final TrimInfo trimInfo = new TrimInfo(); + final FlowBasedHaplotype[] haplotypes = buildHaplotypes(fr, rgInfo.flowOrder, altBase, trimInfo); + final FlowBasedRead flowRead = buildTrimmedFlowBasedRead(fr.read, trimInfo, rgInfo); + // check lengths + if ( haplotypes[0].length() != haplotypes[1].length() ) { + logger.warn("haplotypes are different in length: " + haplotypes[0].length() + " != " + haplotypes[1].length()); + } + if ( flowRead.getLength() != haplotypes[0].length() ) { + logger.warn("flow read is different in length from haplotypes: " + flowRead.getLength() + " != " + haplotypes[0].length()); + } if ( !flowRead.isValid() ) { return -1; } @@ -676,7 +686,7 @@ public static double computeLikelihoodLocal(final FlowBasedRead read, final Flow return result; } - private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final String flowOrder, byte altBase) { + private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final String flowOrder, byte altBase, TrimInfo trimInfo) { // build bases for flow haplotypes // NOTE!!!: this code assumes length of feature on read and reference is the same @@ -694,6 +704,7 @@ private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final Strin fr.refBases[0] = altBase; } + int altOffset = offset; if ( offset > 0 ) { // reach into hmer before offset--; @@ -708,8 +719,18 @@ private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final Strin refStart--; } } - final byte[] sAltBases = Arrays.copyOfRange(bases, offset, bases.length); + + // build two haplotypes with existing data from the read + final int trimTo = Math.min(altOffset + fr.readBases.length + HAPLOTYPE_OPTIMIZED_SIZE, bases.length); + final byte[] sAltBases = Arrays.copyOfRange(bases, offset, trimTo); final byte[] sRefBases = Arrays.copyOf(sAltBases, sAltBases.length); + + // verify that we are correctly positioned + if ( sRefBases[refModOfs] != fr.readBases[0] ) { + logger.warn("sRefBases[refModOfs] != fr.readBases[0] : " + sRefBases[refModOfs] + " != " + fr.readBases[0]); + } + + // restore reference in ref haplotype System.arraycopy(fr.refBases, 0, sRefBases, refModOfs, fr.refBases.length); // construct haplotypes @@ -734,6 +755,12 @@ private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final Strin fr.refBases[0] = orgBase; } + // returning trimming info + if ( trimInfo != null ) { + trimInfo.trimFrom = offset; + trimInfo.trimTo = trimTo - 1; + } + // return return result; } @@ -845,5 +872,35 @@ private FeatureMapper buildMapper() { throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature); } } + + private FlowBasedRead buildTrimmedFlowBasedRead(final GATKRead read, final TrimInfo trimInfo, final FlowBasedReadUtils.ReadGroupInfo rgInfo) { + + // access origin arrays + final byte[] bases = read.getBasesNoCopy(); + final byte[] quals = read.getBaseQualitiesNoCopy(); + final byte[] tp = read.getAttributeAsByteArray("tp"); + final String t0 = read.getAttributeAsString("t0"); + + // trim + final byte[] trimmed_bases = Arrays.copyOfRange(bases, trimInfo.trimFrom, trimInfo.trimTo + 1); + final byte[] trimmed_quals = Arrays.copyOfRange(quals, trimInfo.trimFrom, trimInfo.trimTo + 1); + final byte[] trimmed_tp = Arrays.copyOfRange(tp, trimInfo.trimFrom, trimInfo.trimTo + 1); + final String trimmed_t0 = t0.substring(trimInfo.trimFrom, trimInfo.trimTo + 1); + + // build read + final SAMRecord samRecord = new SAMRecord(getHeaderForReads()); + samRecord.setReadBases(trimmed_bases); + samRecord.setBaseQualities(trimmed_quals); + samRecord.setAttribute("tp", trimmed_tp); + samRecord.setAttribute("t0", trimmed_t0); + samRecord.setAttribute("RG", read.getAttributeAsString("RG")); + samRecord.setCigarString("" + (trimInfo.trimTo - trimInfo.trimFrom + 1) + "M"); + final GATKRead gatkRead = new SAMRecordToGATKReadAdapter(samRecord); + + // build flow based read + final FlowBasedRead flowRead = new FlowBasedRead(gatkRead, rgInfo.flowOrder, rgInfo.maxClass, fbargs); + + return flowRead; + } } From b4a888459918bf724428486d84b7f853c0c24717 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Thu, 15 Aug 2024 11:01:03 +0200 Subject: [PATCH 08/14] make CachingIndexedFastaSequenceFile.java thread safe + allow to increasing cache size --- .../featuremapping/FlowFeatureMapper.java | 7 +++++++ .../fasta/CachingIndexedFastaSequenceFile.java | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 15f5cbc0fa6..e06daf1d687 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -27,6 +27,7 @@ import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.FlowBasedRead; @@ -92,6 +93,7 @@ public final class FlowFeatureMapper extends ThreadedReadWalker { public static final int CAPACITY1 = 5000; private static final int HAPLOTYPE_OPTIMIZED_SIZE = 10; + private static final long CACHE_SIZE_FACTOR = 5; static class CopyAttrInfo { public final String name; @@ -326,6 +328,11 @@ public void run() { }); writerWorker.start();; } + + // if using threaded walker extend reference cache to avoid thrushing + if ( threadedWalker ) { + CachingIndexedFastaSequenceFile.requestedCacheSize = CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE * CACHE_SIZE_FACTOR; + } } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java b/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java index 6cd2d678347..0d8d8f30e81 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -49,6 +49,9 @@ public final class CachingIndexedFastaSequenceFile implements ReferenceSequenceF /** The default cache size in bp */ public static final long DEFAULT_CACHE_SIZE = 1000000; + /** Requested cache size in bp - will be respected if set */ + public static long requestedCacheSize = 0; + /** The cache size of this CachingIndexedFastaSequenceFile */ private final long cacheSize; @@ -236,9 +239,14 @@ public long getCacheMisses() { * @return the size of the cache we are using */ public long getCacheSize() { - return cacheSize; + return requestedCacheSize != 0 ? requestedCacheSize : cacheSize; + } + + private long getCacheMissBackup() { + return Math.max(getCacheSize() / 1000, 1); } + /** * Is this CachingIndexedFastaReader keeping the original case of bases in the fasta, or is * everything being made upper case? @@ -315,10 +323,10 @@ public ReferenceSequence getSequence(String contig) { * all of the bases in the ReferenceSequence returned by this method will be upper cased. */ @Override - public ReferenceSequence getSubsequenceAt( final String contig, long start, final long stop ) { + public synchronized ReferenceSequence getSubsequenceAt( final String contig, long start, final long stop ) { final ReferenceSequence result; - if ( (stop - start + 1) > cacheSize ) { + if ( (stop - start + 1) > getCacheSize() ) { cacheMisses++; result = sequenceFile.getSubsequenceAt(contig, start, stop); if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases()); @@ -335,8 +343,8 @@ public ReferenceSequence getSubsequenceAt( final String contig, long start, fina if ( start < cache.start || stop > cache.stop || cache.seq == null || cache.seq.getContigIndex() != contigInfo.getSequenceIndex() ) { cacheMisses++; - cache.start = Math.max(start - cacheMissBackup, 0); - cache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength()); + cache.start = Math.max(start - getCacheMissBackup(), 0); + cache.stop = Math.min(start + getCacheSize() + getCacheMissBackup(), contigInfo.getSequenceLength()); cache.seq = sequenceFile.getSubsequenceAt(contig, cache.start, cache.stop); // convert all of the bases in the sequence to upper case if we aren't preserving cases From 2f7720874780171d3a7ed74f8a69cd95c82a013b Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Thu, 15 Aug 2024 12:28:46 +0200 Subject: [PATCH 09/14] handle thread exceptions - make program exit --- .../walkers/featuremapping/FlowFeatureMapper.java | 7 +++++++ .../walkers/featuremapping/ThreadedReadWalker.java | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index e06daf1d687..c690c4219d7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -326,13 +326,20 @@ public void run() { } } }); + writerWorker.setUncaughtExceptionHandler(getUncaughtExceptionHandler()); writerWorker.start();; } + // if using threads, set handler for this thread as well + if ( threadedWalker || threadedWriter ) { + Thread.currentThread().setUncaughtExceptionHandler(getUncaughtExceptionHandler()); + } + // if using threaded walker extend reference cache to avoid thrushing if ( threadedWalker ) { CachingIndexedFastaSequenceFile.requestedCacheSize = CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE * CACHE_SIZE_FACTOR; } + } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java index 5577be7020f..5758cdf1542 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/ThreadedReadWalker.java @@ -37,6 +37,7 @@ public void onTraversalStart() { if ( threadedWalker ) { applyQueue = new LinkedBlockingQueue<>(CAPACITY); worker = new Thread(this); + worker.setUncaughtExceptionHandler(getUncaughtExceptionHandler()); worker.start(); } } @@ -103,4 +104,13 @@ public void run() { public abstract boolean acceptRead(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext); public abstract void applyPossiblyThreaded(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext); + + public Thread.UncaughtExceptionHandler getUncaughtExceptionHandler() { + Thread.UncaughtExceptionHandler handler = (th, ex) -> { + System.out.println("Uncaught exception: " + ex); + ex.printStackTrace(System.out); + System.exit(-1); + }; + return handler; + } } From 2cae74b225b41b075a80706c55979789b643e85e Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 23 Sep 2024 15:02:28 +0300 Subject: [PATCH 10/14] FlowFeatureMapperIntegrationTest passed --- .../tools/walkers/featuremapping/FlowFeatureMapper.java | 6 ++++-- .../snv_feature_mapper_nm_edit_disance_output.vcf | 4 ++-- .../snv_feature_mapper_nm_edit_disance_output.vcf.idx | 2 +- .../snv_feature_mapper_no_edit_disance_output.vcf | 4 ++-- .../snv_feature_mapper_no_edit_disance_output.vcf.idx | 2 +- .../large/featureMapping/snv_feature_mapper_output.vcf | 4 ++-- .../large/featureMapping/snv_feature_mapper_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_qc_failed_output.vcf | 4 ++-- .../snv_feature_mapper_qc_failed_output.vcf.idx | 2 +- .../snv_feature_mapper_report_all_alts_output.vcf | 4 ++-- .../snv_feature_mapper_report_all_alts_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_smq_all_output.vcf | 4 ++-- .../snv_feature_mapper_smq_all_output.vcf.idx | 2 +- .../large/featureMapping/snv_feature_mapper_smq_output.vcf | 4 ++-- .../featureMapping/snv_feature_mapper_smq_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_tag_adjs_output.vcf | 4 ++-- .../snv_feature_mapper_tag_adjs_output.vcf.idx | 2 +- 17 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index c690c4219d7..63e354ed7dd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -899,14 +899,16 @@ private FlowBasedRead buildTrimmedFlowBasedRead(final GATKRead read, final TrimI final byte[] trimmed_bases = Arrays.copyOfRange(bases, trimInfo.trimFrom, trimInfo.trimTo + 1); final byte[] trimmed_quals = Arrays.copyOfRange(quals, trimInfo.trimFrom, trimInfo.trimTo + 1); final byte[] trimmed_tp = Arrays.copyOfRange(tp, trimInfo.trimFrom, trimInfo.trimTo + 1); - final String trimmed_t0 = t0.substring(trimInfo.trimFrom, trimInfo.trimTo + 1); + final String trimmed_t0 = t0 != null ? t0.substring(trimInfo.trimFrom, trimInfo.trimTo + 1) : null; // build read final SAMRecord samRecord = new SAMRecord(getHeaderForReads()); samRecord.setReadBases(trimmed_bases); samRecord.setBaseQualities(trimmed_quals); samRecord.setAttribute("tp", trimmed_tp); - samRecord.setAttribute("t0", trimmed_t0); + if ( trimmed_t0 != null ) { + samRecord.setAttribute("t0", trimmed_t0); + } samRecord.setAttribute("RG", read.getAttributeAsString("RG")); samRecord.setCigarString("" + (trimInfo.trimTo - trimInfo.trimFrom + 1) + "M"); final GATKRead gatkRead = new SAMRecordToGATKReadAdapter(samRecord); diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf index 83f8e0c0375..42294e264cb 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f385f11035635b35543e32f26239c403115c6ea9fb9fe52f1c22b8170daa3e8c -size 5727533 +oid sha256:cabc8f2f6847a2a0d2f26e3fe8641f7831b2e1eae21ffdd077ef21b9a3478551 +size 5719925 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx index fbf73b124cc..ecedee7b408 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfc8068b45d5aa83080c41dad8b251012a9cf4287834dcf746e3b6f325fa4ebe +oid sha256:07dbc2abbda6618de2e62e4172c54f7bb1d111e2247a5ab9de1f7644f14b450d size 120129 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf index 8349913ee6b..c865b731222 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f107859a15da37e78211eed16f70ba592d09f49fa71680ec85bd4af6dd6c8c53 -size 5476362 +oid sha256:f22cee47b6eb186d37185455fc60cf40d6daa22be09701b16afef809456cb308 +size 5468729 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx index 667e2f555d2..a7044be9c47 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d49b4bafdd1730b04296a814d6902547666f07b73778fbd12243e9b69eeb49c5 +oid sha256:cda146a9ebc12eb9d4a8ac98025eac18df799ecfaca825cab75a65f0d24e8048 size 120129 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf index 15420a30c69..8de4fe7a339 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fec49990080a8d7a21c3d5639c2bcba83a67c5a88d48bc58127fd6c51c10987a -size 5727362 +oid sha256:b033a57cfd71ac647cffc527e1528176f04465e6c5429f8ee1b51ada8ed5aaaf +size 5719830 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx index 51f9292562f..97d892f3c17 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2048ce82810a5752d6fb1de9e6e9c2166bc0cf0981cc63a82f4fd06d4bf81275 +oid sha256:89983519b444da7436d3d9779d8ab74a273acb5ad64681d7d89f8efd4e3399e2 size 120113 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf index 19530e544e0..399eca8a608 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5868392f6e82af04e84a960f99f958f3f30e2476af3997bf5ad9db70a727961f -size 5057024 +oid sha256:67b8bb9bfc560dfcde10a8caa08b894ba322cce10f238b0460920509505b3984 +size 5049492 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx index f19964be546..1e10332ad25 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94675097a7732fb42d58bbb4c4beed366a5a92b68b106b5f827d9d7d1d1f479e +oid sha256:3140c6355d7edece40299dd93d52482741b8c12ce5dedbb037eed59dc8194d8d size 120123 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf index 438ff402e44..2b0a91a5b4a 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bb077a1ed1632cf48ea3ee710c1d3517b86c679070e07880e8fffe0ed0acafb -size 22013893 +oid sha256:b0598028e4747c939ac5cdac42302e59d372057be0ab9f2c2575dac9797bc5a3 +size 21932775 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx index 3306703bc4e..6c5c7f68777 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa90be0835317f993e082e9543aa17e83b8f0ba29c2faa18314de0017356eb23 +oid sha256:a67fc2fdb6289e5e30a915365617358c534f4bdec7d24f2848f05c227e46c9e0 size 134797 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf index 075cdd31993..ac9c9c048ef 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56033f5f98ef9999741c7f8a0c140045bd42558194a4512f3c4742f8a24e4aab -size 6760184 +oid sha256:684fbb692db3cc6f2fe7cf8ccd60c7e91274e3a54601946ef2c4a9dd2c38d56e +size 6752652 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx index c29140ecdb9..47fdbd779c7 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0df79aee9e4be92defe36b7d40f21615a6883c32f99e7df35cbb8c407639d363 +oid sha256:c9ade804b59369b1724d827d75904eb0987e5f2129a7c6243525bf5a9fb42b1b size 120121 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf index 32a060c9c5e..b84406f6354 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a38048e71ea5b01195bf52b38d07cef3f38e91e80b76f2737165e2e84e78bf1e -size 5783362 +oid sha256:6d8118fbb6dbb01406163aacc17f1fe0f69d1163c63a2ddd7ed940b159b13371 +size 5775830 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx index 340b9475475..67b4ed7a6b6 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f5755a11e3577ed97ed8ccddded2135a0de1b5bbbdd247087bd355a5f4c6770 +oid sha256:851c2234ccffa1c44beecb1232dad934fbd61f9536a60e019e3c41f8f223bfe3 size 120117 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf index ae8d2a47b9b..34a60c12f00 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:511e7b3faded9b68fddb05098ad681965c8ac3d69a7c0879e1e404e0d7ac0992 -size 250675 +oid sha256:400eee023f21c8a0af5c98bb0e301f01dc2e5be5eba6e869e35313b12f05310c +size 250732 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx index 888d93a2b15..e424a1dcee0 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0e3cb83a487006e075466405dc48dc2e4f5eaa222597361ac77f2cb0d2648cf +oid sha256:f538499784d7e89b2e94fb3a5e86896572de1a2535b397fc46a8c64c8cdbc4b0 size 118520 From 2adab1f251009effcba2d732aa223fd81a6a045e Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 23 Sep 2024 15:16:04 +0300 Subject: [PATCH 11/14] NM edit distance now default --- .../tools/walkers/featuremapping/FlowFeatureMapper.java | 2 +- .../featuremapping/FlowFeatureMapperArgumentCollection.java | 4 ++-- .../featuremapping/FlowFeatureMapperIntegrationTest.java | 4 ++-- .../snv_feature_mapper_nm_edit_disance_output.vcf | 4 ++-- .../snv_feature_mapper_nm_edit_disance_output.vcf.idx | 2 +- .../snv_feature_mapper_no_edit_disance_output.vcf | 4 ++-- .../snv_feature_mapper_no_edit_disance_output.vcf.idx | 2 +- .../large/featureMapping/snv_feature_mapper_output.vcf | 4 ++-- .../large/featureMapping/snv_feature_mapper_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_qc_failed_output.vcf | 4 ++-- .../snv_feature_mapper_qc_failed_output.vcf.idx | 2 +- .../snv_feature_mapper_report_all_alts_output.vcf | 4 ++-- .../snv_feature_mapper_report_all_alts_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_smq_all_output.vcf | 4 ++-- .../featureMapping/snv_feature_mapper_smq_all_output.vcf.idx | 2 +- .../large/featureMapping/snv_feature_mapper_smq_output.vcf | 4 ++-- .../featureMapping/snv_feature_mapper_smq_output.vcf.idx | 2 +- .../featureMapping/snv_feature_mapper_tag_adjs_output.vcf | 4 ++-- .../featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx | 2 +- .../flowBasedAlignment/input_jukebox_for_test.expected.alm | 4 ++-- 20 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 63e354ed7dd..500f2a5d4b1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -823,7 +823,7 @@ private void emitFeature(final MappedFeature fr) { vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); if ( !fmArgs.noEditDistance) { - if (fmArgs.nmEditDistance) { + if ( !fmArgs.levenshteinEditDistance ) { int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); vcb.attribute(VCF_EDIST, nmScore); } else if (fr.refEditDistance != null) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index 8175c9d9f4b..f75af6d0653 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -98,8 +98,8 @@ enum MappingFeatureEnum { /** * edit distance should come from computed NM **/ - @Argument(fullName = "nm-edit-distance", doc = "edit distance should come from computed NM ?", optional = true) - public boolean nmEditDistance = false; + @Argument(fullName = "levenshtein-edit-distance", doc = "edit distance should come from a Levenshtein edit distance instead of NM ?", optional = true) + public boolean levenshteinEditDistance = false; /** * debug negatives? diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 785fd924c2c..47050d8795c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -221,7 +221,7 @@ public void testTagBasesWithAdjacentRefDiff() throws IOException { } @Test - public void testNmEditDistance() throws IOException { + public void testLevenshteinEditDistance() throws IOException { final File outputDir = createTempDir("testFlowFeatureMapperTest"); final File expectedFile = new File(testDir + "/snv_feature_mapper_nm_edit_disance_output.vcf"); @@ -239,7 +239,7 @@ public void testNmEditDistance() throws IOException { "--snv-identical-bases", "10", "--debug-negatives", "false", "--debug-read-name", "150451-BC94-0645901755", - "--nm-edit-distance" + "--levenshtein-edit-distance" }; // run the tool diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf index 42294e264cb..031b5335d0b 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cabc8f2f6847a2a0d2f26e3fe8641f7831b2e1eae21ffdd077ef21b9a3478551 -size 5719925 +oid sha256:9ff5150c391f7b3fcf254d9efa2fc0e63137b693900ebfe69b655e7ce9c3f88b +size 5719854 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx index ecedee7b408..7929b21fada 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_nm_edit_disance_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07dbc2abbda6618de2e62e4172c54f7bb1d111e2247a5ab9de1f7644f14b450d +oid sha256:f4191ea87a60ca0f903100a3a4c2cd1c58be37ec3880c78ae402168f0b1f9006 size 120129 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf index c865b731222..c011c84b2da 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f22cee47b6eb186d37185455fc60cf40d6daa22be09701b16afef809456cb308 -size 5468729 +oid sha256:c4dec39f194156e50bbd628b95c887de4421a0059d5a0d7c4098c5c36af6daba +size 5468738 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx index a7044be9c47..2eb87babb35 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cda146a9ebc12eb9d4a8ac98025eac18df799ecfaca825cab75a65f0d24e8048 +oid sha256:1e7cd26902848f511b47eca6c85d34daa8bacba41aa0d64062e2132bd61b1bf4 size 120129 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf index 8de4fe7a339..39f57153f94 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b033a57cfd71ac647cffc527e1528176f04465e6c5429f8ee1b51ada8ed5aaaf -size 5719830 +oid sha256:ce8d9bc5cc31c7e249ba2fc4dc1924e190ddd59b69c0b4654d432f60d7e5846d +size 5719919 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx index 97d892f3c17..cc9843cba62 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89983519b444da7436d3d9779d8ab74a273acb5ad64681d7d89f8efd4e3399e2 +oid sha256:7b0213572039ac7271af11165c3ec9144434bd532f5e17475f36cd0a027ff7a3 size 120113 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf index 399eca8a608..9946c047c32 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67b8bb9bfc560dfcde10a8caa08b894ba322cce10f238b0460920509505b3984 -size 5049492 +oid sha256:6e7091e446242545c12837f5d6bd03fca745f4d84f74783febe766a2e22eb2f3 +size 5049581 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx index 1e10332ad25..9064273ffbc 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_qc_failed_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3140c6355d7edece40299dd93d52482741b8c12ce5dedbb037eed59dc8194d8d +oid sha256:b90008bf01b3a7a7808594ec58a52e29a9349464e0aec4ec8b9d722c2c3dd032 size 120123 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf index 2b0a91a5b4a..6b8901ed38a 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0598028e4747c939ac5cdac42302e59d372057be0ab9f2c2575dac9797bc5a3 -size 21932775 +oid sha256:defc64361ef51bb0ed4d425f937301e006c3beb6b2321ba04d8f6b690026e231 +size 21933008 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx index 6c5c7f68777..920da615f5e 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_report_all_alts_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a67fc2fdb6289e5e30a915365617358c534f4bdec7d24f2848f05c227e46c9e0 +oid sha256:323ee6c3bdf8170020a38749e478fcef1a647746e69b611a3c283d7c2066aa96 size 134797 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf index ac9c9c048ef..def604f6006 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:684fbb692db3cc6f2fe7cf8ccd60c7e91274e3a54601946ef2c4a9dd2c38d56e -size 6752652 +oid sha256:7d9408f46cdf74f1195584223503c92aa9be427b749a4323c5213df216d0e564 +size 6752741 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx index 47fdbd779c7..11ba43d408a 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_all_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9ade804b59369b1724d827d75904eb0987e5f2129a7c6243525bf5a9fb42b1b +oid sha256:7c74a6d7ff041b57bcb16221a70eec74e0133385c715e6559cff352bf91eb0c5 size 120121 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf index b84406f6354..1578a2aa688 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d8118fbb6dbb01406163aacc17f1fe0f69d1163c63a2ddd7ed940b159b13371 -size 5775830 +oid sha256:e826d1c8519ac409a42637926e6df7d14d3ed9286fd7d798b1ce2a49ceb489b7 +size 5775919 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx index 67b4ed7a6b6..eb7a5cfdb2e 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_smq_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:851c2234ccffa1c44beecb1232dad934fbd61f9536a60e019e3c41f8f223bfe3 +oid sha256:c0f2e687d5dc63d57c5a700989935bda01e53a39eb744e90c94826a37bea6e72 size 120117 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf index 34a60c12f00..9fd2aa2720f 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:400eee023f21c8a0af5c98bb0e301f01dc2e5be5eba6e869e35313b12f05310c -size 250732 +oid sha256:75e86a411dddb92983bc67c46c92ca12b3b77bdf71d7289894de7f439a29c1a1 +size 250744 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx index e424a1dcee0..efa61f5cc2a 100644 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx +++ b/src/test/resources/large/featureMapping/snv_feature_mapper_tag_adjs_output.vcf.idx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f538499784d7e89b2e94fb3a5e86896572de1a2535b397fc46a8c64c8cdbc4b0 +oid sha256:4aacba3a23b834b689144a57408eed0620921f678f1d1ae1a71cf1fbd95c268f size 118520 diff --git a/src/test/resources/large/flowBasedAlignment/input_jukebox_for_test.expected.alm b/src/test/resources/large/flowBasedAlignment/input_jukebox_for_test.expected.alm index 693cf6cc895..7be9a9b5c4f 100644 --- a/src/test/resources/large/flowBasedAlignment/input_jukebox_for_test.expected.alm +++ b/src/test/resources/large/flowBasedAlignment/input_jukebox_for_test.expected.alm @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06c11b05b97e89f99ebe83376e166eaca438f0d748f03da8512e26ca559be6c2 -size 24335517 +oid sha256:8e7071df04b55b1d98e86edd6cf0c5880c630b65a07fd904a0f422e9436848cc +size 24333709 From 12e8e0661ebc00c1a05027c005e1caefd3cdd525 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 23 Sep 2024 15:19:21 +0300 Subject: [PATCH 12/14] --no-edit-distance removed --- .../featuremapping/FlowFeatureMapper.java | 14 ++++---- .../FlowFeatureMapperArgumentCollection.java | 6 ---- .../FlowFeatureMapperIntegrationTest.java | 34 ------------------- ..._feature_mapper_no_edit_disance_output.vcf | 3 -- ...ture_mapper_no_edit_disance_output.vcf.idx | 3 -- 5 files changed, 6 insertions(+), 54 deletions(-) delete mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf delete mode 100644 src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 500f2a5d4b1..8e73930a3bb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -822,14 +822,12 @@ private void emitFeature(final MappedFeature fr) { vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead); vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); - if ( !fmArgs.noEditDistance) { - if ( !fmArgs.levenshteinEditDistance ) { - int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); - vcb.attribute(VCF_EDIST, nmScore); - } else if (fr.refEditDistance != null) { - int editDisance = fr.refEditDistance.get(); // this actually computes the distance - vcb.attribute(VCF_EDIST, editDisance); - } + if ( !fmArgs.levenshteinEditDistance ) { + int nmScore = SequenceUtil.calculateSamNmTag(fr.read.convertToSAMRecord(getHeaderForReads()), fr.referenceContext.getBases(new SimpleInterval(fr.read)), fr.read.getStart() - 1); + vcb.attribute(VCF_EDIST, nmScore); + } else if (fr.refEditDistance != null) { + int editDisance = fr.refEditDistance.get(); // this actually computes the distance + vcb.attribute(VCF_EDIST, editDisance); } vcb.attribute(VCF_INDEX, fr.index); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java index f75af6d0653..ac652bf48b6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperArgumentCollection.java @@ -89,12 +89,6 @@ enum MappingFeatureEnum { @Argument(fullName = "keep-supplementary-alignments", doc = "keep supplementary alignments ?", optional = true) public boolean keepSupplementaryAlignments = false; - /** - * do not emit edit distance at all - **/ - @Argument(fullName = "no-edit-distance", doc = "do not emit edit distance at all ?", optional = true) - public boolean noEditDistance = false; - /** * edit distance should come from computed NM **/ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java index 47050d8795c..5069603ff44 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapperIntegrationTest.java @@ -254,40 +254,6 @@ public void testLevenshteinEditDistance() throws IOException { } } - @Test - public void testNoEditDistance() throws IOException { - - final File outputDir = createTempDir("testFlowFeatureMapperTest"); - final File expectedFile = new File(testDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); - final File outputFile = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expectedFile : new File(outputDir + "/snv_feature_mapper_no_edit_disance_output.vcf"); - - final String[] args = new String[] { - "-R", largeFileTestDir + "/Homo_sapiens_assembly38.fasta.gz", - "-O", outputFile.getAbsolutePath(), - "-I", testDir + "/snv_feature_mapper_input.bam", - "--copy-attr", "RG", - "--copy-attr", "AS,Integer,AS attribute, as copied", - "--copy-attr", "rq,Float", - "--limit-score", "100", - "--min-score", "0", - "--snv-identical-bases", "10", - "--debug-negatives", "false", - "--debug-read-name", "150451-BC94-0645901755", - "--no-edit-distance" - }; - - // run the tool - runCommandLine(args); // no assert, just make sure we don't throw - - // make sure we've generated the otuput file - Assert.assertTrue(outputFile.exists()); - - // walk the output and expected files, compare non-comment lines - if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { - IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile, "#"); - } - } - @Test public void testMapperThread() throws IOException { diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf deleted file mode 100644 index c011c84b2da..00000000000 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4dec39f194156e50bbd628b95c887de4421a0059d5a0d7c4098c5c36af6daba -size 5468738 diff --git a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx b/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx deleted file mode 100644 index 2eb87babb35..00000000000 --- a/src/test/resources/large/featureMapping/snv_feature_mapper_no_edit_disance_output.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e7cd26902848f511b47eca6c85d34daa8bacba41aa0d64062e2132bd61b1bf4 -size 120129 From 75deee8efda4761f20bbbd963673b27464e66ee1 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 23 Sep 2024 16:32:27 +0300 Subject: [PATCH 13/14] featrure snapping haplotype/read creation cleanup --- .../featuremapping/FlowFeatureMapper.java | 258 +++++++++--------- 1 file changed, 123 insertions(+), 135 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 8e73930a3bb..56151f6ff84 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -184,11 +184,6 @@ static class WriterMessage { @Argument(fullName = "threaded-writer", doc = "turn threaded writer on?", optional = true) public boolean threadedWriter = false; - static class TrimInfo { - int trimFrom; - int trimTo; - } - protected static class ReadContext implements Comparable { final GATKRead read; final ReferenceContext referenceContext; @@ -284,6 +279,124 @@ public int compareTo(MappedFeature o) { } } + // private utility class, for generating haplotypes and a read spanning the area around the feature + private class FeatureHaplotypes { + + final FlowBasedReadUtils.ReadGroupInfo rgInfo; + final FlowBasedHaplotype[] haplotypes = new FlowBasedHaplotype[2]; + final int fromBaseIndex; + final int toBaseIndex; + + private FeatureHaplotypes(MappedFeature fr, byte altBase) { + + // build bases for flow haplotypes + // NOTE!!!: this code assumes length of feature on read and reference is the same + // this is true for SNP but not for INDELs - it will have to be re-written! + // TODO: write for INDEL + byte[] bases = fr.read.getBasesNoCopy(); + int offset = fr.readBasesOffset; + int refStart = fr.start; + int refModOfs = 0; + + // install alt base? + byte orgBase = 0; + if ( altBase != 0 ) { + orgBase = fr.refBases[0]; + fr.refBases[0] = altBase; + } + + int altOffset = offset; + if ( offset > 0 ) { + // reach into hmer before + offset--; + refModOfs++; + refStart--; + + // extend until start of hmer + final byte hmerBase = bases[offset]; + while ( offset > 0 && bases[offset-1] == hmerBase ) { + offset--; + refModOfs++; + refStart--; + } + } + + // build two haplotypes with existing data from the read + final int trimTo = Math.min(altOffset + fr.readBases.length + HAPLOTYPE_OPTIMIZED_SIZE, bases.length); + final byte[] sAltBases = Arrays.copyOfRange(bases, offset, trimTo); + final byte[] sRefBases = Arrays.copyOf(sAltBases, sAltBases.length); + + // verify that we are correctly positioned + if ( sRefBases[refModOfs] != fr.readBases[0] ) { + logger.warn("sRefBases[refModOfs] != fr.readBases[0] : " + sRefBases[refModOfs] + " != " + fr.readBases[0]); + } + + // restore reference in ref haplotype + System.arraycopy(fr.refBases, 0, sRefBases, refModOfs, fr.refBases.length); + + // construct haplotypes + final SimpleInterval genomeLoc = new SimpleInterval(fr.read.getContig(), refStart, refStart + sAltBases.length - 1); + final Cigar cigar = new Cigar(); + cigar.add(new CigarElement(sAltBases.length, CigarOperator.M)); + final Haplotype altHaplotype = new Haplotype(sAltBases, false); + final Haplotype refHaplotype = new Haplotype(sRefBases, true); + altHaplotype.setGenomeLocation(genomeLoc); + refHaplotype.setGenomeLocation(genomeLoc); + altHaplotype.setCigar(cigar); + refHaplotype.setCigar(cigar); + + // prepare flow based haplotypes + rgInfo = FlowBasedReadUtils.getReadGroupInfo(getHeaderForReads(), fr.read); + haplotypes[0] = new FlowBasedHaplotype(altHaplotype, rgInfo.flowOrder); + haplotypes[1] = new FlowBasedHaplotype(refHaplotype, rgInfo.flowOrder); + + // restore changes + if ( altBase != 0 ) { + fr.refBases[0] = orgBase; + } + + // establish span + fromBaseIndex = offset; + toBaseIndex = trimTo - 1; + } + + private FlowBasedRead spanningRead(final GATKRead read) { + + // access origin arrays + final byte[] bases = read.getBasesNoCopy(); + final byte[] quals = read.getBaseQualitiesNoCopy(); + final byte[] tp = read.getAttributeAsByteArray("tp"); + final String t0 = read.getAttributeAsString("t0"); + + // trim + final byte[] trimmed_bases = Arrays.copyOfRange(bases, fromBaseIndex, toBaseIndex + 1); + final byte[] trimmed_quals = Arrays.copyOfRange(quals, fromBaseIndex, toBaseIndex + 1); + final byte[] trimmed_tp = Arrays.copyOfRange(tp, fromBaseIndex, toBaseIndex + 1); + final String trimmed_t0 = t0 != null ? t0.substring(fromBaseIndex, toBaseIndex + 1) : null; + + // build read + final SAMRecord samRecord = new SAMRecord(getHeaderForReads()); + samRecord.setReadBases(trimmed_bases); + samRecord.setBaseQualities(trimmed_quals); + samRecord.setAttribute("tp", trimmed_tp); + if ( trimmed_t0 != null ) { + samRecord.setAttribute("t0", trimmed_t0); + } + samRecord.setAttribute("RG", read.getAttributeAsString("RG")); + samRecord.setCigarString("" + (toBaseIndex - fromBaseIndex + 1) + "M"); + final GATKRead gatkRead = new SAMRecordToGATKReadAdapter(samRecord); + + // build flow based read + final FlowBasedRead flowRead = new FlowBasedRead(gatkRead, rgInfo.flowOrder, rgInfo.maxClass, fbargs); + + return flowRead; + } + + public FlowBasedHaplotype[] spanningHaplotypes() { + return haplotypes; + } + } + // locals private VariantContextWriter vcfWriter; final private PriorityQueue featureQueue = new PriorityQueue<>(); @@ -335,7 +448,7 @@ public void run() { Thread.currentThread().setUncaughtExceptionHandler(getUncaughtExceptionHandler()); } - // if using threaded walker extend reference cache to avoid thrushing + // if using threaded walker extend reference cache to avoid thrashing if ( threadedWalker ) { CachingIndexedFastaSequenceFile.requestedCacheSize = CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE * CACHE_SIZE_FACTOR; } @@ -589,10 +702,9 @@ private double scoreFeature(final MappedFeature fr) { private double scoreFeature(final MappedFeature fr, byte altBase) { // build haplotypes - final FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(getHeaderForReads(), fr.read); - final TrimInfo trimInfo = new TrimInfo(); - final FlowBasedHaplotype[] haplotypes = buildHaplotypes(fr, rgInfo.flowOrder, altBase, trimInfo); - final FlowBasedRead flowRead = buildTrimmedFlowBasedRead(fr.read, trimInfo, rgInfo); + final FeatureHaplotypes featureHaplotypes = new FeatureHaplotypes(fr, altBase); + final FlowBasedHaplotype[] haplotypes = featureHaplotypes.spanningHaplotypes(); + final FlowBasedRead flowRead = featureHaplotypes.spanningRead(fr.read); // check lengths if ( haplotypes[0].length() != haplotypes[1].length() ) { @@ -629,19 +741,6 @@ private double scoreFeature(final MappedFeature fr, byte altBase) { logger.info("refrHapKey: " + haplotypes[1].getKeyLength() + " " + Arrays.toString(haplotypes[1].getKey())); computeLikelihoodLocal(flowRead, haplotypes[1], hapKeyLength, true); logger.info("score: " + score); - - // analyze read - final FlowBasedRead flowRead2 = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs); - final int[] key2 = flowRead2.getKey(); - for ( int i = 0 ; i < key2.length ; i++ ) { - final double p1 = flowRead2.getProb(i, key2[i]); - for ( int j = 0 ; j < rgInfo.maxClass ; j++ ) { - final double p2 = flowRead2.getProb(i, j); - if ( p2 > p1 ) - logger.info(String.format("prob at %s key[%d]=%d, %f is lower than at %d which is %f", - flowRead2.getName(), i, key2[i], p1, j, p2)); - } - } } if ( score < 0 && !fmArgs.keepNegatives && score != -1.0 ) { @@ -667,7 +766,7 @@ public static double computeLikelihoodLocal(final FlowBasedRead read, final Flow // debug support StringBuffer debugMessage = null; if ( debug ) - debugMessage = new StringBuffer(Integer.toString(startingPoint) + " hmer prob |"); + debugMessage = new StringBuffer(startingPoint + " hmer prob |"); double result = 0 ; for (int i = 0; i < read.getKeyLength(); i++) { int index = i + startingPoint; @@ -700,85 +799,6 @@ public static double computeLikelihoodLocal(final FlowBasedRead read, final Flow return result; } - private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final String flowOrder, byte altBase, TrimInfo trimInfo) { - - // build bases for flow haplotypes - // NOTE!!!: this code assumes length of feature on read and reference is the same - // this is true for SNP but not for INDELs - it will have to be re-written! - // TODO: write for INDEL - byte[] bases = fr.read.getBasesNoCopy(); - int offset = fr.readBasesOffset; - int refStart = fr.start; - int refModOfs = 0; - - // install alt base? - byte orgBase = 0; - if ( altBase != 0 ) { - orgBase = fr.refBases[0]; - fr.refBases[0] = altBase; - } - - int altOffset = offset; - if ( offset > 0 ) { - // reach into hmer before - offset--; - refModOfs++; - refStart--; - - // extend until start of hmer - final byte hmerBase = bases[offset]; - while ( offset > 0 && bases[offset-1] == hmerBase ) { - offset--; - refModOfs++; - refStart--; - } - } - - // build two haplotypes with existing data from the read - final int trimTo = Math.min(altOffset + fr.readBases.length + HAPLOTYPE_OPTIMIZED_SIZE, bases.length); - final byte[] sAltBases = Arrays.copyOfRange(bases, offset, trimTo); - final byte[] sRefBases = Arrays.copyOf(sAltBases, sAltBases.length); - - // verify that we are correctly positioned - if ( sRefBases[refModOfs] != fr.readBases[0] ) { - logger.warn("sRefBases[refModOfs] != fr.readBases[0] : " + sRefBases[refModOfs] + " != " + fr.readBases[0]); - } - - // restore reference in ref haplotype - System.arraycopy(fr.refBases, 0, sRefBases, refModOfs, fr.refBases.length); - - // construct haplotypes - final SimpleInterval genomeLoc = new SimpleInterval(fr.read.getContig(), refStart, refStart + sAltBases.length - 1); - final Cigar cigar = new Cigar(); - cigar.add(new CigarElement(sAltBases.length, CigarOperator.M)); - final Haplotype altHaplotype = new Haplotype(sAltBases, false); - final Haplotype refHaplotype = new Haplotype(sRefBases, true); - altHaplotype.setGenomeLocation(genomeLoc); - refHaplotype.setGenomeLocation(genomeLoc); - altHaplotype.setCigar(cigar); - refHaplotype.setCigar(cigar); - - // prepare flow based haplotypes - final FlowBasedHaplotype[] result = { - new FlowBasedHaplotype(altHaplotype, flowOrder), - new FlowBasedHaplotype(refHaplotype, flowOrder) - }; - - // restore changes - if ( altBase != 0 ) { - fr.refBases[0] = orgBase; - } - - // returning trimming info - if ( trimInfo != null ) { - trimInfo.trimFrom = offset; - trimInfo.trimTo = trimTo - 1; - } - - // return - return result; - } - private boolean filterFeature(final MappedFeature fr) { if ( fmArgs.excludeNaNScores && Double.isNaN(fr.score) ) { @@ -884,37 +904,5 @@ private FeatureMapper buildMapper() { throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature); } } - - private FlowBasedRead buildTrimmedFlowBasedRead(final GATKRead read, final TrimInfo trimInfo, final FlowBasedReadUtils.ReadGroupInfo rgInfo) { - - // access origin arrays - final byte[] bases = read.getBasesNoCopy(); - final byte[] quals = read.getBaseQualitiesNoCopy(); - final byte[] tp = read.getAttributeAsByteArray("tp"); - final String t0 = read.getAttributeAsString("t0"); - - // trim - final byte[] trimmed_bases = Arrays.copyOfRange(bases, trimInfo.trimFrom, trimInfo.trimTo + 1); - final byte[] trimmed_quals = Arrays.copyOfRange(quals, trimInfo.trimFrom, trimInfo.trimTo + 1); - final byte[] trimmed_tp = Arrays.copyOfRange(tp, trimInfo.trimFrom, trimInfo.trimTo + 1); - final String trimmed_t0 = t0 != null ? t0.substring(trimInfo.trimFrom, trimInfo.trimTo + 1) : null; - - // build read - final SAMRecord samRecord = new SAMRecord(getHeaderForReads()); - samRecord.setReadBases(trimmed_bases); - samRecord.setBaseQualities(trimmed_quals); - samRecord.setAttribute("tp", trimmed_tp); - if ( trimmed_t0 != null ) { - samRecord.setAttribute("t0", trimmed_t0); - } - samRecord.setAttribute("RG", read.getAttributeAsString("RG")); - samRecord.setCigarString("" + (trimInfo.trimTo - trimInfo.trimFrom + 1) + "M"); - final GATKRead gatkRead = new SAMRecordToGATKReadAdapter(samRecord); - - // build flow based read - final FlowBasedRead flowRead = new FlowBasedRead(gatkRead, rgInfo.flowOrder, rgInfo.maxClass, fbargs); - - return flowRead; - } } From 511565ddcf95ce427465fa2abd7d5bd327da00a4 Mon Sep 17 00:00:00 2001 From: Dror Kessler Date: Mon, 23 Sep 2024 17:14:28 +0300 Subject: [PATCH 14/14] further refactoring --- .../featuremapping/FlowFeatureMapper.java | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java index 56151f6ff84..021838833c2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/featuremapping/FlowFeatureMapper.java @@ -362,25 +362,13 @@ private FeatureHaplotypes(MappedFeature fr, byte altBase) { private FlowBasedRead spanningRead(final GATKRead read) { - // access origin arrays - final byte[] bases = read.getBasesNoCopy(); - final byte[] quals = read.getBaseQualitiesNoCopy(); - final byte[] tp = read.getAttributeAsByteArray("tp"); - final String t0 = read.getAttributeAsString("t0"); - - // trim - final byte[] trimmed_bases = Arrays.copyOfRange(bases, fromBaseIndex, toBaseIndex + 1); - final byte[] trimmed_quals = Arrays.copyOfRange(quals, fromBaseIndex, toBaseIndex + 1); - final byte[] trimmed_tp = Arrays.copyOfRange(tp, fromBaseIndex, toBaseIndex + 1); - final String trimmed_t0 = t0 != null ? t0.substring(fromBaseIndex, toBaseIndex + 1) : null; - // build read final SAMRecord samRecord = new SAMRecord(getHeaderForReads()); - samRecord.setReadBases(trimmed_bases); - samRecord.setBaseQualities(trimmed_quals); - samRecord.setAttribute("tp", trimmed_tp); - if ( trimmed_t0 != null ) { - samRecord.setAttribute("t0", trimmed_t0); + samRecord.setReadBases(Arrays.copyOfRange(read.getBasesNoCopy(), fromBaseIndex, toBaseIndex + 1)); + samRecord.setBaseQualities(Arrays.copyOfRange(read.getBaseQualitiesNoCopy(), fromBaseIndex, toBaseIndex + 1)); + samRecord.setAttribute("tp", Arrays.copyOfRange(read.getAttributeAsByteArray("tp"), fromBaseIndex, toBaseIndex + 1)); + if ( read.hasAttribute("t0") ) { + samRecord.setAttribute("t0", read.getAttributeAsString("t0").substring(fromBaseIndex, toBaseIndex + 1)); } samRecord.setAttribute("RG", read.getAttributeAsString("RG")); samRecord.setCigarString("" + (toBaseIndex - fromBaseIndex + 1) + "M");