Skip to content

Commit a20d9b6

Browse files
committed
proofreading
1 parent aaa16cd commit a20d9b6

File tree

10 files changed

+42
-38
lines changed

10 files changed

+42
-38
lines changed

src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/BaseRecalibratorSparkFn.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public static RecalibrationReport apply(final JavaPairRDD<GATKRead, Iterable<GAT
3838
return Iterators.singletonIterator(bqsr.getRecalibrationTables());
3939
});
4040

41-
final RecalibrationTables emptyRecalibrationTable = new RecalibrationTables(new BQSRCovariateList(recalArgs, header));
41+
final RecalibrationTables emptyRecalibrationTable = new RecalibrationTables(new BQSRCovariateList(recalArgs, header)); // tsato: Potentially here!
4242
final RecalibrationTables combinedTables = unmergedTables.treeAggregate(emptyRecalibrationTable,
4343
RecalibrationTables::inPlaceCombine,
4444
RecalibrationTables::inPlaceCombine,

src/main/java/org/broadinstitute/hellbender/transformers/BQSRReadTransformer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ public final class BQSRReadTransformer implements ReadTransformer {
3939

4040
//These fields are created to avoid redoing these calculations for every read
4141
private final int totalCovariateCount;
42-
private final int additionalCovariateCount;
4342

4443
public static final int BASE_SUBSTITUTION_INDEX = EventType.BASE_SUBSTITUTION.ordinal();
4544

@@ -101,7 +100,7 @@ private BQSRReadTransformer( final SAMFileHeader header, final RecalibrationTabl
101100

102101
this.quantizedQuals = quantizationInfo.getQuantizedQuals();
103102
this.totalCovariateCount = bqsrCovariateList.size();
104-
additionalCovariateCount = bqsrCovariateList.getAdditionalCovariates().size();
103+
final int additionalCovariateCount = bqsrCovariateList.getAdditionalCovariates().size();
105104

106105
//Note: We pre-create the varargs arrays that will be used in the calls. Otherwise we're spending a lot of time allocating those int[] objects
107106
this.recalDatumsForAdditionalCovariates = new RecalDatum[additionalCovariateCount];

src/main/java/org/broadinstitute/hellbender/utils/recalibration/RecalibrationArgumentCollection.java

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99

1010
import java.io.File;
1111
import java.io.Serializable;
12-
import java.util.*;
12+
import java.util.ArrayList;
13+
import java.util.LinkedHashMap;
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.Set;
17+
import java.util.HashSet;
1318

1419
/**
1520
* A collection of the arguments that are used for BQSR. Used to be common to both CovariateCounterWalker and TableRecalibrationWalker.
@@ -41,7 +46,6 @@ public final class RecalibrationArgumentCollection implements Serializable {
4146
* Also, unless --no-standard-covariates is specified, the Cycle and Context covariates are standard and are included by default.
4247
* Use the --list argument to see the available covariates.
4348
*
44-
* e.g.
4549
*/
4650
@Argument(fullName = COVARIATES_LONG_NAME, shortName = COVARIATES_SHORT_NAME, doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", optional = true)
4751
public List<String> COVARIATES = new ArrayList<>();
@@ -249,35 +253,38 @@ public Map<String,? extends CharSequence> compareReportArguments(final Recalibra
249253
}
250254

251255
/**
252-
* Compares the covariate report lists.
256+
* A helper method for {@link #compareReportArguments}.
257+
* Compares the covariate report lists and update <code>diffs</code> with
258+
* key = "covariate" and
259+
* value = a message that explains the difference to the end user.
253260
*
254-
* @param diffs map where to annotate the difference.
261+
* @param diffs the map to be updated by side-effect by this method.
255262
* @param other the argument collection to compare against.
256263
* @param thisRole the name for this argument collection that makes sense to the user.
257264
* @param otherRole the name for the other argument collection that makes sense to the end user.
258265
*
259266
* @return <code>true</code> if a difference was found.
260267
*/
261-
private boolean compareRequestedCovariates(final Map<String,String> diffs,
262-
final RecalibrationArgumentCollection other, final String thisRole, final String otherRole) {
268+
private boolean compareRequestedCovariates(final Map<String,String> diffs, final RecalibrationArgumentCollection other,
269+
final String thisRole, final String otherRole) {
263270

264271
final Set<String> beforeNames = new HashSet<>(this.COVARIATES.size());
265272
final Set<String> afterNames = new HashSet<>(other.COVARIATES.size());
266273
beforeNames.addAll(this.COVARIATES);
267274
afterNames.addAll(other.COVARIATES);
268-
final Set<String> intersect = new HashSet<>(Math.min(beforeNames.size(), afterNames.size()));
269-
intersect.addAll(beforeNames);
270-
intersect.retainAll(afterNames);
275+
final Set<String> intersection = new HashSet<>(Math.min(beforeNames.size(), afterNames.size()));
276+
intersection.addAll(beforeNames);
277+
intersection.retainAll(afterNames);
271278

272279
String diffMessage = null;
273-
if (intersect.size() == 0) { // In practice this is not possible due to required covariates but...
280+
if (intersection.size() == 0) { // In practice this is not possible due to required covariates but...
274281
diffMessage = String.format("There are no common covariates between '%s' and '%s'"
275282
+ " recalibrator reports. Covariates in '%s': {%s}. Covariates in '%s': {%s}.", thisRole, otherRole,
276283
thisRole, String.join(", ", this.COVARIATES),
277284
otherRole, String.join(",", other.COVARIATES));
278-
} else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) {
279-
beforeNames.removeAll(intersect);
280-
afterNames.removeAll(intersect);
285+
} else if (intersection.size() != beforeNames.size() || intersection.size() != afterNames.size()) {
286+
beforeNames.removeAll(intersection);
287+
afterNames.removeAll(intersection);
281288
diffMessage = String.format("There are differences in the set of covariates requested in the"
282289
+ " '%s' and '%s' recalibrator reports. "
283290
+ " Exclusive to '%s': {%s}. Exclusive to '%s': {%s}.", thisRole, otherRole,

src/main/java/org/broadinstitute/hellbender/utils/recalibration/covariates/BQSRCovariateList.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@
1515
import java.util.stream.Collectors;
1616

1717
/**
18-
* Represents a list of BQSR covariates.
18+
* Represents a list of BQSR covariates. Formerly called StandardCovariateList.
1919
*
2020
* Note: the first two covariates ({@link ReadGroupCovariate} and {@link QualityScoreCovariate})
2121
* are special in the way that they are represented in the BQSR recalibration table, and are
2222
* always required. We call these the "required covariates."
2323
*
2424
* The remaining covariates are called "additional covariates". The default additional covariates
2525
* are the context and cycle covariates, but the client can request others and/or disable the default
26-
* additional covariates. TSATO: non required? additional?
26+
* additional covariates.
27+
*
28+
* Also see the documentation in: {@link CustomCovariate}.
2729
*/
2830
public final class BQSRCovariateList implements Iterable<Covariate>, Serializable {
2931
private static final long serialVersionUID = 1L;
@@ -32,22 +34,21 @@ public final class BQSRCovariateList implements Iterable<Covariate>, Serializabl
3234

3335
private final ReadGroupCovariate readGroupCovariate;
3436
private final QualityScoreCovariate qualityScoreCovariate;
35-
private final List<Covariate> additionalCovariates; // additional covariates are [context, cycle, (custom covariates)]
37+
private final List<Covariate> additionalCovariates; // additional covariates are [context, cycle, { custom covariates }]
3638
private final List<Covariate> allCovariates;
3739

3840
private final Map<Class<? extends Covariate>, Integer> indexByClass;
3941

4042
private static final List<String> REQUIRED_COVARIATE_NAMES =
41-
Collections.unmodifiableList(Arrays.asList("ReadGroupCovariate", "QualityScoreCovariate")); // TODO: can I replace these with regex so the
43+
Collections.unmodifiableList(Arrays.asList("ReadGroupCovariate", "QualityScoreCovariate"));
4244

4345
private static final List<String> STANDARD_COVARIATE_NAMES =
4446
Collections.unmodifiableList(Arrays.asList("ContextCovariate", "CycleCovariate"));
4547

4648
public static final List<String> COVARIATE_PACKAGES =
4749
Collections.unmodifiableList(Arrays.asList("org.broadinstitute.hellbender.utils.recalibration.covariates"));
48-
public static final Set<Class<?>> DISCOVERED_COVARIATES; // tsato: can replace with ? extends Covariate right?
50+
public static final Set<Class<?>> DISCOVERED_COVARIATES;
4951

50-
// tsato: ported from StandardCovariateList; may not need this, or might be better to remove them
5152
public static final int READ_GROUP_COVARIATE_DEFAULT_INDEX = 0;
5253
public static final int BASE_QUALITY_COVARIATE_DEFAULT_INDEX = 1;
5354
public static final int CONTEXT_COVARIATE_DEFAULT_INDEX = 2;
@@ -61,7 +62,8 @@ public final class BQSRCovariateList implements Iterable<Covariate>, Serializabl
6162
classFinder.find(covariatePackage, Covariate.class);
6263
}
6364

64-
DISCOVERED_COVARIATES = Collections.unmodifiableSet(classFinder.getConcreteClasses()); // TODO: somehow this also finds BQSRCovariateListUnitTest
65+
DISCOVERED_COVARIATES = Collections.unmodifiableSet(classFinder.getConcreteClasses().stream()
66+
.filter(cl -> !cl.getSimpleName().isEmpty()).collect(Collectors.toSet())); // Filter out the annonymous UnitTest classes.
6567
}
6668

6769
public static List<String> getAllDiscoveredCovariateNames() {
@@ -119,7 +121,7 @@ private List<Covariate> createNonrequiredCovariates(final RecalibrationArgumentC
119121
result.addAll(createStandardCovariates(rac, allReadGroups));
120122
}
121123

122-
for ( final String customCovariates : rac.COVARIATES ) { // tsato: how did COVARIATES get populated?
124+
for ( final String customCovariates : rac.COVARIATES ) {
123125
if ( isRequiredCovariate(customCovariates) ) {
124126
logger.warn("Covariate " + customCovariates + " is a required covariate that is always on. Ignoring explicit request for it.");
125127
}
@@ -154,7 +156,7 @@ private Covariate createCovariate(final String covariateName, final Recalibratio
154156
if ( covariateName.equals(covariateClass.getSimpleName()) ) {
155157
try {
156158
@SuppressWarnings("unchecked")
157-
final Covariate covariate = ((Class<? extends Covariate>)covariateClass).getDeclaredConstructor().newInstance(); // tsato: understand what's going on here
159+
final Covariate covariate = ((Class<? extends Covariate>)covariateClass).getDeclaredConstructor().newInstance();
158160
covariate.initialize(rac, allReadGroups);
159161
return covariate;
160162
}

src/main/java/org/broadinstitute/hellbender/utils/recalibration/covariates/CustomCovariate.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33
/**
44
* An interface to classify Covariate classes into:
5+
*
56
* 1. Required (ReadGroup, QualityScore)
67
* 2. Standard (Cycle, Context)
7-
* 3. Custom (e.g. RepeatLength)
8-
*
9-
* 2 and 3 together are called "Additional" covariates in parts of the code.
8+
* 3. Custom (any covariates defined by the user e.g. RepeatLength)
109
*
10+
* We call 2 and 3 together the "additional" covariates.
1111
*/
1212
public interface CustomCovariate extends Covariate {
1313
}

src/main/java/org/broadinstitute/hellbender/utils/recalibration/covariates/QualityScoreCovariate.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public int keyFromValue(final Object value) {
5555
}
5656

5757
@Override
58-
public int maximumKeyValue() { // shouldn't this be a static method or a constant?
58+
public int maximumKeyValue() {
5959
return QualityUtils.MAX_SAM_QUAL_SCORE;
6060
}
6161
}

src/main/java/org/broadinstitute/hellbender/utils/recalibration/covariates/RepeatLengthCovariate.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package org.broadinstitute.hellbender.utils.recalibration.covariates;
22

33
import htsjdk.samtools.SAMFileHeader;
4-
import htsjdk.samtools.SAMRecord;
54
import org.apache.commons.lang3.tuple.MutablePair;
65
import org.apache.commons.lang3.tuple.Pair;
76
import org.broadinstitute.hellbender.utils.BaseUtils;
@@ -68,7 +67,7 @@ public void recordValues( final GATKRead read, final SAMFileHeader header, final
6867
* @return a pair of byte array (the repeat unit) and integer (the number of repetitions).
6968
*/ // should really be a static method
7069
public Pair<byte[], Integer> findTandemRepeatUnits(byte[] readBases, int offset) {
71-
int numRepetitions = 0; // tsato: BW? backward?
70+
int numRepetitions = 0;
7271
byte[] bestBWRepeatUnit = new byte[]{readBases[offset]};
7372
for (int strSize = 1; strSize <= MAX_STR_UNIT_LENGTH; strSize++) {
7473
// fix repeat unit length
@@ -107,10 +106,10 @@ public Pair<byte[], Integer> findTandemRepeatUnits(byte[] readBases, int offset)
107106
for (int strLength = 1; strLength <= MAX_STR_UNIT_LENGTH; strLength++) {
108107
// fix repeat unit length
109108
// edge case: if candidate tandem repeat unit falls beyond edge of read, skip
110-
if (offset+strLength+1 > readBases.length) // tsato: is +1 needed...
109+
if (offset+strLength+1 > readBases.length)
111110
break;
112111

113-
// get forward repeat unit and # repeats (tsato: offset + 1 .... so we don't include the base at offset...)
112+
// get forward repeat unit and # repeats (offset + 1 .... so we don't include the base at offset...)
114113
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset + 1, offset + strLength + 1);
115114
String forwardRepeatUnitStr = new String(forwardRepeatUnit, StandardCharsets.UTF_8);
116115
maxFW = GATKVariantContextUtils.findNumberOfRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);

src/test/java/org/broadinstitute/hellbender/tools/spark/BaseRecalibratorSparkIntegrationTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import java.io.File;
1717
import java.io.IOException;
18+
import java.io.Serializable;
1819
import java.util.Arrays;
1920

2021
public final class BaseRecalibratorSparkIntegrationTest extends CommandLineProgramTest {

src/test/java/org/broadinstitute/hellbender/utils/recalibration/RecalibrationReportUnitTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErr
143143
}
144144

145145
@Test
146-
public void testNonStandardCovariates(){ // tsato: study this test
146+
public void testNonStandardCovariates(){
147147
File file = new File(toolsTestDir + "nonstandard-covariates.table.gz");
148148
final RecalibrationReport report = new RecalibrationReport(file);
149149
final BQSRCovariateList covariates = report.getCovariates();

src/test/java/org/broadinstitute/hellbender/utils/recalibration/covariates/RepeatLengthCovariateUnitTest.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,16 @@ public void testFindTandemRepeatUnits(final String readBaseString, final int off
5151
// remove the characters that were put in to make the input string easier to read
5252
final byte[] readBases = readBaseString.replaceAll("[()_]", "").getBytes();
5353
RepeatLengthCovariate repeatLengthCovariate = new RepeatLengthCovariate();
54-
// tsato: do I really need to initialize this...
5554
repeatLengthCovariate.initialize(new RecalibrationArgumentCollection(), Arrays.asList("yo"));
5655

5756
Pair<byte[], Integer> ans = repeatLengthCovariate.findTandemRepeatUnits(readBases, offset);
58-
// Pair<byte[], Integer> ans = RepeatLengthCovariate.detectSTR(readBases, offset, 8);
5957
byte[] repeatUnit = ans.getLeft();
6058
int repeatLength = ans.getRight();
6159

6260
// for debugging
6361
String repeatUnitStr = new String(repeatUnit, StandardCharsets.UTF_8);
6462

65-
int d = 3;
6663
Assert.assertEquals(repeatUnitStr, expectedRepeatUnit);
6764
Assert.assertEquals(repeatLength, expectedRepeatLength);
6865
}
69-
7066
}

0 commit comments

Comments
 (0)