Skip to content

Commit 27dcb61

Browse files
FIX: Reintroduce sparkPar parameter
* The sparkPar parameter was commented out
* Led to non-deterministic test results due to machine-dependent partitioning
* Updated test case to print normalised importances
1 parent b686d75 commit 27dcb61

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

src/main/scala/au/csiro/variantspark/api/VSContext.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ class VSContext(val spark: SparkSession) extends SqlContextHolder {
3838
* @param inputFile path to file or directory with VCF files to load
3939
* @return FeatureSource loaded from the VCF file
4040
*/
41-
def importVCF(inputFile: String, imputationStrategy: String = "none"): FeatureSource = {
41+
def importVCF(inputFile: String, imputationStrategy: String = "none",
42+
sparkPar: Int = 0): FeatureSource = {
4243
val vcfSource =
43-
VCFSource(sc, inputFile)
44-
// VCFSource(sc.textFile(inputFile, if (sparkPar > 0) sparkPar else sc.defaultParallelism))
44+
VCFSource(sc.textFile(inputFile, if (sparkPar > 0) sparkPar else sc.defaultParallelism))
4545
VCFFeatureSource(vcfSource, imputationStrategy)
4646
}
4747

src/test/scala/au/csiro/variantspark/api/ImportanceApiTest.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class ImportanceApiTest extends SparkTest {
1919
val params = RandomForestParams(seed = 17L)
2020
val rfModel = RFModelTrainer.trainModel(features, label, params, 200, 50)
2121
val impAnalysis = new ImportanceAnalysis(sqlContext, features, rfModel)
22-
val top10Variables = impAnalysis.importantVariables(10)
22+
val top10Variables = impAnalysis.importantVariables(10, normalized = true)
23+
top10Variables.foreach(println _)
2324
assertEquals(10, top10Variables.size)
2425
assertEquals("22_16050678_C_T", top10Variables.head._1)
2526
}

0 commit comments

Comments
 (0)