33import htsjdk .beta .io .bundle .*;
44import htsjdk .beta .plugin .IOUtils ;
55import htsjdk .beta .plugin .variants .VariantsBundle ;
6+ import org .broadinstitute .barclay .argparser .CommandLineException ;
67import org .broadinstitute .hellbender .CommandLineProgramTest ;
78import org .broadinstitute .hellbender .cmdline .StandardArgumentDefinitions ;
89import org .broadinstitute .hellbender .engine .GATKPath ;
1314import java .util .ArrayList ;
1415import java .util .Arrays ;
1516import java .util .List ;
16- import java .util .stream .Collectors ;
1717
1818public class CreateBundleIntegrationTest extends CommandLineProgramTest {
1919
2020 // force our local paths to use absolute path names to make BundleResource and IOPath equality checks easier,
2121 // since once a bundle is round-tripped/serialized to JSON, the resources will always contain absolute path names
2222 // for local files
23+
24+ //NOTE: These variables are Strings, but they are initialized to Strings obtained by first creating a GATKPath,
25+ // and then calling getURIString on the resulting object. This is just a shortcut to normalize them so they
26+ // match the strings that will be embedded in the bundles created by the CreateBundle tool (i.e., to have
27+ // full/absolute paths and protocol schemes).
2328 private final static String LOCAL_VCF = new GATKPath (getTestDataDir () + "/count_variants_withSequenceDict.vcf" ).getURIString ();
2429 private final static String LOCAL_VCF_IDX = new GATKPath (getTestDataDir () + "/count_variants_withSequenceDict.vcf.idx" ).getURIString ();
2530 private final static String LOCAL_VCF_GZIP = new GATKPath ("src/test/resources/large/NA24385.vcf.gz" ).getURIString ();
2631 private final static String LOCAL_VCF_TBI = new GATKPath ("src/test/resources/large/NA24385.vcf.gz.tbi" ).getURIString ();
2732 private final static String LOCAL_VCF_WITH_NO_INDEX = new GATKPath ("src/test/resources/org/broadinstitute/hellbender/tools/count_variants_withSequenceDict_noIndex.vcf" ).getURIString ();
28- private final static String CLOUD_VCF = "gs://hellbender/test/resources/ large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf" ;
29- private final static String CLOUD_VCF_IDX = "gs://hellbender/test/resources/ large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx" ;
33+ private final static String CLOUD_VCF = GCS_GATK_TEST_RESOURCES + " large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf" ;
34+ private final static String CLOUD_VCF_IDX = GCS_GATK_TEST_RESOURCES + " large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx" ;
3035
31- private final static String LOCAL_FASTA = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta" ;
32- private final static String LOCAL_FASTA_INDEX = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai" ;
33- private final static String LOCAL_FASTA_DICT = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta. dict" ;
36+ private final static String LOCAL_FASTA = new GATKPath ( "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta" ). getURIString () ;
37+ private final static String LOCAL_FASTA_INDEX = new GATKPath ( "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai" ). getURIString () ;
38+ private final static String LOCAL_FASTA_DICT = new GATKPath ( "src/test/resources/large/Homo_sapiens_assembly38.20.21.dict" ). getURIString () ;
3439
3540 private final static String CUSTOM_PRIMARY_CT = "primary_ct" ;
3641 private final static String CUSTOM_SECONDARY_CT = "secondary_ct" ;
@@ -41,22 +46,21 @@ public Object[][] bundleCases() {
4146 return new Object [][] {
4247 // primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressResourceResolution, expectedBundle
4348
44- // VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED content types
49+ // VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED primary content types
4550 {LOCAL_VCF , null , null , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
46- {LOCAL_VCF , null , LOCAL_VCF_IDX , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
51+ {LOCAL_VCF , null , LOCAL_VCF_IDX , BundleResourceType . CT_VARIANTS_INDEX , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
4752 {LOCAL_VCF_GZIP , null , null , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF_GZIP ), new GATKPath (LOCAL_VCF_TBI ))},
53+ {LOCAL_VCF_GZIP , null , LOCAL_VCF_TBI , BundleResourceType .CT_VARIANTS_INDEX , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF_GZIP ), new GATKPath (LOCAL_VCF_TBI ))},
4854 {CLOUD_VCF , null , null , null , null , null , false , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
49- {CLOUD_VCF , null , CLOUD_VCF_IDX , null , null , null , false , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
55+ {CLOUD_VCF , null , CLOUD_VCF_IDX , BundleResourceType . CT_VARIANTS_INDEX , null , null , false , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
5056
51- // VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED content types
57+ // VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED primary content types
5258 {LOCAL_VCF , null , null , null , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF ))},
53- { LOCAL_VCF , null , LOCAL_VCF_IDX , null , null , null , true , new VariantsBundle ( new GATKPath ( LOCAL_VCF ), new GATKPath ( LOCAL_VCF_IDX ))},
59+ // local vcf that has no index, but since suppressSecondaryResourceResolution is true, we don't throw since we don't try to infer the index
5460 {LOCAL_VCF_WITH_NO_INDEX , null , null , null , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF_WITH_NO_INDEX ))},
55- {LOCAL_VCF_GZIP , null , LOCAL_VCF_TBI , null , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF_GZIP ), new GATKPath (LOCAL_VCF_TBI ))},
5661 {CLOUD_VCF , null , null , null , null , null , true , new VariantsBundle (new GATKPath (CLOUD_VCF ))},
57- {CLOUD_VCF , null , CLOUD_VCF_IDX , null , null , null , true , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
5862
59- // VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT content types
63+ // VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT primary content types
6064 {LOCAL_VCF , BundleResourceType .CT_VARIANT_CONTEXTS , null , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
6165 {LOCAL_VCF , BundleResourceType .CT_VARIANT_CONTEXTS , LOCAL_VCF_IDX , BundleResourceType .CT_VARIANTS_INDEX , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
6266
@@ -70,9 +74,24 @@ public Object[][] bundleCases() {
7074 .addPrimary (new IOPathResource (new GATKPath (LOCAL_VCF ), BundleResourceType .CT_VARIANT_CONTEXTS ))
7175 .addSecondary (new IOPathResource (new GATKPath (LOCAL_VCF_IDX ), BundleResourceType .CT_VARIANTS_INDEX ))
7276 .addSecondary (new IOPathResource (new GATKPath (new GATKPath ("someVariantsCompanion.txt" ).getURIString ()), "someVariantsCT" ))
73- .build ()},
77+ .build ()
78+ },
7479
7580 // reference bundles
81+ { LOCAL_FASTA , null , null , null , null , null , false ,
82+ new BundleBuilder ()
83+ .addPrimary (new IOPathResource (new GATKPath (LOCAL_FASTA ), BundleResourceType .CT_HAPLOID_REFERENCE ))
84+ .addSecondary (new IOPathResource (new GATKPath (LOCAL_FASTA_INDEX ), BundleResourceType .CT_REFERENCE_INDEX ))
85+ .addSecondary (new IOPathResource (new GATKPath (LOCAL_FASTA_DICT ), BundleResourceType .CT_REFERENCE_DICTIONARY ))
86+ .build ()
87+ },
88+ { LOCAL_FASTA , BundleResourceType .CT_HAPLOID_REFERENCE , LOCAL_FASTA_INDEX , BundleResourceType .CT_REFERENCE_INDEX , Arrays .asList (LOCAL_FASTA_DICT ), Arrays .asList (BundleResourceType .CT_REFERENCE_DICTIONARY ), false ,
89+ new BundleBuilder ()
90+ .addPrimary (new IOPathResource (new GATKPath (LOCAL_FASTA ), BundleResourceType .CT_HAPLOID_REFERENCE ))
91+ .addSecondary (new IOPathResource (new GATKPath (LOCAL_FASTA_INDEX ), BundleResourceType .CT_REFERENCE_INDEX ))
92+ .addSecondary (new IOPathResource (new GATKPath (LOCAL_FASTA_DICT ), BundleResourceType .CT_REFERENCE_DICTIONARY ))
93+ .build ()
94+ },
7695
7796 // "custom" bundles
7897 {
@@ -105,17 +124,26 @@ public Object[][] negativeBundleCases() {
105124 return new Object [][] {
106125 // primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressResourceResolution, expectedBundle
107126
108- // no index file can be inferred
127+ // no vcf index file can be inferred
109128 {LOCAL_VCF_WITH_NO_INDEX , null , null , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF_WITH_NO_INDEX ))},
129+ // vcf bundle with secondary/other content type not explicitly provided
130+ {LOCAL_VCF , BundleResourceType .CT_VARIANT_CONTEXTS , null , null , Arrays .asList ("other.txt" ), null , false , null },
131+ {LOCAL_VCF , null , LOCAL_VCF_IDX , null , null , null , false , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
132+ {LOCAL_VCF , null , LOCAL_VCF_IDX , null , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF ), new GATKPath (LOCAL_VCF_IDX ))},
133+ {LOCAL_VCF_GZIP , null , LOCAL_VCF_TBI , null , null , null , true , new VariantsBundle (new GATKPath (LOCAL_VCF_GZIP ), new GATKPath (LOCAL_VCF_TBI ))},
134+ {CLOUD_VCF , null , CLOUD_VCF_IDX , null , null , null , false , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
135+ {CLOUD_VCF , null , CLOUD_VCF_IDX , null , null , null , true , new VariantsBundle (new GATKPath (CLOUD_VCF ), new GATKPath (CLOUD_VCF_IDX ))},
110136 // primary content type not provided, and cannot be inferred from the extension
111137 {"primaryFile.ext" , null , null , null , null , null , false , null },
112138 // secondary content type not provided
113139 {"primaryFile.ext" , CUSTOM_PRIMARY_CT , "secondaryFile.ext" , null , null , null , false , null },
114140
141+ // reference input with unknown content type specified
142+ { LOCAL_FASTA , null , LOCAL_FASTA_INDEX , "unknown" , null , null , false , null },
143+
115144 // other bundle with other content type not provided
116145 {"primaryFile.ext" , CUSTOM_PRIMARY_CT , "secondaryFile.ext" , CUSTOM_SECONDARY_CT , Arrays .asList ("other.txt" ), null , false , null },
117- // vcf bundle with other content type not provided
118- {LOCAL_VCF , BundleResourceType .CT_VARIANT_CONTEXTS , null , null , Arrays .asList ("other.txt" ), null , false , null },
146+
119147 };
120148 }
121149
@@ -145,6 +173,17 @@ public void testNegativeBundleCases(
145173 doCreateBundleTest (primaryInput , primaryInputTag , secondaryInput , secondaryInputTag , otherInputs , otherInputTags , suppressResourceResolution , expectedBundle );
146174 }
147175
176+ @ Test (expectedExceptions ={CommandLineException .class })
177+ public void testRequireBundleExtension () {
178+ final GATKPath outputPath = new GATKPath (createTempFile ("test" , ".bundle.BOGUS" ).getAbsolutePath ().toString ());
179+ final List <String > args = new ArrayList <>();
180+ args .add ("--" + StandardArgumentDefinitions .PRIMARY_RESOURCE_LONG_NAME );
181+ args .add (LOCAL_FASTA );
182+ args .add ("--" + StandardArgumentDefinitions .OUTPUT_LONG_NAME );
183+ args .add (outputPath .toString ());
184+ runCommandLine (args );
185+ }
186+
148187 private void doCreateBundleTest (
149188 final String primaryInput ,
150189 final String primaryInputTag ,
@@ -176,10 +215,6 @@ private void doCreateBundleTest(
176215 args .add ("--" + StandardArgumentDefinitions .OUTPUT_LONG_NAME );
177216 args .add (outputPath .toString ());
178217
179- System .out .println ();
180- System .out .println (args .stream ().collect (Collectors .joining ("\n " )));
181- System .out .println ();
182-
183218 runCommandLine (args );
184219
185220 final Bundle actualBundle = BundleJSON .toBundle (IOUtils .getStringFromPath (outputPath ), GATKPath ::new );
0 commit comments