33import htsjdk .beta .io .bundle .*;
44import htsjdk .beta .plugin .registry .HaploidReferenceResolver ;
55import htsjdk .beta .plugin .variants .VariantsBundle ;
6- import htsjdk .io .HtsPath ;
76import htsjdk .samtools .util .FileExtensions ;
87import org .apache .logging .log4j .LogManager ;
98import org .apache .logging .log4j .Logger ;
2322/**
2423 * Create a bundle (JSON) file for use with a GATK tool.
2524 *
26- * Since most bundles will contain a primary resource plus at least one secondary resource (typically an index),
25+ * Since most bundles need to contain a primary resource plus at least one secondary resource (typically an index),
2726 * the tool will attempt to infer standard secondary resource(s) for a given primary resource if no secondary resource
2827 * is explicitly provided on the command line. Inferred secondary resources are automatically added to the resulting
2928 * bundle. Secondary resource inference can be suppressed by using the --suppress-resource-resolution argument.
3029 *
3130 * Each resource in a bundle must have an associated content type tag. Content types for each resource are either
3231 * specified on the command line via argument tags, or inferred by the tool. For the primary and secondary resources,
3332 * when no content type argument tag is provided, the tool will attempt to infer the content type from the file
34- * extension. However, the content type for "other" resources (resources that are nether primary nor secondary resources)
35- * are NEVER inferred, and must always include a content type argument tag.
33+ * extension. However, content types for "other" resources (resources that are neither primary nor secondary
34+ * resources) are NEVER inferred; such resources must always include a content type argument tag.
3635 *
3736 * Bundle output file names must end with the suffix ".json".
3837 *
39- * Common examples:
38+ * In general, content types can be any string, but there are well-known content types that must be used when creating
39+ * bundles for tools that expect well-known resource types, such as a VCF, a VCF index, a .fasta file, or a reference
40+ * dictionary file. The common well-known content types are:
41+ *
42+ * - "CT_VARIANT_CONTEXTS": a VCF file
43+ * - "CT_VARIANTS_INDEX": VCF index file
44+ *
45+ * - "CT_HAPLOID_REFERENCE": fasta reference file
46+ * - "CT_HAPLOID_REFERENCE_INDEX": fasta index file
47+ * - "CT_HAPLOID_REFERENCE_DICTIONARY": fasta dictionary file
48+ *
49+ * Common bundle creation examples:
4050 *
4151 * VCF Bundles:
4252 *
43- * 1) Create a resource bundle for a VCF. Let the tool determine the content types, and resolve the secondary resources
44- * (which for vcfs is the companion index) automatically by finding a sibling index file. If the sibling file cannot
45- * be found, an exception wil lbe thrown:
53+ * 1) Create a resource bundle for a VCF from just the VCF, letting the tool resolve the secondary (index) resource by
54+ * automatically finding the sibling index file, and letting the tool determine the content types. If the sibling index
55+ * file cannot be found, an exception will be thrown. Resulting bundle contains the VCF and associated index.
56+ *
57+ * CreateBundle \
58+ * --primary path/to/my.vcf \
59+ * --output mybundle.json
60+ *
61+ * The exact same bundle could be created manually by specifying both the resources and the content types explicitly:
62+ *
63+ * CreateBundle \
64+ * --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
65+ * --secondary:CT_VARIANTS_INDEX path/to/my.vcf.idx \
66+ * --output mybundle.json
67+ *
68+ * 2) Create a resource bundle for a VCF from just the VCF, but suppress automatic resolution of the secondary
69+ * resources. Let the tool determine the content types. The resulting bundle will contain only the vcf resource:
70+ *
71+ * CreateBundle \
72+ * --primary path/to/my.vcf \
73+ * --suppress-resource-resolution \
74+ * --output mybundle.json
75+ *
76+ * 3) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (which
77+ * suppresses automatic secondary resolution). This is useful when the VCF and index are not in the same directory.
78+ * Let the tool determine the content types. The resulting bundle will contain the VCF and index resources:
79+ *
80+ * CreateBundle \
81+ * --primary path/to/my.vcf \
82+ * --secondary some/other/path/to/my.vcf.idx \
83+ * --output mybundle.json
84+ *
85+ * 4) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (this
86+ * is useful when the VCF and index are not in the same directory), and specify the content types explicitly via
87+ * command line argument tags. The resulting bundle will contain the VCF and index resources.
88+ *
89+ * CreateBundle \
90+ * --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
91+ * --secondary:CT_VARIANTS_INDEX some/other/path/to/my.vcf.idx \
92+ * --output mybundle.json
93+ *
94+ * Reference bundles:
95+ *
96+ * 1) Create a resource bundle for a reference from just the .fasta, letting the tool resolve the secondary
97+ * (index and dictionary) resources by automatically finding the sibling files, and determining the content types.
98+ * If the sibling index file cannot be found, an exception will be thrown. The resulting bundle will contain the
99+ * reference, index, and dictionary.
46100 *
47- * CreateBundle --primary path/to/my.vcf --output mybundle.json
101+ * CreateBundle \
102+ * --primary path/to/my.fasta \
103+ * --output mybundle.json
48104 *
49- * 2) Create a resource bundle for a VCF. Let the tool determine the content types, but suppress resolution of the secondary
50- * resources (which for vcfs is the companion index). The resulting bundle will contain only the vcf resource:
105+ * 2) Create a resource bundle for a reference from just the .fasta, but suppress resolution of the secondary index and
106+ * dictionary resources. Let the tool determine the content type. The resulting bundle will contain only the .fasta
107+ * resource:
51108 *
52- * CreateBundle --primary path/to/my.vcf --output mybundle.json
109+ * CreateBundle \
110+ * --primary path/to/my.fasta \
111+ * --suppress-resource-resolution \
112+ * --output mybundle.json
53113 *
54- * 3) Create a resource bundle for a VCF. Let the tool determine the content type, but specify the secondary
55- * index resource explicitly (which suppresses secondary resolution). The resulting bundle will contain the vcf
56- * and index resources:
114+ * 3) Create a resource bundle for a fasta, but specify the fasta AND the secondary index and dictionary resources
115+ * explicitly (which suppresses automatic secondary resolution). Let the tool determine the content types. The
116+ * resulting bundle will contain the fasta, index and dictionary resources:
57117 *
58- * CreateBundle --primary path/to/my.vcf --secondary some/other/path/to/vcd.idx --output mybundle.json
118+ * CreateBundle \
119+ * --primary path/to/my.fasta \
120+ * --secondary some/other/path/to/my.fai \
121+ * --secondary some/other/path/to/my.dict \
122+ * --output mybundle.json
59123 *
60- * Reference bundles: create a bundle using explicitly provided values and content types for the primary and
61- * secondary resources:
124+ * 4) Create a resource bundle for a fasta, but specify the fasta, index and dictionary resources and the content
125+ * types explicitly. The resulting bundle will contain the fasta, index and dictionary resources:
62126 *
63- * CreateBundle --primary: path/to/my.fa
127+ * CreateBundle \
128+ * --primary:CT_HAPLOID_REFERENCE path/to/my.fasta \
129+ * --secondary:CT_HAPLOID_REFERENCE_INDEX some/other/path/to/my.fai \
130+ * --secondary:CT_HAPLOID_REFERENCE_DICTIONARY some/other/path/to/my.dict \
131+ * --output mybundle.json
64132 */
65133@ DocumentedFeature
66134@ CommandLineProgramProperties (
@@ -110,7 +178,7 @@ public class CreateBundle extends CommandLineProgram {
110178 private enum BundleType {
111179 VCF ,
112180 REFERENCE ,
113- OTHER
181+ CUSTOM
114182 }
115183 private BundleType outputBundleType ;
116184
@@ -129,7 +197,7 @@ protected Object doWork() {
129197 final Bundle bundle = switch (outputBundleType ) {
130198 case VCF -> createVCFBundle ();
131199 case REFERENCE -> createHaploidReferenceBundle ();
132- case OTHER -> createOtherBundle ();
200+ case CUSTOM -> createOtherBundle ();
133201 };
134202 writer .write (BundleJSON .toJSON (bundle ));
135203 } catch (final IOException e ) {
@@ -153,7 +221,7 @@ private BundleType determinePrimaryContentType() {
153221 logger .info (String .format ("Primary input content type %s for %s not recognized. A bundle will be created using content types from the provided argument tags." ,
154222 primaryContentTag ,
155223 primaryResource ));
156- bundleType = BundleType .OTHER ;
224+ bundleType = BundleType .CUSTOM ;
157225 }
158226 } else {
159227 logger .info (String .format ("A content type for the primary input was not provided. Attempting to infer the content type from the %s extension." , primaryResource ));
0 commit comments