11package org .broadinstitute .hellbender .engine ;
22
3+ import htsjdk .beta .io .bundle .Bundle ;
4+ import htsjdk .beta .io .bundle .BundleJSON ;
5+ import htsjdk .beta .io .bundle .BundleResource ;
6+ import htsjdk .beta .io .bundle .BundleResourceType ;
7+ import htsjdk .io .IOPath ;
38import htsjdk .samtools .SAMSequenceDictionary ;
49import htsjdk .samtools .util .IOUtil ;
510import htsjdk .samtools .util .Locatable ;
@@ -148,7 +153,7 @@ public FeatureDataSource(final File featureFile) {
148153 * generated name, and will look ahead the default number of bases ({@link #DEFAULT_QUERY_LOOKAHEAD_BASES})
149154 * during queries that produce cache misses.
150155 *
151- * @param featurePath path or URI to source of Features
156+ * @param featurePath path or URI to source of Features (may be a Bundle)
152157 */
153158 public FeatureDataSource (final String featurePath ) {
154159 this (featurePath , null , DEFAULT_QUERY_LOOKAHEAD_BASES , null );
@@ -159,7 +164,7 @@ public FeatureDataSource(final String featurePath) {
159164 * name. We will look ahead the default number of bases ({@link #DEFAULT_QUERY_LOOKAHEAD_BASES}) during queries
160165 * that produce cache misses.
161166 *
162- * @param featureFile file containing Features
167+ * @param featureFile file or Bundle containing Features
163168 * @param name logical name for this data source (may be null)
164169 */
165170 public FeatureDataSource (final File featureFile , final String name ) {
@@ -170,7 +175,7 @@ public FeatureDataSource(final File featureFile, final String name) {
170175 * Creates a FeatureDataSource backed by the provided File and assigns this data source the specified logical
171176 * name. We will look ahead the specified number of bases during queries that produce cache misses.
172177 *
173- * @param featureFile file containing Features
178+ * @param featureFile file or Bundle containing Features
174179 * @param name logical name for this data source (may be null)
175180 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
176181 */
@@ -181,7 +186,7 @@ public FeatureDataSource(final File featureFile, final String name, final int qu
181186 /**
182187 * Creates a FeatureDataSource backed by the resource at the provided path.
183188 *
184- * @param featurePath path to file or GenomicsDB url containing features
189+ * @param featurePath path to file or GenomicsDB url or Bundle containing features
185190 * @param name logical name for this data source (may be null)
186191 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
187192 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
@@ -195,7 +200,7 @@ public FeatureDataSource(final String featurePath, final String name, final int
195200 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
196201 * during queries that produce cache misses.
197202 *
198- * @param featureInput a FeatureInput specifying a source of Features
203+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
199204 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
200205 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
201206 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -207,7 +212,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
207212 /**
208213 * Creates a FeatureDataSource backed by the resource at the provided path.
209214 *
210- * @param featurePath path to file or GenomicsDB url containing features
215+ * @param featurePath path to file or GenomicsDB url or Bundle containing features
211216 * @param name logical name for this data source (may be null)
212217 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
213218 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
@@ -224,7 +229,7 @@ public FeatureDataSource(final String featurePath, final String name, final int
224229 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
225230 * during queries that produce cache misses.
226231 *
227- * @param featureInput a FeatureInput specifying a source of Features
232+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
228233 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
229234 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
230235 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -241,7 +246,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
241246 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
242247 * during queries that produce cache misses.
243248 *
244- * @param featureInput a FeatureInput specifying a source of Features
249+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
245250 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
246251 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
247252 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -259,7 +264,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
259264 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
260265 * during queries that produce cache misses.
261266 *
262- * @param featureInput a FeatureInput specifying a source of Features
267+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
263268 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
264269 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
265270 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -278,7 +283,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
278283 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
279284 * during queries that produce cache misses.
280285 *
281- * @param featureInput a FeatureInput specifying a source of Features
286+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
282287 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
283288 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
284289 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -296,7 +301,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
296301 * Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
297302 * during queries that produce cache misses.
298303 *
299- * @param featureInput a FeatureInput specifying a source of Features
304+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
300305 * @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
301306 * @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
302307 * that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -369,9 +374,26 @@ private static <T extends Feature> FeatureReader<T> getFeatureReader(final Featu
369374 } catch (final ClassCastException e ) {
370375 throw new UserException ("GenomicsDB inputs can only be used to provide VariantContexts." , e );
371376 }
377+ } else if (featureInput .hasExtension (BundleJSON .BUNDLE_EXTENSION )) {
378+ // the feature input specifies a serialized json bundle file
379+ final Bundle vcfBundle = BundleJSON .toBundle (htsjdk .beta .plugin .IOUtils .getStringFromPath (featureInput ), GATKPath ::new );
380+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
381+ // to get the codec we have to use the path of the underlying vcf resource, not the bundle path
382+ final FeatureInput <T > fi = new FeatureInput <T >(vcfPath .getRawInputString (), featureInput .getName ());
383+ final FeatureCodec <T , ?> codec = getCodecForFeatureInput (fi , targetFeatureType , setNameOnCodec );
384+ // propagate the bundle path, not the vcf path, to the reader, so that downstream code can retrieve
385+ // the index path from the bundle
386+ return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
387+ } else if (featureInput .getParentBundle () != null ) {
388+ // the featureInput was created from a bundle list expansion (i.e., MultiVariantWalkers). it has the
389+ // primary resource as the underlying resource path, and the containing bundle attached as the
390+ // "parent bundle". Use the original FI to get the codec, but to get the feature reader, we use
391+ // the FI that contains the bundle path, since the feature reader may require access to the index
392+ final FeatureCodec <T , ?> codec = getCodecForFeatureInput (featureInput , targetFeatureType , setNameOnCodec );
393+ return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
372394 } else {
373395 final FeatureCodec <T , ?> codec = getCodecForFeatureInput (featureInput , targetFeatureType , setNameOnCodec );
374- if ( featureInput .getFeaturePath ().toLowerCase ().endsWith (BCI_FILE_EXTENSION ) ) {
396+ if (featureInput .getFeaturePath ().toLowerCase ().endsWith (BCI_FILE_EXTENSION )) {
375397 return new Reader (featureInput , codec );
376398 }
377399 return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
@@ -419,18 +441,48 @@ private static <T extends Feature> FeatureReader<T> getFeatureReader(final Featu
419441 private static <T extends Feature > AbstractFeatureReader <T , ?> getTribbleFeatureReader (final FeatureInput <T > featureInput , final FeatureCodec <T , ?> codec , final Function <SeekableByteChannel , SeekableByteChannel > cloudWrapper , final Function <SeekableByteChannel , SeekableByteChannel > cloudIndexWrapper ) {
420442 Utils .nonNull (codec );
421443 try {
422- // Must get the path to the data file from the codec here:
423- final String absoluteRawPath = featureInput .getRawInputString ();
424-
425444 // Instruct the reader factory to not require an index. We will require one ourselves as soon as
426445 // a query by interval is attempted.
427446 final boolean requireIndex = false ;
428447
429- // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
430- if (BucketUtils .isEligibleForPrefetching (featureInput )) {
431- return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
448+ if (featureInput .hasExtension (BundleJSON .BUNDLE_EXTENSION )) {
449+ final Bundle vcfBundle = BundleJSON .toBundle (htsjdk .beta .plugin .IOUtils .getStringFromPath (featureInput ), GATKPath ::new );
450+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
451+ final Optional <BundleResource > vcfIndexPath = vcfBundle .get (BundleResourceType .CT_VARIANTS_INDEX );
452+ final String rawIndexResourcePath =
453+ vcfIndexPath .isPresent () ? vcfIndexPath .get ().getIOPath ().get ().getRawInputString () : null ;
454+
455+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
456+ if (BucketUtils .isEligibleForPrefetching (vcfPath )) {
457+ final String absoluteRawPath = vcfPath .getRawInputString ();
458+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
459+ } else {
460+ return AbstractFeatureReader .getFeatureReader (vcfPath .getRawInputString (), rawIndexResourcePath , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
461+ }
462+ } else if (featureInput .getParentBundle () != null ) {
463+ final Bundle vcfBundle = featureInput .getParentBundle ();
464+ // code path for when a user has specified multiple bundles on the command line, so there is no single
465+ // serialized bundle file to access
466+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
467+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
468+ final Optional <BundleResource > vcfIndexPath = vcfBundle .get (BundleResourceType .CT_VARIANTS_INDEX );
469+ final String rawIndexResourcePath =
470+ vcfIndexPath .isPresent () ? vcfIndexPath .get ().getIOPath ().get ().getRawInputString () : null ;
471+ final String absoluteRawPath = vcfPath .getRawInputString ();
472+ if (BucketUtils .isEligibleForPrefetching (vcfPath )) {
473+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
474+ } else {
475+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
476+ }
432477 } else {
433- return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
478+ final String absoluteRawPath = featureInput .getRawInputString ();
479+
480+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
481+ if (BucketUtils .isEligibleForPrefetching (featureInput )) {
482+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
483+ } else {
484+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
485+ }
434486 }
435487 } catch (final TribbleException e ) {
436488 throw new GATKException ("Error initializing feature reader for path " + featureInput .getFeaturePath (), e );
0 commit comments