@@ -199,14 +199,15 @@ cdef class _GenomicsDB:
199
199
flatten_intervals = False ,
200
200
json_output = None ,
201
201
arrow_output = None ,
202
- # batching only used with arrow_output
203
- batching = False ):
202
+ # batching and compress are only used with arrow_output (compress is forwarded as the Arrow IPC compression option)
203
+ batching = False ,
204
+ compress = None ):
204
205
""" Query for variant calls from the GenomicsDB workspace using array, column_ranges and row_ranges for subsetting """
205
206
206
207
if json_output is not None :
207
208
return self .query_variant_calls_json(array, column_ranges, row_ranges, query_protobuf, json_output);
208
209
elif arrow_output is not None :
209
- return self .query_variant_calls_arrow(array, column_ranges, row_ranges, query_protobuf, batching);
210
+ return self .query_variant_calls_arrow(array, column_ranges, row_ranges, query_protobuf, batching, compress );
210
211
elif flatten_intervals is True :
211
212
return self .query_variant_calls_columnar(array, column_ranges, row_ranges, query_protobuf)
212
213
else :
@@ -318,7 +319,8 @@ cdef class _GenomicsDB:
318
319
column_ranges = None ,
319
320
row_ranges = None ,
320
321
query_protobuf: query_pb.QueryConfiguration = None ,
321
- batching = False ):
322
+ batching = False ,
323
+ compress = None ):
322
324
""" Query for variant calls from the GenomicsDB workspace using array, column_ranges and row_ranges for subsetting """
323
325
324
326
cdef ArrowVariantCallProcessor processor
@@ -366,19 +368,24 @@ cdef class _GenomicsDB:
366
368
schema_capsule = pycapsule_get_arrow_schema(arrow_schema)
367
369
schema_obj = _ArrowSchemaWrapper._import_from_c_capsule(schema_capsule)
368
370
schema = pa.schema(schema_obj.children_schema)
369
- yield schema.serialize().to_pybytes()
370
371
else :
371
372
raise GenomicsDBException(" Failed to retrieve arrow schema for query_variant_calls()" )
372
373
373
374
cdef void * arrow_array = NULL
375
+ w_opts = pa.ipc.IpcWriteOptions(allow_64bit = True , compression = compress)
374
376
while True :
375
377
try :
376
378
arrow_array = processor.arrow_array()
377
379
if arrow_array:
378
380
array_capsule = pycapsule_get_arrow_array(arrow_array)
379
381
array_obj = _ArrowArrayWrapper._import_from_c_capsule(schema_capsule, array_capsule)
380
382
arrays = [pa.array(array_obj.child(i)) for i in range (array_obj.n_children)]
381
- yield pa.RecordBatch.from_arrays(arrays, schema = schema).serialize().to_pybytes()
383
+ batch = pa.RecordBatch.from_arrays(arrays, schema = schema)
384
+ sink = pa.BufferOutputStream()
385
+ writer = pa.RecordBatchStreamWriter(sink, schema, options = w_opts)
386
+ writer.write_batch(batch)
387
+ writer.close()
388
+ yield sink.getvalue().to_pybytes()
382
389
else :
383
390
break
384
391
except Exception as e:
0 commit comments