diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index b562249fc527..f2aca2fd5b51 100644 --- a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -346,6 +346,10 @@ pub fn make_encoder<'a>( let array = array.as_string_view(); NullableEncoder::new(Box::new(StringViewEncoder(array)), array.nulls().cloned()) } + DataType::BinaryView => { + let array = array.as_binary_view(); + NullableEncoder::new(Box::new(BinaryViewEncoder(array)), array.nulls().cloned()) + } DataType::List(_) => { let array = array.as_list::(); NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned()) @@ -440,6 +444,14 @@ fn encode_string(s: &str, out: &mut Vec) { serializer.serialize_str(s).unwrap(); } +fn encode_binary(bytes: &[u8], out: &mut Vec) { + out.push(b'"'); + for byte in bytes { + write!(out, "{byte:02x}").unwrap(); + } + out.push(b'"'); +} + struct FieldEncoder<'a> { field: FieldRef, encoder: NullableEncoder<'a>, @@ -609,6 +621,14 @@ impl Encoder for StringViewEncoder<'_> { } } +struct BinaryViewEncoder<'a>(&'a BinaryViewArray); + +impl Encoder for BinaryViewEncoder<'_> { + fn encode(&mut self, idx: usize, out: &mut Vec) { + encode_binary(self.0.value(idx), out); + } +} + struct ListEncoder<'a, O: OffsetSizeTrait> { offsets: OffsetBuffer, encoder: NullableEncoder<'a>, diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index ae3b2ee78a57..6c34372ca07e 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -1751,17 +1751,13 @@ mod tests { Ok(()) } - fn binary_encoding_test() { - // set up schema + fn build_array_binary(values: &[Option<&[u8]>]) -> RecordBatch { let schema = SchemaRef::new(Schema::new(vec![Field::new( "bytes", GenericBinaryType::::DATA_TYPE, true, )])); - - // build record batch: let mut builder = GenericByteBuilder::>::new(); - let values = [Some(b"Ned Flanders"), None, Some(b"Troy McClure")]; for value in values { match value { Some(v) => builder.append_value(v), @@ -1769,8 +1765,27 @@ mod tests { } } let array = Arc::new(builder.finish()) as ArrayRef; - let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + RecordBatch::try_new(schema, vec![array]).unwrap() + } + + fn build_array_binary_view(values: &[Option<&[u8]>]) -> RecordBatch { + let schema = SchemaRef::new(Schema::new(vec![Field::new( + "bytes", + DataType::BinaryView, + true, + )])); + let mut builder = BinaryViewBuilder::new(); + for value in values { + match value { + Some(v) => builder.append_value(v), + None => builder.append_null(), + } + } + let array = Arc::new(builder.finish()) as ArrayRef; + RecordBatch::try_new(schema, vec![array]).unwrap() + } + fn assert_binary_json(batch: &RecordBatch) { // encode and check JSON with explicit nulls: { let mut buf = Vec::new(); @@ -1778,7 +1793,7 @@ mod tests { let mut writer = WriterBuilder::new() .with_explicit_nulls(true) .build::<_, JsonArray>(&mut buf); - writer.write(&batch).unwrap(); + writer.write(batch).unwrap(); writer.close().unwrap(); serde_json::from_slice(&buf).unwrap() }; @@ -1806,20 +1821,16 @@ mod tests { // explicit nulls are off by default, so we don't need // to set that when creating the writer: let mut writer = ArrayWriter::new(&mut buf); - writer.write(&batch).unwrap(); + writer.write(batch).unwrap(); writer.close().unwrap(); serde_json::from_slice(&buf).unwrap() }; assert_eq!( json!([ - { - "bytes": "4e656420466c616e64657273" - }, - {}, // empty because nulls are omitted - { - "bytes": "54726f79204d63436c757265" - } + { "bytes": "4e656420466c616e64657273" }, + {}, + { "bytes": "54726f79204d63436c757265" } ]), json_value ); @@ -1828,10 +1839,25 @@ mod tests { #[test] fn test_writer_binary() { + let values: [Option<&[u8]>; 3] = [ + Some(b"Ned Flanders" as &[u8]), + None, + Some(b"Troy McClure" as &[u8]), + ]; // Binary: - binary_encoding_test::(); + { + let batch = build_array_binary::(&values); + assert_binary_json(&batch); + } // LargeBinary: - binary_encoding_test::(); + { + let batch = build_array_binary::(&values); + assert_binary_json(&batch); + } + { + let batch = build_array_binary_view(&values); + assert_binary_json(&batch); + } } #[test]