diff --git a/avro/README.md b/avro/README.md index 51bc88ae..0a850f8d 100644 --- a/avro/README.md +++ b/avro/README.md @@ -746,6 +746,89 @@ set_schemata_equality_comparator(Box::new(MyCustomSchemataEq)); If the application parses schemas before setting a comparator, the default comparator will be registered and used! +### Deserializing Avro Byte Arrays + +If using the Serde way to deserialize Avro files, there are sometimes special +`#[serde(with = ...)]` field attributes that need to be applied in the case of byte arrays. + +```rust + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Deserialize, Serialize)] + struct SampleStruct { + #[serde(with = "apache_avro::serde_avro_bytes")] + non_optional_bytes: Vec<u8>, + #[serde(with = "apache_avro::serde_avro_bytes_opt")] + optional_bytes: Option<Vec<u8>>, + #[serde(with = "apache_avro::serde_avro_fixed")] + non_optional_fixed: [u8; 6], + #[serde(with = "apache_avro::serde_avro_fixed_opt")] + optional_fixed: Option<[u8; 6]>, + } +``` + +Here is a complete example of a serde round trip of a struct with a nullable byte array: + +```rust +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Deserialize, PartialEq, Serialize)] +struct ExampleByteArray { + #[serde(with = "apache_avro::serde_avro_bytes_opt")] + data_bytes: Option<Vec<u8>>, + description: Option<String>, +} + +fn serde_byte_array() { + let raw_schema = r#" + { + "type": "record", + "name": "SimpleRecord", + "fields": [ + {"name": "data_bytes", "type": ["null", "bytes"], "default": null}, + {"name": "description", "type": ["null", "string"], "default": null} + ] + }"#; + + let schema = apache_avro::Schema::parse_str(raw_schema).unwrap(); + + // Create vector of ExampleByteArray + let records = vec![ + ExampleByteArray { + data_bytes: Some(vec![1, 2, 3, 4, 5]), + description: Some("First record".to_string()), + }, + ExampleByteArray { + data_bytes: None, + description: Some("Second record".to_string()), + }, + ExampleByteArray { + data_bytes: Some(vec![10, 20, 30]), + description: None, + }, + ]; + + 
// Serialize records to Avro binary format with the schema + let mut writer = apache_avro::Writer::new(&schema, Vec::new()); + for record in &records { + writer.append_ser(record).unwrap(); + } + + let avro_data = writer.into_inner().unwrap(); + + + // Deserialize Avro binary data back into ExampleByteArray structs + let reader = apache_avro::Reader::new(&avro_data[..]).unwrap(); + let deserialized_records: Vec<ExampleByteArray> = reader + .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap()) + .collect(); + + assert_eq!(records, deserialized_records); +} +``` + +Full implementation and other options for things like fixed byte arrays can be found in src/bytes.rs + ## License diff --git a/avro/src/lib.rs b/avro/src/lib.rs index 6ec1a337..63cab49e 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -858,6 +858,89 @@ //! If the application parses schemas before setting a comparator, the default comparator will be //! registered and used! //! +//! ## Deserializing Avro Byte Arrays +//! +//! If using the Serde way to deserialize Avro files, there are sometimes special +//! `#[serde(with = ...)]` field attributes that need to be applied in the case of byte arrays. +//! +//! ```rust +//! use serde::{Deserialize, Serialize}; +//! +//! #[derive(Debug, Deserialize, Serialize)] +//! struct SampleStruct { +//! #[serde(with = "apache_avro::serde_avro_bytes")] +//! non_optional_bytes: Vec<u8>, +//! #[serde(with = "apache_avro::serde_avro_bytes_opt")] +//! optional_bytes: Option<Vec<u8>>, +//! #[serde(with = "apache_avro::serde_avro_fixed")] +//! non_optional_fixed: [u8; 6], +//! #[serde(with = "apache_avro::serde_avro_fixed_opt")] +//! optional_fixed: Option<[u8; 6]>, +//! } +//! ``` +//! +//! Here is a complete example of a serde round trip of a struct with a nullable byte array: +//! +//! ```rust +//! use serde::{Deserialize, Serialize}; +//! +//! #[derive(Debug, Deserialize, PartialEq, Serialize)] +//! struct ExampleByteArray { +//! #[serde(with = "apache_avro::serde_avro_bytes_opt")] +//! data_bytes: Option<Vec<u8>>, +//! 
description: Option<String>, +//! } +//! +//! fn serde_byte_array() { +//! let raw_schema = r#" +//! { +//! "type": "record", +//! "name": "SimpleRecord", +//! "fields": [ +//! {"name": "data_bytes", "type": ["null", "bytes"], "default": null}, +//! {"name": "description", "type": ["null", "string"], "default": null} +//! ] +//! }"#; +//! +//! let schema = apache_avro::Schema::parse_str(raw_schema).unwrap(); +//! +//! // Create vector of ExampleByteArray +//! let records = vec![ +//! ExampleByteArray { +//! data_bytes: Some(vec![1, 2, 3, 4, 5]), +//! description: Some("First record".to_string()), +//! }, +//! ExampleByteArray { +//! data_bytes: None, +//! description: Some("Second record".to_string()), +//! }, +//! ExampleByteArray { +//! data_bytes: Some(vec![10, 20, 30]), +//! description: None, +//! }, +//! ]; +//! +//! // Serialize records to Avro binary format with the schema +//! let mut writer = apache_avro::Writer::new(&schema, Vec::new()); +//! for record in &records { +//! writer.append_ser(record).unwrap(); +//! } +//! +//! let avro_data = writer.into_inner().unwrap(); +//! +//! +//! // Deserialize Avro binary data back into ExampleByteArray structs +//! let reader = apache_avro::Reader::new(&avro_data[..]).unwrap(); +//! let deserialized_records: Vec<ExampleByteArray> = reader +//! .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap()) +//! .collect(); +//! +//! assert_eq!(records, deserialized_records); +//! } +//! ``` +//! +//! Full implementation and other options for things like fixed byte arrays can be found in src/bytes.rs +//! 
mod bigdecimal; mod bytes; diff --git a/avro/tests/avro-rs-285-bytes_deserialization.rs b/avro/tests/avro-rs-285-bytes_deserialization.rs new file mode 100644 index 00000000..b2e97bf0 --- /dev/null +++ b/avro/tests/avro-rs-285-bytes_deserialization.rs @@ -0,0 +1,112 @@ +use apache_avro_test_helper::TestResult; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Deserialize, PartialEq, Serialize)] +struct ExampleByteArray { + #[serde(with = "apache_avro::serde_avro_bytes_opt")] + data_bytes: Option<Vec<u8>>, + description: Option<String>, +} + +#[derive(Deserialize, Serialize)] +struct ExampleByteArrayFiltered { + description: Option<String>, +} + +#[test] +fn avro_rs_285_bytes_deserialization_round_trip() -> TestResult { + // define schema + let raw_schema = r#" + { + "type": "record", + "name": "SimpleRecord", + "fields": [ + {"name": "data_bytes", "type": ["null", "bytes"], "default": null}, + {"name": "description", "type": ["null", "string"], "default": null} + ] + } + "#; + + let schema = apache_avro::Schema::parse_str(raw_schema)?; + + let records = vec![ + ExampleByteArray { + data_bytes: Some(vec![1, 2, 3, 4, 5]), + description: Some("First record".to_string()), + }, + ExampleByteArray { + data_bytes: None, + description: Some("Second record".to_string()), + }, + ExampleByteArray { + data_bytes: Some(vec![10, 20, 30]), + description: None, + }, + ]; + + // serialize records to Avro binary format with schema + let mut writer = apache_avro::Writer::new(&schema, Vec::new()); + for record in &records { + writer.append_ser(record)?; + } + + let avro_data = writer.into_inner()?; + + // deserialize Avro binary data back into ExampleByteArray structs + let reader = apache_avro::Reader::new(&avro_data[..])?; + let deserialized_records: Vec<ExampleByteArray> = reader + .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap()) + .collect(); + + assert_eq!(records, deserialized_records); + Ok(()) +} + +#[test] +fn avro_rs_285_bytes_deserialization_filtered_round_trip() -> TestResult { + let 
raw_schema = r#" + { + "type": "record", + "name": "SimpleRecord", + "fields": [ + {"name": "data_bytes", "type": ["null", "bytes"], "default": null}, + {"name": "description", "type": ["null", "string"], "default": null} + ] + } + "#; + + let schema = apache_avro::Schema::parse_str(raw_schema)?; + + let records = vec![ + ExampleByteArray { + data_bytes: Some(vec![1, 2, 3, 4, 5]), + description: Some("First record".to_string()), + }, + ExampleByteArray { + data_bytes: None, + description: Some("Second record".to_string()), + }, + ExampleByteArray { + data_bytes: Some(vec![10, 20, 30]), + description: None, + }, + ]; + + // serialize records to Avro binary format with schema + let mut writer = apache_avro::Writer::new(&schema, Vec::new()); + for record in &records { + writer.append_ser(record)?; + } + + let avro_data = writer.into_inner()?; + + // deserialize Avro binary data back into ExampleByteArrayFiltered structs + let reader = apache_avro::Reader::new(&avro_data[..])?; + let deserialized_records: Vec<ExampleByteArrayFiltered> = reader + .map(|value| apache_avro::from_value::<ExampleByteArrayFiltered>(&value.unwrap()).unwrap()) + .collect(); + + assert_eq!(records.len(), deserialized_records.len()); + + Ok(()) +}