Skip to content
83 changes: 83 additions & 0 deletions avro/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,89 @@ set_schemata_equality_comparator(Box::new(MyCustomSchemataEq));
If the application parses schemas before setting a comparator, the default comparator will be
registered and used!

### Deserializing Avro Byte Arrays

When using Serde to serialize or deserialize Avro data, fields that hold byte arrays sometimes
need a special `#[serde(with = "...")]` field attribute, as shown below.

```rust
use serde::{Deserialize, Serialize};

// Each byte-array field selects an `apache_avro` helper module via `#[serde(with = "...")]`;
// the `_opt` variants are the ones paired with `Option<...>` fields.
#[derive(Debug, Deserialize, Serialize)]
struct SampleStruct {
// Growable byte buffer.
#[serde(with = "apache_avro::serde_avro_bytes")]
non_optional_bytes: Vec<u8>,
// Growable byte buffer that may be absent.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
optional_bytes: Option<Vec<u8>>,
// Fixed-size array of 6 bytes.
#[serde(with = "apache_avro::serde_avro_fixed")]
non_optional_fixed: [u8; 6],
// Fixed-size array of 6 bytes that may be absent.
#[serde(with = "apache_avro::serde_avro_fixed_opt")]
optional_fixed: Option<[u8; 6]>,
}
```

Here is a complete example of a serde round trip of a struct with a nullable byte array:

```rust
use serde::{Deserialize, Serialize};

// Record type used for the round trip below; `PartialEq` lets the final `assert_eq!` compare
// the original records with the deserialized ones.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
struct ExampleByteArray {
// Optional byte payload, (de)serialized through `apache_avro::serde_avro_bytes_opt`.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
data_bytes: Option<Vec<u8>>,
// Optional text; no `with` attribute is applied to it.
description: Option<String>,
}

// Serializes three `ExampleByteArray` records with a writer, reads them back with a reader
// and checks that nothing changed on the way.
fn serde_byte_array() {
    let schema_json = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}"#;
    let schema = apache_avro::Schema::parse_str(schema_json).unwrap();

    // One record with both fields set, one without bytes, one without a description
    let originals = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // Serialize the records to Avro binary format with the schema
    let mut encoder = apache_avro::Writer::new(&schema, Vec::new());
    for item in &originals {
        encoder.append_ser(item).unwrap();
    }
    let encoded = encoder.into_inner().unwrap();

    // Deserialize the Avro binary data back into ExampleByteArray structs
    let decoded: Vec<ExampleByteArray> = apache_avro::Reader::new(&encoded[..])
        .unwrap()
        .map(|entry| {
            let avro_value = entry.unwrap();
            apache_avro::from_value::<ExampleByteArray>(&avro_value).unwrap()
        })
        .collect();

    assert_eq!(originals, decoded);
}
```

The full implementation and other options, such as those for fixed-size byte arrays, can be found in `src/bytes.rs`.

<!-- cargo-rdme end -->

## License
Expand Down
83 changes: 83 additions & 0 deletions avro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,89 @@
//! If the application parses schemas before setting a comparator, the default comparator will be
//! registered and used!
//!
//! ## Deserializing Avro Byte Arrays
//!
//! When using Serde to serialize or deserialize Avro data, fields that hold byte arrays sometimes
//! need a special `#[serde(with = "...")]` field attribute, as shown below.
//!
//! ```rust
//! use serde::{Deserialize, Serialize};
//!
//! // Each byte-array field selects an `apache_avro` helper module via `#[serde(with = "...")]`;
//! // the `_opt` variants are the ones paired with `Option<...>` fields.
//! #[derive(Debug, Deserialize, Serialize)]
//! struct SampleStruct {
//! // Growable byte buffer.
//! #[serde(with = "apache_avro::serde_avro_bytes")]
//! non_optional_bytes: Vec<u8>,
//! // Growable byte buffer that may be absent.
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
//! optional_bytes: Option<Vec<u8>>,
//! // Fixed-size array of 6 bytes.
//! #[serde(with = "apache_avro::serde_avro_fixed")]
//! non_optional_fixed: [u8; 6],
//! // Fixed-size array of 6 bytes that may be absent.
//! #[serde(with = "apache_avro::serde_avro_fixed_opt")]
//! optional_fixed: Option<[u8; 6]>,
//! }
//! ```
//!
//! Here is a complete example of a serde round trip of a struct with a nullable byte array:
//!
//! ```rust
//! use serde::{Deserialize, Serialize};
//!
//! // Record type used for the round trip below; `PartialEq` lets the final `assert_eq!` compare
//! // the original records with the deserialized ones.
//! #[derive(Debug, Deserialize, PartialEq, Serialize)]
//! struct ExampleByteArray {
//! // Optional byte payload, (de)serialized through `apache_avro::serde_avro_bytes_opt`.
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
//! data_bytes: Option<Vec<u8>>,
//! // Optional text; no `with` attribute is applied to it.
//! description: Option<String>,
//! }
//!
//! // Serializes three `ExampleByteArray` records with a writer, reads them back with a reader
//! // and checks that nothing changed on the way.
//! fn serde_byte_array() {
//!     let schema_json = r#"
//! {
//! "type": "record",
//! "name": "SimpleRecord",
//! "fields": [
//! {"name": "data_bytes", "type": ["null", "bytes"], "default": null},
//! {"name": "description", "type": ["null", "string"], "default": null}
//! ]
//! }"#;
//!     let schema = apache_avro::Schema::parse_str(schema_json).unwrap();
//!
//!     // One record with both fields set, one without bytes, one without a description
//!     let originals = vec![
//!         ExampleByteArray {
//!             data_bytes: Some(vec![1, 2, 3, 4, 5]),
//!             description: Some("First record".to_string()),
//!         },
//!         ExampleByteArray {
//!             data_bytes: None,
//!             description: Some("Second record".to_string()),
//!         },
//!         ExampleByteArray {
//!             data_bytes: Some(vec![10, 20, 30]),
//!             description: None,
//!         },
//!     ];
//!
//!     // Serialize the records to Avro binary format with the schema
//!     let mut encoder = apache_avro::Writer::new(&schema, Vec::new());
//!     for item in &originals {
//!         encoder.append_ser(item).unwrap();
//!     }
//!     let encoded = encoder.into_inner().unwrap();
//!
//!     // Deserialize the Avro binary data back into ExampleByteArray structs
//!     let decoded: Vec<ExampleByteArray> = apache_avro::Reader::new(&encoded[..])
//!         .unwrap()
//!         .map(|entry| {
//!             let avro_value = entry.unwrap();
//!             apache_avro::from_value::<ExampleByteArray>(&avro_value).unwrap()
//!         })
//!         .collect();
//!
//!     assert_eq!(originals, decoded);
//! }
//! # // Hidden from the rendered docs: call the example so the doctest really runs the round trip.
//! # serde_byte_array();
//! ```
//!
//! The full implementation and other options, such as those for fixed-size byte arrays, can be found in `src/bytes.rs`.
//!

mod bigdecimal;
mod bytes;
Expand Down
112 changes: 112 additions & 0 deletions avro/tests/avro-rs-285-bytes_deserialization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use apache_avro_test_helper::TestResult;
use serde::{Deserialize, Serialize};

/// Record written by both tests in this file; it mirrors the two fields of the
/// `SimpleRecord` schema declared inside each test.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
struct ExampleByteArray {
/// Optional byte payload, (de)serialized through `apache_avro::serde_avro_bytes_opt`.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
data_bytes: Option<Vec<u8>>,
/// Optional text; no `with` attribute is applied to it.
description: Option<String>,
}

/// Reader-side view of the record that deliberately omits `data_bytes`.
///
/// Used to check that records containing a bytes field can still be deserialized into a
/// struct that does not declare that field. `Debug` and `PartialEq` are derived, like on
/// `ExampleByteArray`, so values can be printed and compared in assertions.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
struct ExampleByteArrayFiltered {
    /// Optional text, the only field kept from the written record.
    description: Option<String>,
}

/// Round-trips records with an optional bytes field through `Writer` and `Reader`.
///
/// Each record is written with `append_ser`, read back, converted with `from_value`, and the
/// resulting vector must equal the input. Failures are propagated with `?` instead of
/// panicking inside the iterator closure.
#[test]
fn avro_rs_285_bytes_deserialization_round_trip() -> TestResult {
    // define schema
    let raw_schema = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}
"#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    // cover: both fields set, bytes missing, description missing
    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // serialize records to Avro binary format with schema
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // deserialize Avro binary data back into ExampleByteArray structs;
    // the first read or conversion error aborts the collection and fails the test
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records = reader
        .map(|value| apache_avro::from_value::<ExampleByteArray>(&value?))
        .collect::<Result<Vec<_>, _>>()?;

    assert_eq!(records, deserialized_records);
    Ok(())
}

/// Writes full records and reads them back into a struct that omits `data_bytes`.
///
/// Besides checking that every record is read, the test verifies that each `description`
/// survives unchanged, so a deserializer that drops or mixes up values is detected.
/// Failures are propagated with `?` instead of panicking inside the iterator closure.
#[test]
fn avro_rs_285_bytes_deserialization_filtered_round_trip() -> TestResult {
    let raw_schema = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}
"#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    // cover: both fields set, bytes missing, description missing
    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // serialize records to Avro binary format with schema
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // deserialize Avro binary data back into ExampleByteArrayFiltered structs;
    // the target struct has no `data_bytes` field, so that value is not kept
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records = reader
        .map(|value| apache_avro::from_value::<ExampleByteArrayFiltered>(&value?))
        .collect::<Result<Vec<_>, _>>()?;

    assert_eq!(records.len(), deserialized_records.len());

    // the retained field must match the written one, record by record
    let expected_descriptions: Vec<Option<&str>> = records
        .iter()
        .map(|record| record.description.as_deref())
        .collect();
    let actual_descriptions: Vec<Option<&str>> = deserialized_records
        .iter()
        .map(|record| record.description.as_deref())
        .collect();
    assert_eq!(expected_descriptions, actual_descriptions);

    Ok(())
}