Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
665b970
feat!: Support compression codecs for JSON metadata and Avro
emkornfield Nov 12, 2025
091d3bc
fmt
emkornfield Nov 13, 2025
41a8c1c
fix clippy
emkornfield Nov 13, 2025
51e781e
clippy again
emkornfield Nov 13, 2025
253bf59
wip
emkornfield Nov 16, 2025
8bdb52d
address comments
emkornfield Nov 16, 2025
9d27116
no clone needed
emkornfield Nov 16, 2025
d1ee0b2
test compression works
emkornfield Nov 16, 2025
393622f
comments
emkornfield Nov 17, 2025
46fdb8e
update tests
emkornfield Nov 17, 2025
ec96917
address comments
emkornfield Nov 19, 2025
46efa9e
Remove Avro compression changes, keep only JSON metadata compression
emkornfield Nov 19, 2025
e96fe1a
address some comments
emkornfield Nov 20, 2025
53d0997
don't reference deprecated method
emkornfield Nov 20, 2025
4f033e8
don't call deprecated method
emkornfield Nov 20, 2025
2db8b89
with_next_version
emkornfield Nov 20, 2025
8b63161
refactor
emkornfield Nov 20, 2025
9308050
fix imports
emkornfield Nov 20, 2025
9bd46d6
fix compile
emkornfield Nov 20, 2025
f3276ce
fix clipp
emkornfield Nov 21, 2025
27cbc0b
remove ;
emkornfield Nov 21, 2025
bdc6bb5
Update crates/iceberg/src/spec/table_properties.rs
emkornfield Nov 21, 2025
05fdd81
address comment on .gz
emkornfield Nov 21, 2025
5d6404c
Merge branch 'feat/metadata-compression' of github.com-personal:emkor…
emkornfield Nov 21, 2025
207c0bf
add tests
emkornfield Nov 21, 2025
e83c449
fixes
emkornfield Nov 21, 2025
d07e628
Merge branch 'main' into feat/metadata-compression
Xuanwo Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/catalog/glue/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,8 @@ impl Catalog for GlueCatalog {
.build()?
.metadata;
let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
MetadataLocation::new_with_properties(location.clone(), metadata.properties())
.to_string();

metadata.write_to(&self.file_io, &metadata_location).await?;

Expand Down
3 changes: 2 additions & 1 deletion crates/catalog/glue/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,9 @@ mod tests {
fn test_convert_to_glue_table() -> Result<()> {
let table_name = "my_table".to_string();
let location = "s3a://warehouse/hive".to_string();
let metadata_location = MetadataLocation::new_with_table_location(location).to_string();
let properties = HashMap::new();
let metadata_location =
MetadataLocation::new_with_properties(location, &properties).to_string();
let schema = Schema::builder()
.with_schema_id(1)
.with_fields(vec![
Expand Down
3 changes: 2 additions & 1 deletion crates/catalog/hms/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ impl Catalog for HmsCatalog {
.metadata;

let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
MetadataLocation::new_with_properties(location.clone(), metadata.properties())
.to_string();

metadata.write_to(&self.file_io, &metadata_location).await?;

Expand Down
4 changes: 2 additions & 2 deletions crates/catalog/hms/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,9 @@ mod tests {
let db_name = "my_db".to_string();
let table_name = "my_table".to_string();
let location = "s3a://warehouse/hms".to_string();
let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
let properties = HashMap::new();
let metadata_location =
MetadataLocation::new_with_properties(location.clone(), &properties).to_string();
let schema = Schema::builder()
.with_schema_id(1)
.with_fields(vec![
Expand Down
10 changes: 6 additions & 4 deletions crates/catalog/s3tables/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ impl Catalog for S3TablesCatalog {

// prepare metadata location. the warehouse location is generated by s3tables catalog,
// which looks like: s3://e6c9bf20-991a-46fb-kni5xs1q2yxi3xxdyxzjzigdeop1quse2b--table-s3
let metadata_location = match &creation.location {
let table_location = match &creation.location {
Some(_) => {
return Err(Error::new(
ErrorKind::DataInvalid,
Expand All @@ -467,16 +467,18 @@ impl Catalog for S3TablesCatalog {
.send()
.await
.map_err(from_aws_sdk_error)?;
let warehouse_location = get_resp.warehouse_location().to_string();
MetadataLocation::new_with_table_location(warehouse_location).to_string()
get_resp.warehouse_location().to_string()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a semantic change because up the update to creation.location below, need to investigate further what the correct thing to do here is.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, from the comment above (L451-452) it seems like the "s3tables catalog" generates the metadata file location and does not allow any modification

so maybe we should just use

        let metadata_location =
            MetadataLocation::new_with_properties(metadata_location, None)
                .to_string(); 

so as to not do any path modification before writing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the original code was wrong. As best I can test warehouseLocation is actually the table location.

I've updated it to assign to table_location and then assign it below (as far as I can tell, the .location field is meant to be table location and not the metadata.json file).

}
};

// write metadata to file
creation.location = Some(metadata_location.clone());
creation.location = Some(table_location.clone());
let metadata = TableMetadataBuilder::from_table_creation(creation)?
.build()?
.metadata;
let metadata_location =
MetadataLocation::new_with_properties(table_location, metadata.properties())
.to_string();
metadata.write_to(&self.file_io, &metadata_location).await?;

// update metadata location
Expand Down
3 changes: 2 additions & 1 deletion crates/catalog/sql/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,8 @@ impl Catalog for SqlCatalog {
.build()?
.metadata;
let tbl_metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
MetadataLocation::new_with_properties(location.clone(), tbl_metadata.properties())
.to_string();

tbl_metadata
.write_to(&self.fileio, &tbl_metadata_location)
Expand Down
3 changes: 2 additions & 1 deletion crates/iceberg/src/catalog/memory/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ impl Catalog for MemoryCatalog {
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
.build()?
.metadata;
let metadata_location = MetadataLocation::new_with_table_location(location).to_string();
let metadata_location =
MetadataLocation::new_with_properties(location, metadata.properties()).to_string();

metadata.write_to(&self.file_io, &metadata_location).await?;

Expand Down
Loading
Loading