Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,10 @@ class PyRepository:
async def inspect_snapshot_async(
self, snapshot_id: str, *, pretty: bool = True
) -> str: ...
# Type stub for the blocking manifest inspector: takes the manifest id as a
# string plus a keyword-only `pretty` flag (default True) and returns a str.
def inspect_manifest(self, manifest_id: str, *, pretty: bool = True) -> str: ...
# Type stub for the coroutine variant of the manifest inspector: same
# parameters as the blocking stub (manifest id string, keyword-only `pretty`
# defaulting to True); awaiting it yields a str.
async def inspect_manifest_async(
self, manifest_id: str, *, pretty: bool = True
) -> str: ...

class PySession:
@classmethod
Expand Down
8 changes: 8 additions & 0 deletions icechunk-python/python/icechunk/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,3 +1397,11 @@ async def inspect_snapshot_async(
self, snapshot_id: str, *, pretty: bool = True
) -> str:
return await self._repository.inspect_snapshot_async(snapshot_id, pretty=pretty)

def inspect_manifest(self, manifest_id: str, *, pretty: bool = True) -> str:
    """Return a textual description of the manifest with id ``manifest_id``.

    This is a thin wrapper: the call is forwarded unchanged to the wrapped
    ``self._repository`` object.

    Parameters
    ----------
    manifest_id : str
        Id of the manifest to inspect.
    pretty : bool, optional
        Keyword-only flag forwarded to the underlying repository.
        Defaults to ``True``.

    Returns
    -------
    str
        Whatever string the underlying ``inspect_manifest`` call returns.
    """
    return self._repository.inspect_manifest(manifest_id, pretty=pretty)

async def inspect_manifest_async(
    self, manifest_id: str, *, pretty: bool = True
) -> str:
    """Coroutine variant of ``inspect_manifest``.

    Awaits ``self._repository.inspect_manifest_async`` with the same
    arguments and returns its result.

    Parameters
    ----------
    manifest_id : str
        Id of the manifest to inspect.
    pretty : bool, optional
        Keyword-only flag forwarded to the underlying repository.
        Defaults to ``True``.

    Returns
    -------
    str
        Whatever string the awaited underlying call resolves to.
    """
    return await self._repository.inspect_manifest_async(manifest_id, pretty=pretty)
40 changes: 38 additions & 2 deletions icechunk-python/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ use icechunk::{
Repository,
config::Credentials,
format::{
SnapshotId,
ManifestId, SnapshotId,
snapshot::{ManifestFileInfo, SnapshotInfo, SnapshotProperties},
transaction_log::Diff,
},
inspect::snapshot_json,
inspect::{manifest_json, snapshot_json},
ops::{
gc::{ExpiredRefAction, GCConfig, GCSummary, expire, garbage_collect},
manifests::rewrite_manifests,
Expand Down Expand Up @@ -1660,6 +1660,42 @@ impl PyRepository {
Ok(res)
})
}

/// Return a JSON description of the manifest identified by `manifest_id`.
///
/// Blocking entry point: the lookup is driven to completion on the Tokio
/// runtime obtained from `pyo3_async_runtimes`. `pretty` is forwarded to
/// `manifest_json`. An id that cannot be parsed as a `ManifestId` is
/// surfaced as a repository error carrying the parser's message.
#[pyo3(signature = (manifest_id, *, pretty = true))]
fn inspect_manifest(&self, manifest_id: String, pretty: bool) -> PyResult<String> {
    let runtime = pyo3_async_runtimes::tokio::get_runtime();
    let json = runtime
        .block_on(async move {
            // The read lock is taken before the id is parsed.
            let repo = self.0.read().await;
            let id = ManifestId::try_from(manifest_id.as_str())
                .map_err(|err| RepositoryErrorKind::Other(err.to_string()))?;
            manifest_json(repo.asset_manager(), &id, pretty).await
        })
        .map_err(PyIcechunkStoreError::RepositoryError)?;
    Ok(json)
}

/// Awaitable counterpart of `inspect_manifest`.
///
/// Hands a future to `pyo3_async_runtimes::tokio::future_into_py`; the
/// resulting Python object resolves to the JSON string produced by
/// `manifest_json`, or raises if the id cannot be parsed or the lookup fails.
#[pyo3(signature = (manifest_id, *, pretty = true))]
fn inspect_manifest_async<'py>(
    &self,
    py: Python<'py>,
    manifest_id: String,
    pretty: bool,
) -> PyResult<Bound<'py, PyAny>> {
    // The future cannot borrow `self`, so it gets its own clone of the handle.
    let repository = self.0.clone();
    pyo3_async_runtimes::tokio::future_into_py(py, async move {
        let repo = repository.read().await;
        let id = ManifestId::try_from(manifest_id.as_str()).map_err(|err| {
            PyIcechunkStoreError::RepositoryError(RepositoryError::from(
                RepositoryErrorKind::Other(err.to_string()),
            ))
        })?;
        let json = manifest_json(repo.asset_manager(), &id, pretty)
            .await
            .map_err(PyIcechunkStoreError::RepositoryError)?;
        Ok(json)
    })
}
}

fn map_credentials(
Expand Down
36 changes: 36 additions & 0 deletions icechunk-python/tests/test_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,39 @@ async def test_inspect_snapshot_async() -> None:
assert pretty["id"] == snap
assert pretty_str != non_pretty_str
assert pretty == non_pretty


def test_inspect_manifest() -> None:
    """Inspect the first manifest of the `main` tip snapshot, pretty and compact.

    Both renderings must parse to the same JSON document, differ as raw
    strings, and report the id that was requested.
    """
    repo = ic.Repository.open(
        storage=ic.local_filesystem_storage("./tests/data/test-repo")
    )
    # The manifest id is discovered through the snapshot inspection output.
    snap = next(repo.ancestry(branch="main")).id
    snap_info = json.loads(repo.inspect_snapshot(snap, pretty=True))
    man_id = snap_info["manifests"][0]["id"]
    pretty_str = repo.inspect_manifest(man_id, pretty=True)
    non_pretty_str = repo.inspect_manifest(man_id, pretty=False)

    pretty = json.loads(pretty_str)
    non_pretty = json.loads(non_pretty_str)

    assert pretty["id"] == man_id
    assert pretty_str != non_pretty_str
    assert pretty == non_pretty


async def test_inspect_manifest_async() -> None:
    """Async variant: inspect the first manifest of the `main` tip snapshot.

    Both renderings must parse to the same JSON document, differ as raw
    strings, and report the id that was requested.
    """
    repo = ic.Repository.open(
        storage=ic.local_filesystem_storage("./tests/data/test-repo")
    )
    # The manifest id is discovered through the snapshot inspection output.
    snap = next(repo.ancestry(branch="main")).id
    snap_info = json.loads(await repo.inspect_snapshot_async(snap, pretty=True))
    man_id = snap_info["manifests"][0]["id"]
    pretty_str = await repo.inspect_manifest_async(man_id, pretty=True)
    non_pretty_str = await repo.inspect_manifest_async(man_id, pretty=False)

    pretty = json.loads(pretty_str)
    non_pretty = json.loads(non_pretty_str)

    assert pretty["id"] == man_id
    assert pretty_str != non_pretty_str
    assert pretty == non_pretty
9 changes: 9 additions & 0 deletions icechunk/src/format/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,15 @@ impl Manifest {
array_manifest.refs().iter().map(|r| ref_to_payload(r))
})
}

/// Iterate over the node id of every array entry stored in this manifest.
///
/// Ids are yielded in the order the entries appear in the manifest root.
pub fn nodes(&self) -> impl Iterator<Item = NodeId> + '_ {
    let arrays = self.root().arrays();
    arrays.iter().map(|entry| entry.node_id().into())
}

/// Number of chunk references recorded for `node` in this manifest.
///
/// Returns `None` when `lookup_node` yields nothing for `node`.
pub fn node_refs(&self, node: &NodeId) -> Option<usize> {
    let array_manifest = lookup_node(self.root(), node)?;
    Some(array_manifest.refs().len())
}
}

fn lookup_node<'a>(
Expand Down
89 changes: 87 additions & 2 deletions icechunk/src/inspect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use serde::{Deserialize, Serialize};
use crate::{
asset_manager::AssetManager,
format::{
SnapshotId,
manifest::ManifestRef,
IcechunkFormatError, ManifestId, SnapshotId,
manifest::{Manifest, ManifestRef},
snapshot::{
ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties,
},
Expand Down Expand Up @@ -91,6 +91,65 @@ struct SnapshotInfoInspect {
nodes: Vec<NodeSnapshotInspect>,
}

/// Per-array entry of the manifest inspection output.
///
/// Serialized with serde; field names and order are part of the emitted JSON.
#[derive(Debug, Serialize, Deserialize)]
struct ArrayManifestInspect {
/// Id of the node this entry belongs to, rendered via `to_string()`.
node_id: String,
/// Number of chunk references the manifest holds for that node.
num_chunk_refs: u64,
}

/// Top-level document produced when inspecting a manifest.
///
/// Serialized with serde; field names and order are part of the emitted JSON.
#[derive(Debug, Serialize, Deserialize)]
struct ManifestInfoInspect {
/// Manifest id, rendered via `to_string()`.
id: String,
/// Length in bytes of the manifest's byte buffer (`Manifest::bytes().len()`).
size_bytes: u64,
/// Value of `Manifest::len()` for the manifest.
total_chunk_refs: u64,
/// One entry per node listed by the manifest.
arrays: Vec<ArrayManifestInspect>,
}

impl TryFrom<&Manifest> for ManifestInfoInspect {
    type Error = IcechunkFormatError;

    /// Summarize `value` into its inspectable form.
    ///
    /// Every node listed by the manifest contributes one `arrays` entry with
    /// its reference count; a node for which `node_refs` returns `None` is
    /// skipped. The current implementation never returns `Err`.
    fn try_from(value: &Manifest) -> Result<Self, Self::Error> {
        let mut arrays = Vec::new();
        for node_id in value.nodes() {
            if let Some(ref_count) = value.node_refs(&node_id) {
                arrays.push(ArrayManifestInspect {
                    node_id: node_id.to_string(),
                    num_chunk_refs: ref_count as u64,
                });
            }
        }

        let summary = Self {
            id: value.id().to_string(),
            size_bytes: value.bytes().len() as u64,
            total_chunk_refs: value.len() as u64,
            arrays,
        };
        Ok(summary)
    }
}

async fn inspect_manifest(
asset_manager: &AssetManager,
id: &ManifestId,
) -> RepositoryResult<ManifestInfoInspect> {
let manifest = asset_manager.fetch_manifest_unknown_size(id).await?;
Ok(manifest.as_ref().try_into()?)
}

/// Render the inspection summary of manifest `id` as JSON.
///
/// With `pretty` set the output comes from `serde_json::to_string_pretty`,
/// otherwise from `serde_json::to_string`. A serialization failure is
/// reported as `RepositoryErrorKind::Other` carrying the serde message.
pub async fn manifest_json(
    asset_manager: &AssetManager,
    id: &ManifestId,
    pretty: bool,
) -> RepositoryResult<String> {
    let info = inspect_manifest(asset_manager, id).await?;
    let rendered = if pretty {
        serde_json::to_string_pretty(&info)
    } else {
        serde_json::to_string(&info)
    };
    rendered.map_err(|err| RepositoryErrorKind::Other(err.to_string()).into())
}

async fn inspect_snapshot(
asset_manager: &AssetManager,
id: &SnapshotId,
Expand Down Expand Up @@ -156,4 +215,30 @@ mod tests {

Ok(())
}

/// Opens the `split-repo` fixture, takes the first manifest of the snapshot
/// at the tip of `main`, and checks that the pretty JSON produced by
/// `manifest_json` deserializes back and reports that manifest's id.
#[icechunk_macros::tokio_test]
async fn test_print_manifest() -> Result<(), Box<dyn std::error::Error>> {
    let st = Arc::new(
        ObjectStorage::new_local_filesystem(&PathBuf::from(
            "../icechunk-python/tests/data/split-repo",
        ))
        .await?,
    );
    let repo = Repository::open(None, st, Default::default()).await?;
    let snap = repo
        .ancestry(&VersionInfo::BranchTipRef("main".to_string()))
        .await?
        .boxed()
        .try_next()
        .await?
        .unwrap();

    let manifest_id = &snap.manifests[0].id;

    let json = manifest_json(repo.asset_manager(), manifest_id, true).await?;
    let info: ManifestInfoInspect = serde_json::from_str(json.as_str())?;
    // `assert_eq!` prints both ids when they differ; `assert!(a == b)` would not.
    assert_eq!(info.id, manifest_id.to_string());

    Ok(())
}
}
Loading