Skip to content

Commit 03af3dd

Browse files
committed
Implement and add python bindings
1 parent 076b8cf commit 03af3dd

File tree

6 files changed

+124
-6
lines changed

6 files changed

+124
-6
lines changed

icechunk-python/python/icechunk/_icechunk_python.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,10 @@ class PyRepository:
14991499
async def inspect_snapshot_async(
15001500
self, snapshot_id: str, *, pretty: bool = True
15011501
) -> str: ...
1502+
def inspect_manifest(
    self,
    manifest_id: str,
    *,
    pretty: bool = True,
) -> str:
    """Return a textual description of the manifest identified by ``manifest_id``.

    ``pretty`` is keyword-only and defaults to ``True``; it selects between the
    pretty-printed and the compact rendering of the same content.
    """
    ...
1503+
async def inspect_manifest_async(
    self, manifest_id: str, *, pretty: bool = True
) -> str:
    """Awaitable counterpart of ``inspect_manifest``.

    Takes the same arguments (``pretty`` is keyword-only, default ``True``)
    and resolves to the same kind of string.
    """
    ...
15021506

15031507
class PySession:
15041508
@classmethod

icechunk-python/python/icechunk/repository.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,3 +1397,11 @@ async def inspect_snapshot_async(
13971397
self, snapshot_id: str, *, pretty: bool = True
13981398
) -> str:
13991399
return await self._repository.inspect_snapshot_async(snapshot_id, pretty=pretty)
1400+
1401+
def inspect_manifest(self, manifest_id: str, *, pretty: bool = True) -> str:
    """Describe the manifest identified by ``manifest_id``.

    Thin wrapper: forwards ``manifest_id`` and the keyword-only ``pretty``
    flag to the underlying ``self._repository.inspect_manifest`` and returns
    its result unchanged.
    """
    return self._repository.inspect_manifest(manifest_id, pretty=pretty)
1403+
1404+
async def inspect_manifest_async(
    self, manifest_id: str, *, pretty: bool = True
) -> str:
    """Awaitable variant of ``inspect_manifest``.

    Thin wrapper: awaits ``self._repository.inspect_manifest_async`` with the
    same ``manifest_id`` and keyword-only ``pretty`` flag and returns its
    result unchanged.
    """
    return await self._repository.inspect_manifest_async(manifest_id, pretty=pretty)

icechunk-python/src/repository.rs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ use icechunk::{
1313
Repository,
1414
config::Credentials,
1515
format::{
16-
SnapshotId,
16+
ManifestId, SnapshotId,
1717
snapshot::{ManifestFileInfo, SnapshotInfo, SnapshotProperties},
1818
transaction_log::Diff,
1919
},
20-
inspect::snapshot_json,
20+
inspect::{manifest_json, snapshot_json},
2121
ops::{
2222
gc::{ExpiredRefAction, GCConfig, GCSummary, expire, garbage_collect},
2323
manifests::rewrite_manifests,
@@ -1660,6 +1660,42 @@ impl PyRepository {
16601660
Ok(res)
16611661
})
16621662
}
1663+
1664+
#[pyo3(signature = (manifest_id, *, pretty = true))]
1665+
fn inspect_manifest(&self, manifest_id: String, pretty: bool) -> PyResult<String> {
1666+
let result = pyo3_async_runtimes::tokio::get_runtime()
1667+
.block_on(async move {
1668+
let lock = self.0.read().await;
1669+
let manifest = ManifestId::try_from(manifest_id.as_str())
1670+
.map_err(|e| RepositoryErrorKind::Other(e.to_string()))?;
1671+
let res = manifest_json(lock.asset_manager(), &manifest, pretty).await?;
1672+
Ok(res)
1673+
})
1674+
.map_err(PyIcechunkStoreError::RepositoryError)?;
1675+
Ok(result)
1676+
}
1677+
1678+
#[pyo3(signature = (manifest_id, *, pretty = true))]
1679+
fn inspect_manifest_async<'py>(
1680+
&self,
1681+
py: Python<'py>,
1682+
manifest_id: String,
1683+
pretty: bool,
1684+
) -> PyResult<Bound<'py, PyAny>> {
1685+
let repository = self.0.clone();
1686+
pyo3_async_runtimes::tokio::future_into_py(py, async move {
1687+
let lock = repository.read().await;
1688+
let manifest = ManifestId::try_from(manifest_id.as_str())
1689+
.map_err(|e| {
1690+
RepositoryError::from(RepositoryErrorKind::Other(e.to_string()))
1691+
})
1692+
.map_err(PyIcechunkStoreError::RepositoryError)?;
1693+
let res = manifest_json(lock.asset_manager(), &manifest, pretty)
1694+
.await
1695+
.map_err(PyIcechunkStoreError::RepositoryError)?;
1696+
Ok(res)
1697+
})
1698+
}
16631699
}
16641700

16651701
fn map_credentials(

icechunk-python/tests/test_inspect.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,39 @@ async def test_inspect_snapshot_async() -> None:
3333
assert pretty["id"] == snap
3434
assert pretty_str != non_pretty_str
3535
assert pretty == non_pretty
36+
37+
38+
def test_inspect_manifest() -> None:
    """Inspect the first manifest of the tip snapshot on ``main``.

    Checks that the pretty and compact outputs differ as strings, decode to
    the same JSON document, and report the requested manifest id.
    """
    repo = ic.Repository.open(
        storage=ic.local_filesystem_storage("./tests/data/test-repo")
    )
    # Discover a real manifest id through the snapshot inspection output.
    snap = next(repo.ancestry(branch="main")).id
    snap_info = json.loads(repo.inspect_snapshot(snap, pretty=True))
    man_id = snap_info["manifests"][0]["id"]
    pretty_str = repo.inspect_manifest(man_id, pretty=True)
    non_pretty_str = repo.inspect_manifest(man_id, pretty=False)

    pretty = json.loads(pretty_str)
    non_pretty = json.loads(non_pretty_str)

    assert pretty["id"] == man_id
    assert pretty_str != non_pretty_str
    assert pretty == non_pretty
54+
55+
56+
async def test_inspect_manifest_async() -> None:
    """Async twin of ``test_inspect_manifest``.

    Uses the ``*_async`` inspection methods and checks that the pretty and
    compact outputs differ as strings, decode to the same JSON document, and
    report the requested manifest id.
    """
    repo = ic.Repository.open(
        storage=ic.local_filesystem_storage("./tests/data/test-repo")
    )
    # Discover a real manifest id through the snapshot inspection output.
    snap = next(repo.ancestry(branch="main")).id
    snap_info = json.loads(await repo.inspect_snapshot_async(snap, pretty=True))
    man_id = snap_info["manifests"][0]["id"]
    pretty_str = await repo.inspect_manifest_async(man_id, pretty=True)
    non_pretty_str = await repo.inspect_manifest_async(man_id, pretty=False)

    pretty = json.loads(pretty_str)
    non_pretty = json.loads(non_pretty_str)

    assert pretty["id"] == man_id
    assert pretty_str != non_pretty_str
    assert pretty == non_pretty

icechunk/src/format/manifest.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,15 @@ impl Manifest {
470470
array_manifest.refs().iter().map(|r| ref_to_payload(r))
471471
})
472472
}
473+
474+
/// Iterates over the node ids of the array entries stored in this
/// manifest's root, converting each one into a [`NodeId`].
pub fn nodes(&self) -> impl Iterator<Item = NodeId> + '_ {
    self.root().arrays().iter().map(|array_manifest| array_manifest.node_id().into())
}
477+
478+
pub fn node_refs(&self, node: &NodeId) -> Option<usize> {
479+
let manifest = self.root();
480+
lookup_node(manifest, node).map(|am| am.refs().len())
481+
}
473482
}
474483

475484
fn lookup_node<'a>(

icechunk/src/inspect.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use serde::{Deserialize, Serialize};
55
use crate::{
66
asset_manager::AssetManager,
77
format::{
8-
ManifestId, SnapshotId,
9-
manifest::ManifestRef,
8+
IcechunkFormatError, ManifestId, SnapshotId,
9+
manifest::{Manifest, ManifestRef},
1010
snapshot::{
1111
ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties,
1212
},
@@ -94,20 +94,45 @@ struct SnapshotInfoInspect {
9494
#[derive(Debug, Serialize, Deserialize)]
9595
struct ArrayManifestInspect {
9696
node_id: String,
97-
num_chunks: u64,
97+
num_chunk_refs: u64,
9898
}
9999

100100
#[derive(Debug, Serialize, Deserialize)]
101101
struct ManifestInfoInspect {
102102
id: String,
103+
size_bytes: u64,
104+
total_chunk_refs: u64,
103105
arrays: Vec<ArrayManifestInspect>,
104106
}
105107

108+
impl TryFrom<&Manifest> for ManifestInfoInspect {
109+
type Error = IcechunkFormatError;
110+
111+
fn try_from(value: &Manifest) -> Result<Self, Self::Error> {
112+
let arrays = value
113+
.nodes()
114+
.filter_map(|node_id| {
115+
value.node_refs(&node_id).map(|num_chunk_refs| ArrayManifestInspect {
116+
node_id: node_id.to_string(),
117+
num_chunk_refs: num_chunk_refs as u64,
118+
})
119+
})
120+
.collect();
121+
Ok(Self {
122+
id: value.id().to_string(),
123+
size_bytes: value.bytes().len() as u64,
124+
total_chunk_refs: value.len() as u64,
125+
arrays,
126+
})
127+
}
128+
}
129+
106130
async fn inspect_manifest(
107131
asset_manager: &AssetManager,
108132
id: &ManifestId,
109133
) -> RepositoryResult<ManifestInfoInspect> {
110-
todo!()
134+
let manifest = asset_manager.fetch_manifest_unknown_size(id).await?;
135+
Ok(manifest.as_ref().try_into()?)
111136
}
112137

113138
pub async fn manifest_json(

0 commit comments

Comments
 (0)