diff --git a/README.md b/README.md index e8de573..f0012e0 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,10 @@ remapped = crackle.refit(binary) # renumber array and change dtype to smallest possible remapped = crackle.renumber(binary, start=0) +is_equal = crackle.array_equal(binary1, binary2) +arr == arr2 # syntactic sugar for CrackleArrays +arr.array_equal(arr2) # method call for CrackleArrays + # for working with files # if .gz is appended to the filename, the file will be # automatically gzipped (or ungzipped) @@ -162,7 +166,7 @@ python setup.py develop | Labels | header.num_label_bytes | Can be either "flat" labels or "pins". Describes how to color connected components. | | Crack Codes | Variable length. | Instructions for drawing crack boundaries. | | Labels crc32c | (v1 only) 4(le) | v0: n/a, v1: crc32c of the labels binary. | -| Labels crc32c | (v1 only) header.sz * 4(le) | v0: n/a, v1: crc32c of the uncompressed uint32_t fortran order connected component labeling of each z-slice. | +| Crack crc32c | (v1 only) header.sz * 4(le) | v0: n/a, v1: crc32c of the uncompressed uint32_t fortran order connected component labeling of each z-slice. | ### A Note on CRCs diff --git a/automated_test.py b/automated_test.py index 2125959..b1eaf7f 100644 --- a/automated_test.py +++ b/automated_test.py @@ -960,8 +960,43 @@ def test_contacts(): assert np.all(gt_contacts == crackle_contacts) +def test_array_equal_simple(): + binary = crackle.compress(np.zeros([100,100,100])) + binary2 = crackle.compress(np.zeros([23,1,33])) + assert not crackle.array_equal(binary, binary2) + binary = crackle.compress(np.zeros([23,1,33])) + binary2 = crackle.compress(np.zeros([23,1,33])) + assert crackle.array_equal(binary, binary2) + arr = np.random.randint(0,500, size=[100,103,101]).astype(np.uint16) + arr2 = np.random.randint(0,500, size=[100,103,101]).astype(np.uint16) + + binary = crackle.compress(arr) + binary2 = crackle.compress(arr2) + + assert np.all(arr == arr2) == crackle.array_equal(binary, binary2) + +def test_array_equal_real_data(): + labels = compresso.load("connectomics.npy.cpso.gz") + binary = crackle.compress(labels, markov_model_order=0) + markov_binary = crackle.codec.reencode(binary, markov_model_order=5) + + assert crackle.array_equal(binary, binary) + assert crackle.array_equal(binary, markov_binary) + + labels2 = labels.copy() + binary2 = crackle.compress(labels2) + + assert crackle.array_equal(binary, binary2) + + labels2[100,100,100] = labels2[400,400,400] + binary2 = crackle.compress(labels2) + + assert not crackle.array_equal(binary, binary2) + + + diff --git a/crackle/__init__.py b/crackle/__init__.py index e4e8dbe..5841ea8 100644 --- a/crackle/__init__.py +++ b/crackle/__init__.py @@ -57,6 +57,7 @@ mask, mask_except, voxel_connectivity_graph, contacts, + array_equal, ) from .headers import FormatError, CrackleHeader from .util import save, load, aload, bload, save_numpy diff --git a/crackle/array.py b/crackle/array.py index 0724146..96cf2f1 100644 --- a/crackle/array.py +++ b/crackle/array.py @@ -19,6 +19,7 @@ connected_components, voxel_connectivity_graph, contacts, + array_equal, ) from . import codec, operations from .lib import crc32c @@ -195,11 +196,14 @@ def mask_except(self, labels:list[int], value:int = 0, in_place:bool = False) -> ) return CrackleArray(binary) + def array_equal(self, other:"CrackleArray") -> bool: + return array_equal(self.binary, other.binary) + def __eq__(self, other:Union[int, Any]) -> bool: if isinstance(other, int): return self.min() == other and self.max() == other elif isinstance(other, CrackleArray): - return self.binary == other.binary + return self.array_equal(other) else: raise TypeError(f"Type {type(other)} is not supported.") diff --git a/crackle/codec.py b/crackle/codec.py index 8b383ce..d49c9ea 100644 --- a/crackle/codec.py +++ b/crackle/codec.py @@ -128,7 +128,7 @@ def labels_crc(binary:bytes) -> Optional[int]: crcl = head.sz * 4 + 4 return int.from_bytes(binary[-crcl:-crcl+4], 'little') -def crack_crcs(binary:bytes) -> Optional[int]: +def crack_crcs(binary:bytes) -> Optional[npt.NDArray[np.uint32]]: """Retrieve the stored crack code crc32cs.""" head = CrackleHeader.frombytes(binary) diff --git a/crackle/operations.py b/crackle/operations.py index a77920d..af3af01 100644 --- a/crackle/operations.py +++ b/crackle/operations.py @@ -963,5 +963,59 @@ def contacts( wx, wy, wz = anisotropy return fastcrackle.contacts(binary, 0, -1, wx, wy ,wz) +def array_equal( + binary1:bytes, + binary2:bytes, + parallel:int = 0, +) -> bool: + """ + Check if arrays have the same contents regardless of + encoding representation. + """ + h1 = header(binary1) + h2 = header(binary2) + + if ( + h1.sx != h2.sx + or h1.sy != h2.sy + or h1.sz != h2.sz + ): + return False + + if num_labels(binary1) != num_labels(binary2): + return False + + uniq1 = labels(binary1) + uniq2 = labels(binary2) + if np.any(uniq1 != uniq2): + return False + + return fastcrackle.array_equal(binary1, binary2, parallel) + +def structure_equal( + binary1:bytes, + binary2:bytes, + parallel:int = 0, +) -> bool: + """ + Check if images have the same structure regardless of + labeling. + """ + h1 = header(binary1) + h2 = header(binary2) + + if ( + h1.sx != h2.sx + or h1.sy != h2.sy + or h1.sz != h2.sz + ): + return False + + if h1.format_version == 0 or h2.format_version == 0: + vcg1 = voxel_connectivity_graph(binary1, connectivity=4, parallel=parallel) + vcg2 = voxel_connectivity_graph(binary2, connectivity=4, parallel=parallel) + return np.all(vcg1 == vcg2) + else: + return np.all(crack_crcs(binary1) == crack_crcs(binary2)) diff --git a/src/fastcrackle.cpp b/src/fastcrackle.cpp index 253b772..1c8a80e 100644 --- a/src/fastcrackle.cpp +++ b/src/fastcrackle.cpp @@ -586,6 +586,32 @@ py::dict contacts( return pyareas; } +bool array_equal( + const py::buffer buffer1, + const py::buffer buffer2, + const size_t parallel = 1 +) { + py::buffer_info info1 = buffer1.request(); + py::buffer_info info2 = buffer2.request(); + + if (info1.ndim != 1 || info2.ndim != 1) { + throw std::runtime_error("Expected a 1D buffer"); + } + + uint8_t* data1 = static_cast(info1.ptr); + uint8_t* data2 = static_cast(info2.ptr); + + if (data1 == data2) { + return true; + } + + return crackle::operations::array_equal( + data1, info1.size, + data2, info2.size, + parallel + ); +} + PYBIND11_MODULE(fastcrackle, m) { m.doc() = "Accelerated crackle functions."; @@ -607,6 +633,7 @@ PYBIND11_MODULE(fastcrackle, m) { m.def("get_slice_vcg", &get_slice_vcg, "Debugging tool for examining the voxel connectivity graph of a slice."); m.def("voxel_connectivity_graph", &voxel_connectivity_graph, "Extract the voxel connectivity graph from the image."); m.def("contacts", &contacts, "Find the contact area between pairs of regions."); + m.def("array_equal", &array_equal, "Check if two crackle arrays are equal regardless of encoding."); py::class_>(m, "CppPin") .def(py::init<>()) diff --git a/src/operations.hpp b/src/operations.hpp index 644e0f8..1fa4e0d 100644 --- a/src/operations.hpp +++ b/src/operations.hpp @@ -1024,7 +1024,166 @@ auto contacts( ); } +bool array_equal( + const unsigned char* buffer1, + const size_t num_bytes1, + const unsigned char* buffer2, + const size_t num_bytes2, + size_t parallel = 1 +) { + const CrackleHeader header1 = get_header(buffer1, num_bytes1); + const CrackleHeader header2 = get_header(buffer2, num_bytes2); + + const uint64_t voxels1 = get_voxels(header1, 0, -1); + const uint64_t voxels2 = get_voxels(header2, 0, -1); + + if (voxels1 == 0 || voxels2 == 0) { + return voxels1 == voxels2; + } + + if ( + header1.sx != header2.sx + || header1.sy != header2.sy + || header1.sz != header2.sz + ) { + return false; + } + + const size_t sx = header1.sx; + const size_t sy = header1.sy; + const size_t sz = header1.sz; + + std::span binary1(buffer1, num_bytes1); + std::span binary2(buffer2, num_bytes2); + + // only used for markov compressed streams + std::vector> markov_model1 = decode_markov_model(header1, binary1); + std::vector> markov_model2 = decode_markov_model(header2, binary2); + + auto crack_codes1 = get_crack_codes(header1, binary1, 0, sz); + auto crack_codes2 = get_crack_codes(header2, binary2, 0, sz); + + if (parallel == 0) { + parallel = std::thread::hardware_concurrency(); + } + parallel = std::min(parallel, sz); + + ThreadPool pool(parallel); + + std::vector> vcgs(parallel * 2); + std::vector> ccls(parallel * 2); + + for (size_t t = 0; t < parallel * 2; t++) { + vcgs[t].resize(header1.sx * header1.sy); + ccls[t].resize(header1.sx * header1.sy); + } + + std::atomic is_equal{true}; + + for (size_t z = 0; z < sz; z++) { + pool.enqueue([&,z](size_t t){ + if (!is_equal) { + return; + } + + t *= 2; + + std::vector& vcg1 = vcgs[t]; + std::vector& vcg2 = vcgs[t+1]; + + std::vector& ccl1 = ccls[t]; + std::vector& ccl2 = ccls[t+1]; + auto& crack_code1 = crack_codes1[z]; + auto& crack_code2 = crack_codes2[z]; + + crack_code_to_vcg( + /*code=*/crack_code1, + /*sx=*/sx, /*sy=*/sy, + /*permissible=*/(header1.crack_format == CrackFormat::PERMISSIBLE), + /*markov_model=*/markov_model1, + /*slice_edges=*/vcg1.data() + ); + + if (!is_equal) { + return; + } + + crack_code_to_vcg( + /*code=*/crack_code2, + /*sx=*/sx, /*sy=*/sy, + /*permissible=*/(header2.crack_format == CrackFormat::PERMISSIBLE), + /*markov_model=*/markov_model2, + /*slice_edges=*/vcg2.data() + ); + + if (!is_equal) { + return; + } + + uint64_t N1 = 0; + crackle::cc3d::color_connectivity_graph( + vcg1, sx, sy, 1, ccl1.data(), N1 + ); + + if (!is_equal) { + return; + } + + uint64_t N2 = 0; + crackle::cc3d::color_connectivity_graph( + vcg2, sx, sy, 1, ccl2.data(), N2 + ); + + if (!is_equal) { + return; + } + + if (N1 != N2) { + is_equal.store(false, std::memory_order_relaxed); + return; + } + + std::vector label_map1 = decode_label_map( + header1, binary1, ccl1.data(), N1, z, z+1 + ); + std::vector label_map2 = decode_label_map( + header2, binary2, ccl2.data(), N2, z, z+1 + ); + + for (size_t y = 0; y < sy; y++) { + for (size_t x = 0; x < sx; x++) { + const int64_t loc = x + sx * y; + const uint64_t a = label_map1[ccl1[loc]]; + const uint64_t b = label_map1[ccl2[loc]]; + + if (a != b) { + is_equal.store(false, std::memory_order_relaxed); + return; + } + } + } + }); + } + + pool.join(); + + return is_equal; +} + +bool array_equal( + const std::span& buffer1, + const std::span& buffer2, + const int parallel = 1 +) { + return array_equal( + buffer1.data(), + buffer1.size(), + buffer2.data(), + buffer2.size(), + parallel + ); +} }; };