Skip to content

Commit 3595cf9

Browse files
committed
add hashing algorithm and parallelism with rayon
1 parent 9c50d37 commit 3595cf9

File tree

4 files changed

+287
-0
lines changed

4 files changed

+287
-0
lines changed

Cargo.lock

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ exclude = ["target", "*.rs.bk", ".github"]
1515

1616
[dependencies]
1717
dirs = "6.0.0"
18+
rayon = "1.11.0"
1819
serde = { version = "1.0.228", features = ["derive"] }
1920
toml = "0.9.11"
21+
xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
2022

2123
[dev-dependencies]
2224
tempfile = "3"

src/errors.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
pub enum RobeError {
33
Internal(String),
44
BadUsage(String),
5+
Hashing(String),
56
}
67

78
impl RobeError {
@@ -18,6 +19,10 @@ impl std::fmt::Display for RobeError {
1819
"robe: Wrong usage. {}\nUse `robe -h` for help.",
1920
err
2021
)),
22+
Self::Hashing(err) => f.write_fmt(format_args!(
23+
"robe: An error occurred when hashing directory contents. {}",
24+
err
25+
)),
2126
}
2227
}
2328
}

src/hashing.rs

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
use rayon::prelude::*;
2+
use std::fs::File;
3+
use std::io::{BufReader, Read};
4+
use std::path::{Path, PathBuf};
5+
use xxhash_rust::xxh3::Xxh3;
6+
7+
use crate::errors::RobeError;
8+
9+
pub fn are_paths_equal(p1: &Path, p2: &Path) -> Result<bool, RobeError> {
10+
if p1.is_dir() && p2.is_dir() {
11+
dirs_equal_parallel(p1, p2)
12+
} else {
13+
files_equal(p1, p2)
14+
}
15+
}
16+
17+
fn dirs_equal_parallel(dir1: &Path, dir2: &Path) -> Result<bool, RobeError> {
18+
let mut entries1: Vec<PathBuf> = std::fs::read_dir(dir1)?
19+
.map(|e| e.expect("").path())
20+
.collect();
21+
22+
let mut entries2: Vec<PathBuf> = std::fs::read_dir(dir2)?
23+
.map(|e| e.expect("").path())
24+
.collect();
25+
26+
if entries1.len() != entries2.len() {
27+
return Ok(false);
28+
}
29+
30+
entries1.sort();
31+
entries2.sort();
32+
33+
let results: Result<Vec<bool>, RobeError> = entries1
34+
.par_iter()
35+
.zip(entries2.par_iter())
36+
.map(|(p1, p2)| compare_entry(p1, p2))
37+
.collect();
38+
39+
Ok(results?.into_iter().all(|x| x))
40+
}
41+
42+
fn compare_entry(p1: &PathBuf, p2: &PathBuf) -> Result<bool, RobeError> {
43+
if let Some(name1) = p1.file_name()
44+
&& let Some(name2) = p2.file_name()
45+
{
46+
if name1 != name2 {
47+
return Ok(false);
48+
}
49+
50+
if p1.is_dir() && p2.is_dir() {
51+
dirs_equal_parallel(p1, p2)
52+
} else if p1.is_file() && p2.is_file() {
53+
files_equal(p1, p2)
54+
} else {
55+
Ok(false)
56+
}
57+
} else {
58+
Err(RobeError::Hashing(format!(
59+
"Error obtaining file names: {:?}, {:?}",
60+
p1, p2
61+
)))
62+
}
63+
}
64+
65+
fn files_equal(file1: &Path, file2: &Path) -> Result<bool, RobeError> {
66+
let meta1 = std::fs::metadata(file1)?;
67+
let meta2 = std::fs::metadata(file2)?;
68+
if meta1.len() != meta2.len() {
69+
return Ok(false);
70+
}
71+
72+
let mut f1 = BufReader::new(File::open(file1)?);
73+
let mut f2 = BufReader::new(File::open(file2)?);
74+
let mut buffer1 = [0u8; 8192];
75+
let mut buffer2 = [0u8; 8192];
76+
77+
loop {
78+
let n1 = f1.read(&mut buffer1)?;
79+
let n2 = f2.read(&mut buffer2)?;
80+
if n1 != n2 {
81+
return Ok(false);
82+
}
83+
if n1 == 0 {
84+
break;
85+
}
86+
87+
let mut h1 = Xxh3::new();
88+
h1.update(&buffer1[..n1]);
89+
let mut h2 = Xxh3::new();
90+
h2.update(&buffer2[..n2]);
91+
if h1.digest() != h2.digest() {
92+
return Ok(false);
93+
}
94+
}
95+
96+
Ok(true)
97+
}
98+
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::{TempDir, tempdir};

    /// Creates two independent temporary directories used as comparison roots.
    fn scratch_pair() -> (TempDir, TempDir) {
        let left = tempdir().unwrap();
        let right = tempdir().unwrap();
        (left, right)
    }

    /// Writes `left_data` to `left/name` and then `right_data` to `right/name`.
    fn write_both(
        left: &Path,
        right: &Path,
        name: &str,
        left_data: &[u8],
        right_data: &[u8],
    ) -> Result<(), RobeError> {
        fs::write(left.join(name), left_data)?;
        fs::write(right.join(name), right_data)?;
        Ok(())
    }

    /// Creates a `sub` directory under each root and returns both new paths.
    fn make_subdirs(left: &Path, right: &Path) -> Result<(PathBuf, PathBuf), RobeError> {
        let sub_left = left.join("sub");
        let sub_right = right.join("sub");
        fs::create_dir(&sub_left)?;
        fs::create_dir(&sub_right)?;
        Ok((sub_left, sub_right))
    }

    // Same text in both directories compares equal.
    #[test]
    fn test_identical_text_files() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        write_both(
            left.path(),
            right.path(),
            "file1.txt",
            b"Hello, world!",
            b"Hello, world!",
        )?;

        assert!(are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Text of different length and content compares unequal.
    #[test]
    fn test_different_text_files() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        write_both(left.path(), right.path(), "file1.txt", b"Hello", b"Goodbye")?;

        assert!(!are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Identical non-text bytes compare equal.
    #[test]
    fn test_identical_binary_files() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        let payload = vec![0u8, 255, 128, 64, 32];
        write_both(left.path(), right.path(), "binary.bin", &payload, &payload)?;

        assert!(are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Same length but one differing byte compares unequal.
    #[test]
    fn test_different_binary_files() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        let original = vec![0u8, 255, 128, 64, 32];
        let mut altered = original.clone();
        altered[0] = 1;
        write_both(left.path(), right.path(), "binary.bin", &original, &altered)?;

        assert!(!are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Matching content inside a nested directory compares equal.
    #[test]
    fn test_nested_directories_equal() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        let (sub_left, sub_right) = make_subdirs(left.path(), right.path())?;
        write_both(
            &sub_left,
            &sub_right,
            "nested.txt",
            b"Nested content",
            b"Nested content",
        )?;

        assert!(are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Differing content inside a nested directory compares unequal.
    #[test]
    fn test_nested_directories_different() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        let (sub_left, sub_right) = make_subdirs(left.path(), right.path())?;
        write_both(
            &sub_left,
            &sub_right,
            "nested.txt",
            b"Nested content",
            b"Different content",
        )?;

        assert!(!are_paths_equal(left.path(), right.path())?);
        Ok(())
    }

    // Fifty identical files per directory, compared through the parallel path.
    #[test]
    fn test_multiple_files_parallel() -> Result<(), RobeError> {
        let (left, right) = scratch_pair();
        for index in 0..50 {
            let name = format!("file{}.txt", index);
            write_both(left.path(), right.path(), &name, b"Some content", b"Some content")?;
        }

        assert!(are_paths_equal(left.path(), right.path())?);
        Ok(())
    }
}

0 commit comments

Comments
 (0)