Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions azure/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-azure</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-azure</name>
<description>Backup and restoration tooling for Cassandra for Azure</description>
Expand Down
11 changes: 9 additions & 2 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-core</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-core</name>
<description>Core of backup and restoration tooling for Cassandra</description>
Expand All @@ -25,6 +25,7 @@
<commons-io.version>2.20.0</commons-io.version>
<awaitility.version>3.1.6</awaitility.version>
<jackson.bom.version>2.19.2</jackson.bom.version>
<lz4.version>1.8.0</lz4.version>

<slf4j.version>2.0.17</slf4j.version>
<logback.version>1.5.19</logback.version>
Expand Down Expand Up @@ -120,6 +121,12 @@
<version>${awaitility.version}</version>
</dependency>

<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>${lz4.version}</version>
</dependency>

<!-- logging -->

<dependency>
Expand Down
31 changes: 30 additions & 1 deletion core/src/main/java/com/instaclustr/esop/impl/hash/HashSpec.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import net.jpountz.xxhash.StreamingXXHash64;
import net.jpountz.xxhash.XXHashFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;
Expand Down Expand Up @@ -116,8 +118,35 @@ public String getHash(byte[] digest) throws Exception {
}
}

/**
* Wraps the xxHash64 algorithm. Used for fast hashing of large files as an alternative to SHA-256.
*/
public static class XXHasher implements Hasher {

@Override
public String getHash(final InputStream is) throws Exception {
try (StreamingXXHash64 xxHash64 = XXHashFactory.fastestJavaInstance().newStreamingHash64(0)) {
byte[] byteArray = new byte[1024];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you make this buffer bigger? Does it make any difference to the speed? Try 4096 bytes (4 KiB), which is usually the smallest block that gets read from disk anyway.

int bytesCount = 0;

while ((bytesCount = is.read(byteArray)) != -1) {
xxHash64.update(byteArray, 0, bytesCount);
}

return Long.toString(xxHash64.getValue());
}
}

@Override
public String getHash(final byte[] digest) throws Exception {
// TODO do we actually need this?
throw new UnsupportedOperationException();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check how this is done in the SHA case (see SHAHasher).

}
}

public enum HashAlgorithm {
SHA_256("SHA-256", () -> new SHAHasher("SHA-256")),
XXHASH64("xxHash64", () -> new XXHasher()),
CRC("CRC", () -> new CRCHasher()),
NONE("NONE", () -> new NoOp());

Expand Down Expand Up @@ -146,7 +175,7 @@ public static HashAlgorithm parse(final String value) {
}

for (final HashAlgorithm algorithm : HashAlgorithm.values()) {
if (algorithm.name.equals(value)) {
if (algorithm.name.equalsIgnoreCase(value)) {
return algorithm;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,54 @@
import com.instaclustr.esop.impl.hash.HashService;
import com.instaclustr.esop.impl.hash.HashServiceImpl;
import com.instaclustr.esop.impl.hash.HashSpec;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

public class HashServiceTest {

private static File testFile;

@BeforeAll
public static void setup() throws Exception {
testFile = File.createTempFile("hashingTest", ".tmp");
Files.write(testFile.toPath(), "testdata".getBytes());
}

@AfterAll
public static void teardown() throws Exception {
if (testFile != null && testFile.exists()) {
testFile.delete();
}
}

@Test
public void testHashing_DefaultAlgorithm() throws Exception {
testHashing(new HashSpec());
}

@Test
public void testHashing_SHA256() throws Exception {
testHashing(new HashSpec(HashSpec.HashAlgorithm.SHA_256));
}

@Test
public void testHashing_CRC32() throws Exception {
testHashing(new HashSpec(HashSpec.HashAlgorithm.CRC));
}

@Test
public void testHashing() throws Exception {
final File f = File.createTempFile("hashingTest", ".tmp");
Files.write(f.toPath(), "".getBytes());
final HashService hashService = new HashServiceImpl(new HashSpec());
hashService.verify(f.toPath(), hashService.hash(f.toPath()));
public void testHashing_xxHash64() throws Exception {
testHashing(new HashSpec(HashSpec.HashAlgorithm.XXHASH64));
}

@Test
public void testHashing_None() throws Exception {
testHashing(new HashSpec(HashSpec.HashAlgorithm.NONE));
}

private void testHashing(HashSpec hashSpec) throws Exception {
final HashService hashService = new HashServiceImpl(hashSpec);
hashService.verify(testFile.toPath(), hashService.hash(testFile.toPath()));
}
}
4 changes: 2 additions & 2 deletions gcp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-gcp</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-gcp</name>
<description>Backup and restoration tooling for Cassandra for GCP</description>
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<packaging>pom</packaging>

<name>esop-parent</name>
Expand Down
4 changes: 2 additions & 2 deletions s3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-s3</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-s3</name>
<description>Backup and restoration tooling for Cassandra for AWS S3</description>
Expand Down