Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c209f9f
Next development iteration
worryg0d Oct 17, 2025
56c09a6
Implemented xxHash64 wrapper
worryg0d Oct 22, 2025
b64bf48
Added all supported algorithms to HashServiceTest
worryg0d Oct 22, 2025
4a0db60
Increased chunk size for hashing to 4096
worryg0d Oct 28, 2025
517f6fa
Parallel computing sstables hashes
worryg0d Nov 5, 2025
b4ec1b7
Expose snapshots map in Snapshots class
worryg0d Nov 5, 2025
70f3505
Deffer sstable components hash computation
worryg0d Nov 6, 2025
b7d36f4
ParallelHashService for hashing manifest entries
worryg0d Nov 6, 2025
91a5709
Use Runtime.availableProcessors() as a default value for concurrentCo…
worryg0d Nov 6, 2025
06a6fa2
Use 50% of available cpus by default instead
worryg0d Nov 6, 2025
08b6c82
Pass list of manifest entries instead of stream
worryg0d Nov 7, 2025
d41aca5
Parallel hashing of downloaded files during restoration import phase
worryg0d Nov 7, 2025
9479001
Address review comments
worryg0d Nov 10, 2025
fd483ea
Hash only manifest entries of the snapshot we're aiming for
worryg0d Nov 10, 2025
5480bbc
First iteration of fast fail on verifyAll manifests during import pha…
worryg0d Nov 10, 2025
cba7a9c
Code cleanup and comments
worryg0d Nov 11, 2025
ade8b3e
Hash operation interruption in the mid
worryg0d Nov 11, 2025
baec383
Addressed review suggestions by Stefan
worryg0d Nov 11, 2025
321f5be
Removed redundant validation method for concurrentConnections
worryg0d Nov 11, 2025
4b0e914
Change output format to hex for XXHasher.getHash method to align with…
worryg0d Nov 12, 2025
588b75b
Added test file and precomputed hashes for testing
worryg0d Nov 12, 2025
2693525
logs adjustment
worryg0d Nov 12, 2025
794f281
Bump Cassandra version to 5.0.6
worryg0d Nov 12, 2025
924fc25
CRCHasher test
worryg0d Nov 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions azure/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-azure</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-azure</name>
<description>Backup and restoration tooling for Cassandra for Azure</description>
Expand Down
11 changes: 9 additions & 2 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-core</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-core</name>
<description>Core of backup and restoration tooling for Cassandra</description>
Expand All @@ -25,6 +25,7 @@
<commons-io.version>2.20.0</commons-io.version>
<awaitility.version>3.1.6</awaitility.version>
<jackson.bom.version>2.19.2</jackson.bom.version>
<lz4.version>1.8.0</lz4.version>

<slf4j.version>2.0.17</slf4j.version>
<logback.version>1.5.19</logback.version>
Expand Down Expand Up @@ -120,6 +121,12 @@
<version>${awaitility.version}</version>
</dependency>

<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>${lz4.version}</version>
</dependency>

<!-- logging -->

<dependency>
Expand Down
31 changes: 30 additions & 1 deletion core/src/main/java/com/instaclustr/esop/impl/hash/HashSpec.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import net.jpountz.xxhash.StreamingXXHash64;
import net.jpountz.xxhash.XXHashFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;
Expand Down Expand Up @@ -116,8 +118,35 @@ public String getHash(byte[] digest) throws Exception {
}
}

/**
 * {@link Hasher} backed by the streaming xxHash64 implementation from lz4-java.
 * Intended for fast hashing of large files as an alternative to SHA-256.
 */
public static class XXHasher implements Hasher {

    /**
     * Consumes the given stream until end-of-stream and returns its xxHash64 value.
     * The hash is computed with seed {@code 0} and rendered as a signed decimal string.
     * The stream itself is not closed by this method.
     *
     * @param is stream whose content is hashed
     * @return decimal string form of the 64-bit hash value
     * @throws Exception if reading from the stream fails
     */
    @Override
    public String getHash(final InputStream is) throws Exception {
        try (StreamingXXHash64 streamingHash = XXHashFactory.fastestJavaInstance().newStreamingHash64(0)) {
            final byte[] buffer = new byte[1024];

            // Feed the hasher chunk by chunk; read() reports -1 once the stream is exhausted.
            for (int read = is.read(buffer); read != -1; read = is.read(buffer)) {
                streamingHash.update(buffer, 0, read);
            }

            final long hashValue = streamingHash.getValue();
            return String.valueOf(hashValue);
        }
    }

    /**
     * Not supported by this hasher.
     *
     * @param digest ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getHash(final byte[] digest) throws Exception {
        // TODO do we actually need this?
        throw new UnsupportedOperationException();
    }
}

public enum HashAlgorithm {
SHA_256("SHA-256", () -> new SHAHasher("SHA-256")),
XXHASH64("xxHash64", () -> new XXHasher()),
CRC("CRC", () -> new CRCHasher()),
NONE("NONE", () -> new NoOp());

Expand Down Expand Up @@ -146,7 +175,7 @@ public static HashAlgorithm parse(final String value) {
}

for (final HashAlgorithm algorithm : HashAlgorithm.values()) {
if (algorithm.name.equals(value)) {
if (algorithm.name.equalsIgnoreCase(value)) {
return algorithm;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,54 @@
import com.instaclustr.esop.impl.hash.HashService;
import com.instaclustr.esop.impl.hash.HashServiceImpl;
import com.instaclustr.esop.impl.hash.HashSpec;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

public class HashServiceTest {

private static File testFile;

/**
 * Creates the temporary file shared by all tests in this class and fills it with a small payload.
 */
@BeforeAll
public static void setup() throws Exception {
    testFile = File.createTempFile("hashingTest", ".tmp");

    final byte[] payload = "testdata".getBytes();
    Files.write(testFile.toPath(), payload);
}

/**
 * Removes the shared temporary file, if it was created and still exists.
 */
@AfterAll
public static void teardown() throws Exception {
    // Nothing to clean up when setup never produced a file or it is already gone.
    if (testFile == null || !testFile.exists()) {
        return;
    }

    testFile.delete();
}

/** Round-trips the shared test file through a {@link HashSpec} built with its no-arg constructor. */
@Test
public void testHashing_DefaultAlgorithm() throws Exception {
    final HashSpec defaultSpec = new HashSpec();
    testHashing(defaultSpec);
}

/** Round-trips the shared test file using {@code HashAlgorithm.SHA_256}. */
@Test
public void testHashing_SHA256() throws Exception {
    final HashSpec sha256Spec = new HashSpec(HashSpec.HashAlgorithm.SHA_256);
    testHashing(sha256Spec);
}

/** Round-trips the shared test file using {@code HashAlgorithm.CRC}. */
@Test
public void testHashing_CRC32() throws Exception {
    final HashSpec crcSpec = new HashSpec(HashSpec.HashAlgorithm.CRC);
    testHashing(crcSpec);
}

@Test
public void testHashing() throws Exception {
final File f = File.createTempFile("hashingTest", ".tmp");
Files.write(f.toPath(), "".getBytes());
final HashService hashService = new HashServiceImpl(new HashSpec());
hashService.verify(f.toPath(), hashService.hash(f.toPath()));
public void testHashing_xxHash64() throws Exception {
testHashing(new HashSpec(HashSpec.HashAlgorithm.XXHASH64));
}

/** Round-trips the shared test file using {@code HashAlgorithm.NONE}. */
@Test
public void testHashing_None() throws Exception {
    final HashSpec noneSpec = new HashSpec(HashSpec.HashAlgorithm.NONE);
    testHashing(noneSpec);
}

/**
 * Hashes the shared test file with a service configured by the given spec and
 * passes the resulting hash straight back into {@code verify} for the same file.
 *
 * @param hashSpec hashing configuration under test
 */
private void testHashing(HashSpec hashSpec) throws Exception {
    final HashService service = new HashServiceImpl(hashSpec);

    // NOTE(review): presumably verify() throws when the hash does not match — confirm against HashService.
    service.verify(
        testFile.toPath(),
        service.hash(testFile.toPath()));
}
}
4 changes: 2 additions & 2 deletions gcp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-gcp</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-gcp</name>
<description>Backup and restoration tooling for Cassandra for GCP</description>
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<packaging>pom</packaging>

<name>esop-parent</name>
Expand Down
4 changes: 2 additions & 2 deletions s3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>com.instaclustr</groupId>
<artifactId>esop-parent</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>esop-s3</artifactId>
<version>4.0.1</version>
<version>4.0.2-SNAPSHOT</version>

<name>esop-s3</name>
<description>Backup and restoration tooling for Cassandra for AWS S3</description>
Expand Down