Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
854286b
Initial commit - Add BTI support to ic-pstats
jfleming-ic Oct 2, 2024
909d31a
bump 5.0.1
haoran-huang-netapp Oct 14, 2024
d29203b
Merge branch 'cassandra-5.0.1' into cassandra-5.0.1
haoran-huang-netapp Oct 14, 2024
e314587
Merge pull request #40 from haoran-huang-netapp/cassandra-5.0.1
haoran-huang-netapp Oct 14, 2024
6b1eb4d
Bump Version
jarodabeysinghe28 Oct 21, 2024
827d40f
Working Prototype
jfleming-ic Nov 15, 2024
247ee0f
Remove Sys outs
jfleming-ic Nov 15, 2024
d363980
Cleanup
jfleming-ic Nov 15, 2024
74c30f2
Cleanup
jfleming-ic Nov 15, 2024
201ecb3
Simplify to single solution
jfleming-ic Nov 15, 2024
57ec6e8
Remove dead code
jfleming-ic Nov 15, 2024
245f9b8
Update README
jfleming-ic Nov 15, 2024
0ff714a
Remove incorrect comment
jfleming-ic Nov 15, 2024
78a2c50
Remove incorrect comment
jfleming-ic Nov 15, 2024
08711d7
Remove useless null check
jfleming-ic Nov 15, 2024
bd56b8a
Bump binary version
jfleming-ic Nov 15, 2024
479ed26
Bump binary version
jfleming-ic Nov 15, 2024
bb33b3e
Undo bad bump
jfleming-ic Nov 15, 2024
cf9e230
Fix review comments from Cam
jfleming-ic Nov 17, 2024
39671ef
Fix bug with filenames in pstats - Increment version of tool to 1.1
jfleming-ic Nov 17, 2024
1eb8e68
Merge branch 'BTI-Support' into 5.0.2-BTI
jfleming-ic Nov 21, 2024
71d593a
Remove trailing new lines at end of IndexReader.java
jfleming-ic Nov 22, 2024
e22f74b
Fix filename
jfleming-ic Nov 22, 2024
4178a1a
Fix ugly optional handling
jfleming-ic Nov 22, 2024
68576d6
Improve Optional missing code path
jfleming-ic Nov 28, 2024
1e99bbc
Merge pull request #42 from instaclustr/5.0.2-BTI
jfleming-ic Nov 28, 2024
7c6404e
Update to Apache Cassandra 5.0.3
jfleming-ic Feb 4, 2025
f301ea8
Bump Version
jarodabeysinghe28 May 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Print out sstable metadata for a column family. Useful in helping to tune compac


## pstats ##
Tool for finding largest partitions. Reads the Index.db files so is relatively quick.
Tool for finding largest partitions.

### Usage ###

Expand Down Expand Up @@ -321,6 +321,32 @@ Largest reclaimable partitions: Partitions with the largest amount of reclaimabl
| Reclaim | Reclaimable uncompressed size |
| Generations | SSTable generations the partition belongs to |

### Testing with CCM ###

You can test this tool with CCM, which saves the time otherwise needed to install and configure Cassandra. Simply do the following:

Locate the CCM installation directory on your machine; this is usually ~/.ccm

Identify the version of Cassandra you wish to test on. Take the binary (target/ic-sstable-tools.jar) and copy it into the lib directory located in
`
~/.ccm/repository/\<version to test on>/lib/
`

For example
`~/.ccm/repository/5.0.0/lib/`

Now run

`export CASSANDRA_INCLUDE=~/.ccm/<ccm cluster name>/<node name in cluster>/bin/cassandra.in.sh`

For example

`export CASSANDRA_INCLUDE=~/.ccm/test/node1/bin/cassandra.in.sh`


and you should be able to run commands using the script located in the bin directory!

`ic-sstable-tools pstats keyspace table`

Please see https://www.instaclustr.com/support/documentation/announcements/instaclustr-open-source-project-status/ for Instaclustr support status of this project

6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
<modelVersion>4.0.0</modelVersion>

<groupId>com.instaclustr</groupId>
<artifactId>ic-sstable-tools-5.0.0</artifactId>
<version>1.0.0</version>
<artifactId>ic-sstable-tools-5.0.4</artifactId>
<version>1.1.0</version>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jarodabeysinghe28 if you go to introduce a new artifact with "-5.0.4" suffix then bumping the version to 1.1.0 does not make any sense.


<name>Instaclustr SSTable Tools</name>
<description>Handy SSTable tools for Apache Cassandra</description>
Expand Down Expand Up @@ -88,7 +88,7 @@
<dependency>
<groupId>org.apache.cassandra</groupId>
<artifactId>cassandra-all</artifactId>
<version>5.0.0</version>
<version>5.0.4</version>
<scope>provided</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,18 @@
import com.instaclustr.sstabletools.*;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.SerializationHeader;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.io.sstable.Component;
import org.apache.cassandra.io.sstable.format.SSTableFormat;
import org.apache.cassandra.io.sstable.format.big.BigFormat;
import org.apache.cassandra.io.sstable.format.big.BigTableReader;
import org.apache.cassandra.io.util.FileHandle;
import org.apache.cassandra.utils.FilterFactory;
import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.*;

/**
* ColumnFamilyProxy using Cassandra 3.5 backend.
Expand Down Expand Up @@ -80,8 +73,8 @@ public ColumnFamilyBackend(AbstractType<?> keyValidator,
if (filter != null) {
List<org.apache.cassandra.io.sstable.format.SSTableReader> filteredSSTables = new ArrayList<>(sstables.size());
for (org.apache.cassandra.io.sstable.format.SSTableReader sstable : sstables) {
File dataFile = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).toJavaIOFile();;
if (filter.contains(dataFile.getName())) {
String filename = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).name();
if (filter.contains(filename)) {
filteredSSTables.add(sstable);
}
}
Expand All @@ -94,34 +87,33 @@ public Collection<SSTableReader> getIndexReaders() {
Collection<SSTableReader> readers = new ArrayList<>(sstables.size());
for (org.apache.cassandra.io.sstable.format.SSTableReader sstable : sstables) {
try {
Set<Component> components = sstable.descriptor.discoverComponents();
Set<Component> discoveredComponents =
sstable.descriptor.getComponents(Set.of(), Set.of(BtiFormat.Components.PARTITION_INDEX, BigFormat.Components.PRIMARY_INDEX));

Optional<Component> maybeIndexComponent = components.stream().filter(c -> c.name.contains("Index")).findFirst();
if (!maybeIndexComponent.isPresent()) {
if (discoveredComponents.isEmpty()) {
//Nothing to read.
continue;
}

org.apache.cassandra.io.util.File indexFile = sstable.descriptor.fileFor(maybeIndexComponent.get());
FileHandle indexHandle = new FileHandle.Builder(indexFile).complete();
if(discoveredComponents.size() > 1){
logger.error("Multiple Components found, this should never happen. Filename might be incorrect.");
}

BigTableReader reader = new BigTableReader.Builder(sstable.descriptor)
.setComponents(components)
.setFilter(FilterFactory.AlwaysPresent)
.setSerializationHeader(SerializationHeader.makeWithoutStats(cfStore.metadata()))
.setIndexFile(indexHandle)
.build(this.cfStore, false, false);
Optional<Component> maybeComponent = discoveredComponents.stream().findFirst();

org.apache.cassandra.io.util.File sstableIndexFile =
sstable.descriptor.fileFor(maybeComponent.orElseThrow(() ->
new IllegalStateException(String.format("No Component found on sstable %s, this should never happen.", sstable.getFilename()))));

File dataFile = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).toJavaIOFile();
readers.add(new IndexReader(
new SSTableStatistics(
sstable.descriptor.id,
dataFile.getName(),
sstableIndexFile.name(),
sstable.uncompressedLength(),
sstable.getMinTimestamp(),
sstable.getMaxTimestamp(),
sstable.getSSTableLevel()),
reader.getIndexFile().createReader(),
sstable.descriptor.version,
sstable.keyReader(),
sstable.getPartitioner()
));
} catch (Throwable t) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,17 @@
import com.instaclustr.sstabletools.PartitionStatistics;
import com.instaclustr.sstabletools.SSTableStatistics;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.io.sstable.format.Version;
import org.apache.cassandra.io.util.RandomAccessReader;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.io.sstable.KeyReader;

/**
* SSTable Index.db reader.
*/
public class IndexReader extends AbstractSSTableReader {
/**
* Index.db reader.
*/
private RandomAccessReader reader;

/**
* SSTable version.
* The SSTable KeyReader.
*/
private Version version;
private KeyReader keyReader;

/**
* The sstable partitioner.
Expand All @@ -45,55 +39,40 @@ public class IndexReader extends AbstractSSTableReader {
*/
private boolean completed = false;


/**
* Construct a reader for Index.db sstable file.
*
* @param tableStats SSTable statistics.
* @param reader Reader to Index.db file.
* @param version Version of SSTable
* @param keyReader KeyReader for sstable.
* @param partitioner The sstable partitioner.
*/
public IndexReader(SSTableStatistics tableStats, RandomAccessReader reader, Version version, IPartitioner partitioner) {
public IndexReader(SSTableStatistics tableStats, KeyReader keyReader, IPartitioner partitioner) {
this.tableStats = tableStats;
this.reader = reader;
this.version = version;
this.keyReader = keyReader;
this.nextKey = null;
this.partitioner = partitioner;
}

/**
* Skip data field on index entry.
*
* @throws IOException
*/
private void skipData() throws IOException {
int size = version.version.compareTo("ma") >= 0 ? (int) reader.readUnsignedVInt() : reader.readInt();
if (size > 0) {
reader.skipBytesFully(size);
}
}

@Override
public boolean next() {
if (completed) {
return false;
}
try {
if (nextKey == null) {
nextKey = ByteBufferUtil.readWithShortLength(reader);
nextPosition = version.version.compareTo("ma") > 0 ? reader.readUnsignedVInt() : reader.readLong();
skipData();
nextKey = keyReader.key();
nextPosition = keyReader.dataPosition();
}
partitionStats = new PartitionStatistics(partitioner.decorateKey(nextKey));
long position = nextPosition;
if (!reader.isEOF()) {
nextKey = ByteBufferUtil.readWithShortLength(reader);
nextPosition = version.version.compareTo("ma") > 0 ? reader.readUnsignedVInt() : reader.readLong();
skipData();
if (!keyReader.isExhausted() && keyReader.advance()) {
nextKey = keyReader.key();
nextPosition = keyReader.dataPosition();
partitionStats.size = nextPosition - position;
} else {
partitionStats.size = this.tableStats.size - position;
reader.close();
keyReader.close();
completed = true;
}
this.tableStats.partitionCount++;
Expand All @@ -103,7 +82,7 @@ public boolean next() {
e.printStackTrace();
if (!completed) {
try {
reader.close();
keyReader.close();
} catch (Throwable t) {
}
}
Expand Down