Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ public BytesRef buildTsid(XContentType sourceType, BytesReference source) {
} catch (IOException | ParsingException e) {
throw new IllegalArgumentException("Error extracting tsid: " + e.getMessage(), e);
}
return b.buildTsid();
return b.buildTsid(creationVersion);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import org.elasticsearch.common.hash.BufferedMurmur3Hasher;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.RoutingPathFields;
import org.elasticsearch.xcontent.XContentString;

Expand All @@ -31,12 +34,17 @@
*/
public class TsidBuilder {

public static final boolean SINGLE_PREFIX_BYTE_ENABLED = new FeatureFlag("tsid_layout_single_prefix_byte").isEnabled();

/**
* The maximum number of fields to use for the value similarity part of the TSID.
* This is a trade-off between clustering similar time series together and the size of the TSID.
* More fields improve clustering but also increase the size of the TSID.
*/
private static final int MAX_TSID_VALUE_SIMILARITY_FIELDS = 4;
static final String OTEL_METRIC_FIELD = "_metric_names_hash";
static final String PROMETHEUS_LABEL_FIELD = "labels.__name__";

private final BufferedMurmur3Hasher murmur3Hasher = new BufferedMurmur3Hasher(0L);

private final List<Dimension> dimensions;
Expand Down Expand Up @@ -215,6 +223,18 @@ public MurmurHash3.Hash128 hash() {
return murmur3Hasher.digestHash();
}

public final BytesRef buildTsid(IndexVersion indexVersion) {
if (useSingleBytePrefixLayout(indexVersion)) {
return buildSingleBytePrefixTsid();
} else {
return buildMultiBytePrefixTsid();
}
}

public static boolean useSingleBytePrefixLayout(IndexVersion indexVersion) {
return SINGLE_PREFIX_BYTE_ENABLED && indexVersion.onOrAfter(IndexVersions.TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG);
}

/**
* Builds a time series identifier (TSID) based on the dimensions added to this builder.
* This is a slight adaptation of {@link RoutingPathFields#buildHash()} but creates shorter tsids.
Expand All @@ -233,18 +253,16 @@ public MurmurHash3.Hash128 hash() {
* This is to avoid hash collisions.
* </li>
* </ul>
*
* @return a BytesRef containing the TSID
* @throws IllegalArgumentException if no dimensions have been added
* Note that this layout has been used with indices created before {@link IndexVersions#TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG}
*/
public BytesRef buildTsid() {
private BytesRef buildMultiBytePrefixTsid() {
throwIfEmpty();
Collections.sort(dimensions);

int numberOfValues = Math.min(MAX_TSID_VALUE_SIMILARITY_FIELDS, dimensions.size());
byte[] hash = new byte[1 + numberOfValues + 16];
int index = 0;

Collections.sort(dimensions);

MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128();
murmur3Hasher.reset();
// similarity hash for dimension names
Expand Down Expand Up @@ -280,6 +298,55 @@ public BytesRef buildTsid() {
return new BytesRef(hash, 0, index);
}

private BytesRef buildSingleBytePrefixTsid() {
throwIfEmpty();
Collections.sort(dimensions);

final byte[] tsid = new byte[16];
murmur3Hasher.reset();
MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128();
// hash of all dimension names and values for uniqueness
for (Dimension dim : dimensions) {
murmur3Hasher.addLongs(dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2);
}
murmur3Hasher.digestHash(hashBuffer);
ByteUtils.writeLongLE(hashBuffer.h2, tsid, 0);
ByteUtils.writeLongLE(hashBuffer.h1, tsid, 8);
tsid[0] = computeSingleBytePrefix(hashBuffer);
return new BytesRef(tsid);
}

private byte computeSingleBytePrefix(MurmurHash3.Hash128 scratch) {
murmur3Hasher.reset();
Dimension otelMetric = findDimension(dimensions, OTEL_METRIC_FIELD);
if (otelMetric != null) {
murmur3Hasher.addLong(otelMetric.valueHash().h1 ^ otelMetric.valueHash().h2);
return (byte) murmur3Hasher.digestHash(scratch).h1;
}
Dimension prometheusLabel = findDimension(dimensions, PROMETHEUS_LABEL_FIELD);
if (prometheusLabel != null) {
murmur3Hasher.addLong(prometheusLabel.valueHash().h1 ^ prometheusLabel.valueHash().h2);
return (byte) murmur3Hasher.digestHash(scratch).h1;
}
// similarity hash for dimension names
for (Dimension dim : dimensions) {
murmur3Hasher.addLong(dim.pathHash.h1 ^ dim.pathHash.h2);
}
return (byte) murmur3Hasher.digestHash(scratch).h1;
}

private static Dimension findDimension(List<Dimension> sortedDimensions, String name) {
for (Dimension dim : sortedDimensions) {
int cmp = dim.path.compareTo(name);
if (cmp > 0) {
return null;
} else if (cmp == 0) {
return dim;
}
}
return null;
}

private void throwIfEmpty() {
if (dimensions.isEmpty()) {
throw new IllegalArgumentException("Dimensions are empty");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion UPGRADE_TO_LUCENE_10_4_0 = def(9_075_00_0, Version.LUCENE_10_4_0);
public static final IndexVersion UPGRADE_DISKBBQ_ES940 = def(9_076_00_0, Version.LUCENE_10_4_0);
public static final IndexVersion FLATTENED_FIELD_NO_ROOT_DOC_VALUES = def(9_077_0_00, Version.LUCENE_10_4_0);

public static final IndexVersion TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG = def(9_078_00_0, Version.LUCENE_10_4_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.index.IndexVersionUtils;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentType;
Expand Down Expand Up @@ -686,7 +687,7 @@ public void testRoutingPathBwc() throws IOException {
public void testRoutingPathBwcAfterTsidBasedRouting() throws IOException {
boolean useSyntheticId = IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG && randomBoolean();
TimeSeriesRoutingFixture fixture = indexRoutingForTimeSeriesDimensions(
IndexVersion.current(),
IndexVersionUtils.randomVersionOnOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID_94),
8,
"dim.*,other.*,top",
useSyntheticId
Expand All @@ -699,17 +700,31 @@ public void testRoutingPathBwcAfterTsidBasedRouting() throws IOException {
* versions of Elasticsearch must continue to route based on the
* version on the index.
*/
assertIndexShard(fixture, Map.of("dim", Map.of("a", "a")), 7);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a test that uses the previous values for shard routing the index version just before the new one you added (IndexVersionUtils.randomPreviousCompatibleVersion(IndexVersions.CLUSTERING_TSID)).

This is to ensure shard routing stays consistent for existing indices (see also the code comment block above)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added in 0da5189

assertIndexShard(fixture, Map.of("dim", Map.of("a", "b")), 5);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d")), 5);
assertIndexShard(fixture, Map.of("other", Map.of("a", "a")), 0);
assertIndexShard(fixture, Map.of("top", "a"), 7);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d"), "top", "b"), 2);
assertIndexShard(fixture, Map.of("dim.a", "a"), 7);
assertIndexShard(fixture, Map.of("dim.a", 1), 0);
assertIndexShard(fixture, Map.of("dim.a", "1"), 5);
assertIndexShard(fixture, Map.of("dim.a", true), 5);
assertIndexShard(fixture, Map.of("dim.a", "true"), 6);
if (TsidBuilder.useSingleBytePrefixLayout(fixture.routing.creationVersion)) {
assertIndexShard(fixture, Map.of("dim", Map.of("a", "a")), 5);
assertIndexShard(fixture, Map.of("dim", Map.of("a", "b")), 3);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d")), 7);
assertIndexShard(fixture, Map.of("other", Map.of("a", "a")), 1);
assertIndexShard(fixture, Map.of("top", "a"), 6);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d"), "top", "b"), 0);
assertIndexShard(fixture, Map.of("dim.a", "a"), 5);
assertIndexShard(fixture, Map.of("dim.a", 1), 2);
assertIndexShard(fixture, Map.of("dim.a", "1"), 0);
assertIndexShard(fixture, Map.of("dim.a", true), 3);
assertIndexShard(fixture, Map.of("dim.a", "true"), 1);
} else {
assertIndexShard(fixture, Map.of("dim", Map.of("a", "a")), 7);
assertIndexShard(fixture, Map.of("dim", Map.of("a", "b")), 5);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d")), 5);
assertIndexShard(fixture, Map.of("other", Map.of("a", "a")), 0);
assertIndexShard(fixture, Map.of("top", "a"), 7);
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d"), "top", "b"), 2);
assertIndexShard(fixture, Map.of("dim.a", "a"), 7);
assertIndexShard(fixture, Map.of("dim.a", 1), 0);
assertIndexShard(fixture, Map.of("dim.a", "1"), 5);
assertIndexShard(fixture, Map.of("dim.a", true), 5);
assertIndexShard(fixture, Map.of("dim.a", "true"), 6);
}
}

public void testRoutingPathReadWithInvalidString() {
Expand Down Expand Up @@ -1236,7 +1251,7 @@ private int expectedShard(IndexRouting routing, List<Object> keysAndValues, int
*/
private int hash(IndexRouting routing, List<Object> keysAndValues) {
if (routing instanceof IndexRouting.ExtractFromSource.ForIndexDimensions) {
return tsidBasedRoutingHash(keysAndValues);
return tsidBasedRoutingHash(keysAndValues, routing.creationVersion);
}
return legacyRoutingHash(keysAndValues);
}
Expand All @@ -1252,7 +1267,7 @@ private int legacyRoutingHash(List<Object> keysAndValues) {
return hash;
}

private static int tsidBasedRoutingHash(List<Object> keysAndValues) {
private static int tsidBasedRoutingHash(List<Object> keysAndValues, IndexVersion indexVersion) {
TsidBuilder tsidBuilder = new TsidBuilder();
for (int i = 0; i < keysAndValues.size(); i += 2) {
String key = keysAndValues.get(i).toString();
Expand All @@ -1271,6 +1286,6 @@ private static int tsidBasedRoutingHash(List<Object> keysAndValues) {
throw new IllegalArgumentException("Unsupported value type for TSID routing: " + value.getClass());
}
}
return StringHelper.murmurhash3_x86_32(tsidBuilder.buildTsid(), 0);
return StringHelper.murmurhash3_x86_32(tsidBuilder.buildTsid(indexVersion), 0);
}
}
Loading
Loading