Skip to content

Commit eba0245

Browse files
committed
Changing offset list strategy to pull all partition offsets in a single call to Kafka.
1 parent ffa7877 commit eba0245

File tree

1 file changed

+23
-7
lines changed

1 file changed

+23
-7
lines changed

plugin/trino-kafka/src/main/java/io/trino/plugin/kafka/KafkaFilterManager.java

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
import org.apache.kafka.common.config.ConfigResource;
3838

3939
import java.util.Collections;
40+
import java.util.HashMap;
4041
import java.util.List;
4142
import java.util.Map;
4243
import java.util.Optional;
@@ -47,7 +48,6 @@
4748
import static com.google.common.collect.ImmutableList.toImmutableList;
4849
import static com.google.common.collect.ImmutableMap.toImmutableMap;
4950
import static com.google.common.collect.ImmutableSet.toImmutableSet;
50-
import static com.google.common.collect.Iterables.getOnlyElement;
5151
import static io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR;
5252
import static io.trino.plugin.kafka.KafkaInternalFieldManager.InternalFieldId.OFFSET_TIMESTAMP_FIELD;
5353
import static io.trino.plugin.kafka.KafkaInternalFieldManager.InternalFieldId.PARTITION_ID_FIELD;
@@ -123,13 +123,23 @@ public KafkaFilteringResult getKafkaFilterResult(
123123
try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
124124
// filter negative value to avoid java.lang.IllegalArgumentException when using KafkaConsumer offsetsForTimes
125125
if (offsetTimestampRanged.get().begin() > INVALID_KAFKA_RANGE_INDEX) {
126+
Map<TopicPartition, Long> partitionBeginTimestamps = new HashMap<>();
127+
partitionBeginOffsets.forEach((partition, partitionIndex) -> {
128+
partitionBeginTimestamps.put(partition, offsetTimestampRanged.get().begin());
129+
});
130+
Map<TopicPartition, Long> beginOffsets = findOffsetsForTimestampGreaterOrEqual(kafkaConsumer, partitionBeginTimestamps);
126131
partitionBeginOffsets = overridePartitionBeginOffsets(partitionBeginOffsets,
127-
partition -> findOffsetsForTimestampGreaterOrEqual(kafkaConsumer, partition, offsetTimestampRanged.get().begin()));
132+
partition -> Optional.ofNullable(beginOffsets.get(partition)));
128133
}
129134
if (isTimestampUpperBoundPushdownEnabled(session, kafkaTableHandle.topicName())) {
130135
if (offsetTimestampRanged.get().end() > INVALID_KAFKA_RANGE_INDEX) {
136+
Map<TopicPartition, Long> partitionEndTimestamps = new HashMap<>();
137+
partitionEndOffsets.forEach((partition, partitionIndex) -> {
138+
partitionEndTimestamps.put(partition, offsetTimestampRanged.get().end());
139+
});
140+
Map<TopicPartition, Long> endOffsets = findOffsetsForTimestampGreaterOrEqual(kafkaConsumer, partitionEndTimestamps);
131141
partitionEndOffsets = overridePartitionEndOffsets(partitionEndOffsets,
132-
partition -> findOffsetsForTimestampGreaterOrEqual(kafkaConsumer, partition, offsetTimestampRanged.get().end()));
142+
partition -> Optional.ofNullable(endOffsets.get(partition)));
133143
}
134144
}
135145
}
@@ -172,11 +182,17 @@ private boolean isTimestampUpperBoundPushdownEnabled(ConnectorSession session, S
172182
return KafkaSessionProperties.isTimestampUpperBoundPushdownEnabled(session);
173183
}
174184

175-
private static Optional<Long> findOffsetsForTimestampGreaterOrEqual(KafkaConsumer<byte[], byte[]> kafkaConsumer, TopicPartition topicPartition, long timestamp)
185+
private static Map<TopicPartition, Long> findOffsetsForTimestampGreaterOrEqual(KafkaConsumer<byte[], byte[]> kafkaConsumer, Map<TopicPartition, Long> timestamps)
176186
{
177-
final long transferTimestamp = floorDiv(timestamp, MICROSECONDS_PER_MILLISECOND);
178-
Map<TopicPartition, OffsetAndTimestamp> topicPartitionOffsets = kafkaConsumer.offsetsForTimes(ImmutableMap.of(topicPartition, transferTimestamp));
179-
return Optional.ofNullable(getOnlyElement(topicPartitionOffsets.values(), null)).map(OffsetAndTimestamp::offset);
187+
timestamps.replaceAll((k, v) -> floorDiv(v, MICROSECONDS_PER_MILLISECOND));
188+
Map<TopicPartition, OffsetAndTimestamp> topicPartitionOffsetAndTimestamps = kafkaConsumer.offsetsForTimes(timestamps);
189+
Map<TopicPartition, Long> topicPartitionOffsets = new HashMap<>();
190+
topicPartitionOffsetAndTimestamps.forEach((topicPartition, offsetAndTimestamp) -> {
191+
if (offsetAndTimestamp != null) {
192+
topicPartitionOffsets.put(topicPartition, offsetAndTimestamp.offset());
193+
}
194+
});
195+
return topicPartitionOffsets;
180196
}
181197

182198
private static Map<TopicPartition, Long> overridePartitionBeginOffsets(Map<TopicPartition, Long> partitionBeginOffsets,

0 commit comments

Comments
 (0)