
Commit 71fe322

Update to Flink 2.1 (#229)
1 parent 00b3f37

21 files changed, +203 -193 lines


.github/workflows/delete-pr-images.yml

Lines changed: 1 addition & 6 deletions
@@ -7,19 +7,14 @@ on:
 jobs:
   delete-ghcr:
     runs-on: ubuntu-latest
-
-    strategy:
-      matrix:
-        FLINK_PROFILE: [flink-1.19, flink-1.20]
-
     steps:
       - name: Delete GHCR image via GitHub REST API
         env:
           GH_USERNAME: ${{ secrets.PAT_USERNAME }}
           GH_PASSWORD: ${{ secrets.PAT_PASSWORD }}
           ORG: DataSQRL
           PACKAGE: flink-sql-runner
-          TAG: pr-${{ github.event.number }}-${{ matrix.FLINK_PROFILE }}
+          TAG: pr-${{ github.event.number }}
         run: |
           set -euxo pipefail
 

.github/workflows/deploy.yml

Lines changed: 5 additions & 6 deletions
@@ -55,8 +55,8 @@ jobs:
 
       - name: Download dependencies
         run: |
-          mvn -B org.apache.maven.plugins:maven-dependency-plugin:3.8.1:go-offline de.qaware.maven:go-offline-maven-plugin:1.2.8:resolve-dependencies -Pci,flink-1.19
-          mvn -B -f flink-sql-runner/pom.xml org.apache.maven.plugins:maven-resources-plugin:3.3.1:resources -Pci,flink-1.19
+          mvn -B org.apache.maven.plugins:maven-dependency-plugin:3.8.1:go-offline de.qaware.maven:go-offline-maven-plugin:1.2.8:resolve-dependencies -Pci
+          mvn -B -f flink-sql-runner/pom.xml org.apache.maven.plugins:maven-resources-plugin:3.3.1:resources -Pci
 
       - name: Get project version and store in env
         run: |
@@ -74,10 +74,9 @@
       - name: Run Maven Build
         run: |
           if [[ "${{ github.event_name }}" == "release" ]]; then
-            mvn -B clean deploy -Pci,release,flink-1.19
+            mvn -B clean deploy -Pci,release
          elif [[ "${{ github.ref_name }}" == "main" ]]; then
-            mvn -B clean deploy -Pci,flink-1.19
+            mvn -B clean deploy -Pci
           else
-            mvn -B clean install -Pci,flink-1.19 -Dgpg.skip=true
+            mvn -B clean install -Pci -Dgpg.skip=true
           fi
-

.github/workflows/uber-jar.yml

Lines changed: 13 additions & 16 deletions
@@ -20,10 +20,6 @@ jobs:
       contents: write
       packages: write
 
-    strategy:
-      matrix:
-        FLINK_PROFILE: [flink-1.19]
-
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -55,36 +51,37 @@
 
       - name: Download dependencies
         run: |
-          mvn -B org.apache.maven.plugins:maven-dependency-plugin:3.8.1:go-offline de.qaware.maven:go-offline-maven-plugin:1.2.8:resolve-dependencies -P${{ matrix.FLINK_PROFILE }}
-          mvn -B -f flink-sql-runner/pom.xml org.apache.maven.plugins:maven-resources-plugin:3.3.1:resources -P${{ matrix.FLINK_PROFILE }}
+          mvn -B org.apache.maven.plugins:maven-dependency-plugin:3.8.1:go-offline de.qaware.maven:go-offline-maven-plugin:1.2.8:resolve-dependencies
+          mvn -B -f flink-sql-runner/pom.xml org.apache.maven.plugins:maven-resources-plugin:3.3.1:resources
           grep '^FROM' flink-sql-runner/target/Dockerfile | awk '{print $2}' | xargs -n1 docker pull
 
-      - name: Get project version and store in env
+      - name: Get project version and Flink version
        run: |
           if [[ "${{ github.event_name }}" == "release" && "${{ github.event.action }}" == "created" ]]; then
-            echo "VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
+            echo "PROJECT_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
           else
-            echo "VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)" >> $GITHUB_ENV
+            echo "PROJECT_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)" >> $GITHUB_ENV
           fi
+          echo "FLINK_VERSION_LABEL=$(mvn help:evaluate -Dexpression=flink.version.label -q -DforceStdout)" >> $GITHUB_ENV
 
       - name: Update version
         if: github.event_name == 'release' && github.event.action == 'created'
         run: |
-          mvn -B versions:set -DnewVersion=${{ env.VERSION }}
+          mvn -B versions:set -DnewVersion=${{ env.PROJECT_VERSION }}
 
       - name: Run Maven Build
         run: |
-          mvn -B clean install -P${{ matrix.FLINK_PROFILE }}
+          mvn -B clean install
 
       - name: Define Docker image tag
         id: vars
         run: |
           if [[ "${{ github.event_name }}" == "release" && "${{ github.event.action }}" == "created" ]]; then
-            echo "image_tag=datasqrl/flink-sql-runner:${{ env.VERSION }}-${{ matrix.FLINK_PROFILE }}" >> $GITHUB_OUTPUT
+            echo "image_tag=datasqrl/flink-sql-runner:${{ env.PROJECT_VERSION }}-${{ env.FLINK_VERSION_LABEL }}" >> $GITHUB_OUTPUT
           elif [[ "${{ github.event_name }}" == "push" ]]; then
-            echo "image_tag=ghcr.io/datasqrl/flink-sql-runner:${{ env.VERSION }}-${{ matrix.FLINK_PROFILE }}" >> $GITHUB_OUTPUT
+            echo "image_tag=ghcr.io/datasqrl/flink-sql-runner:${{ env.PROJECT_VERSION }}" >> $GITHUB_OUTPUT
           else
-            echo "image_tag=ghcr.io/datasqrl/flink-sql-runner:pr-${{ github.event.number }}-${{ matrix.FLINK_PROFILE }}" >> $GITHUB_OUTPUT
+            echo "image_tag=ghcr.io/datasqrl/flink-sql-runner:pr-${{ github.event.number }}" >> $GITHUB_OUTPUT
           fi
 
       - name: Log in to container registry
@@ -106,12 +103,12 @@ jobs:
       - name: Rename jar file
         if: github.event_name == 'release' && github.event.action == 'created'
         run: |
-          mv flink-sql-runner/target/flink-sql-runner.uber.jar flink-sql-runner/target/flink-sql-runner-${{ env.VERSION }}-${{ matrix.FLINK_PROFILE }}.jar
+          mv flink-sql-runner/target/flink-sql-runner.uber.jar flink-sql-runner/target/flink-sql-runner-${{ env.PROJECT_VERSION }}-${{ env.FLINK_VERSION_LABEL }}.jar
 
       - name: Upload to GitHub Release
         if: github.event_name == 'release' && github.event.action == 'created'
         uses: softprops/action-gh-release@v1
         with:
-          files: flink-sql-runner/target/flink-sql-runner-${{ env.VERSION }}-${{ matrix.FLINK_PROFILE }}.jar
+          files: flink-sql-runner/target/flink-sql-runner-${{ env.PROJECT_VERSION }}-${{ env.FLINK_VERSION_LABEL }}.jar
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

connectors/kafka-safe-connector/src/main/java/com/datasqrl/flinkrunner/connector/kafka/DeserFailureHandler.java

Lines changed: 2 additions & 2 deletions
@@ -53,7 +53,7 @@ public static DeserFailureHandler of(ReadableConfig tableOptions, Properties con
   }
 
   public void deserWithFailureHandling(
-      ConsumerRecord<byte[], byte[]> record, DeserializationCaller deser) throws Exception {
+      ConsumerRecord<byte[], byte[]> record, DeserializationCaller deser) throws IOException {
 
     try {
       deser.call();
@@ -82,6 +82,6 @@ public void deserWithFailureHandling(
   }
 
   public interface DeserializationCaller extends Serializable {
-    void call() throws Exception;
+    void call() throws IOException;
   }
 }

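Note: narrowing the handler contract from throws Exception to throws IOException lines it up with DeserializationSchema#deserialize, which declares IOException. The following is a minimal usage sketch, not part of this commit; the wrapper class and method names outside the diff above are assumptions for illustration.

import java.io.IOException;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerRecord;

import com.datasqrl.flinkrunner.connector.kafka.DeserFailureHandler;

// Hypothetical caller showing how the narrowed DeserializationCaller composes
// with a DeserializationSchema, whose deserialize(...) also declares IOException.
class DeserFailureHandlerUsageSketch {
  void decode(
      DeserFailureHandler handler,
      DeserializationSchema<RowData> valueDeserialization,
      ConsumerRecord<byte[], byte[]> record,
      Collector<RowData> out)
      throws IOException {
    // The lambda is a DeserializationCaller; since call() now throws IOException,
    // it can delegate directly to DeserializationSchema#deserialize without wrapping.
    handler.deserWithFailureHandling(
        record, () -> valueDeserialization.deserialize(record.value(), out));
  }
}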
connectors/kafka-safe-connector/src/main/java/org/apache/flink/streaming/connectors/kafka/table/SafeDynamicKafkaDeserializationSchema.java

Lines changed: 4 additions & 3 deletions
@@ -17,16 +17,17 @@
 
 import org.apache.flink.api.common.serialization.DeserializationSchema;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
+import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.util.Collector;
 
 import com.datasqrl.flinkrunner.connector.kafka.DeserFailureHandler;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 
 import javax.annotation.Nullable;
+import java.io.IOException;
 
-/** A specific {@link KafkaSerializationSchema} for {@link SafeKafkaDynamicSource}. */
+/** A specific {@link KafkaRecordDeserializationSchema} for {@link SafeKafkaDynamicSource}. */
 public class SafeDynamicKafkaDeserializationSchema extends DynamicKafkaDeserializationSchema {
 
   private final DeserFailureHandler deserFailureHandler;
@@ -57,7 +58,7 @@ public class SafeDynamicKafkaDeserializationSchema extends DynamicKafkaDeseriali
 
   @Override
   public void deserialize(ConsumerRecord<byte[], byte[]> record, Collector<RowData> collector)
-      throws Exception {
+      throws IOException {
     deserFailureHandler.deserWithFailureHandling(
         record, () -> super.deserialize(record, collector));
   }

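Note: the schema now targets the new-source KafkaRecordDeserializationSchema, whose deserialize(ConsumerRecord, Collector) declares IOException, instead of the legacy KafkaDeserializationSchema. A minimal, self-contained sketch of that contract follows; it is illustrative only, not project code, and the wrapper class below is hypothetical.

import java.io.IOException;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerRecord;

// Sketch of the interface shape the Safe* schema now satisfies: a per-record
// deserializer for the new KafkaSource that only throws IOException.
class ValueOnlyRecordDeserializer<T> implements KafkaRecordDeserializationSchema<T> {

  private final DeserializationSchema<T> valueDeserialization;

  ValueOnlyRecordDeserializer(DeserializationSchema<T> valueDeserialization) {
    this.valueDeserialization = valueDeserialization;
  }

  @Override
  public void open(DeserializationSchema.InitializationContext context) throws Exception {
    valueDeserialization.open(context);
  }

  @Override
  public void deserialize(ConsumerRecord<byte[], byte[]> record, Collector<T> out)
      throws IOException {
    // Only the record value is decoded here; keys and metadata are ignored.
    valueDeserialization.deserialize(record.value(), out);
  }

  @Override
  public TypeInformation<T> getProducedType() {
    return valueDeserialization.getProducedType();
  }
}

For plain value-only decoding, KafkaRecordDeserializationSchema.valueOnly(...) already provides an equivalent adapter; the sketch only spells out the method signatures involved.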
connectors/kafka-safe-connector/src/main/java/org/apache/flink/streaming/connectors/kafka/table/SafeKafkaDynamicSource.java

Lines changed: 51 additions & 19 deletions
@@ -28,10 +28,8 @@
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
 import org.apache.flink.streaming.connectors.kafka.config.BoundedMode;
 import org.apache.flink.streaming.connectors.kafka.config.StartupMode;
-import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
 import org.apache.flink.streaming.connectors.kafka.table.DynamicKafkaDeserializationSchema.MetadataConverter;
 import org.apache.flink.table.api.DataTypes;
 import org.apache.flink.table.connector.ChangelogMode;
@@ -48,7 +46,10 @@
 import org.apache.flink.table.data.StringData;
 import org.apache.flink.table.data.TimestampData;
 import org.apache.flink.table.types.DataType;
-import org.apache.flink.table.types.utils.DataTypeUtils;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeUtils;
 import org.apache.flink.util.Preconditions;
 
 import com.datasqrl.flinkrunner.connector.kafka.DeserFailureHandler;
@@ -69,12 +70,15 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Properties;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 
+import static org.apache.flink.table.types.logical.LogicalTypeRoot.ROW;
+
 /** A version-agnostic Kafka {@link ScanTableSource}. */
 @Internal
 public class SafeKafkaDynamicSource
@@ -141,7 +145,7 @@ public class SafeKafkaDynamicSource
    * Specific startup offsets; only relevant when startup mode is {@link
    * StartupMode#SPECIFIC_OFFSETS}.
    */
-  protected final Map<KafkaTopicPartition, Long> specificStartupOffsets;
+  protected final Map<TopicPartition, Long> specificStartupOffsets;
 
   /**
    * The start timestamp to locate partition offsets; only relevant when startup mode is {@link
@@ -156,7 +160,7 @@ public class SafeKafkaDynamicSource
    * Specific end offsets; only relevant when bounded mode is {@link
    * BoundedMode#SPECIFIC_OFFSETS}.
   */
-  protected final Map<KafkaTopicPartition, Long> specificBoundedOffsets;
+  protected final Map<TopicPartition, Long> specificBoundedOffsets;
 
   /**
    * The bounded timestamp to locate partition offsets; only relevant when bounded mode is {@link
@@ -169,6 +173,9 @@
 
   protected final String tableIdentifier;
 
+  /** Parallelism of the physical Kafka consumer. * */
+  protected final @Nullable Integer parallelism;
+
   protected final DeserFailureHandler deserFailureHandler;
 
   public SafeKafkaDynamicSource(
@@ -182,13 +189,14 @@ public SafeKafkaDynamicSource(
       @Nullable Pattern topicPattern,
       Properties properties,
       StartupMode startupMode,
-      Map<KafkaTopicPartition, Long> specificStartupOffsets,
+      Map<TopicPartition, Long> specificStartupOffsets,
       long startupTimestampMillis,
       BoundedMode boundedMode,
-      Map<KafkaTopicPartition, Long> specificBoundedOffsets,
+      Map<TopicPartition, Long> specificBoundedOffsets,
       long boundedTimestampMillis,
       boolean upsertMode,
       String tableIdentifier,
+      @Nullable Integer parallelism,
      DeserFailureHandler deserFailureHandler) {
     // Format attributes
     this.physicalDataType =
@@ -229,6 +237,7 @@ public SafeKafkaDynamicSource(
     this.boundedTimestampMillis = boundedTimestampMillis;
     this.upsertMode = upsertMode;
     this.tableIdentifier = tableIdentifier;
+    this.parallelism = parallelism;
     this.deserFailureHandler = deserFailureHandler;
   }
 
@@ -269,6 +278,11 @@ public DataStream<RowData> produceDataStream(
       public boolean isBounded() {
        return kafkaSource.getBoundedness() == Boundedness.BOUNDED;
       }
+
+      @Override
+      public Optional<Integer> getParallelism() {
+        return Optional.ofNullable(parallelism);
+      }
     };
   }
 
@@ -347,6 +361,7 @@ public DynamicTableSource copy() {
             boundedTimestampMillis,
             upsertMode,
             tableIdentifier,
+            parallelism,
             deserFailureHandler);
     copy.producedDataType = producedDataType;
     copy.metadataKeys = metadataKeys;
@@ -387,7 +402,8 @@ public boolean equals(Object o) {
         && boundedTimestampMillis == that.boundedTimestampMillis
         && Objects.equals(upsertMode, that.upsertMode)
         && Objects.equals(tableIdentifier, that.tableIdentifier)
-        && Objects.equals(watermarkStrategy, that.watermarkStrategy);
+        && Objects.equals(watermarkStrategy, that.watermarkStrategy)
+        && Objects.equals(parallelism, that.parallelism);
   }
 
   @Override
@@ -412,7 +428,8 @@ public int hashCode() {
         boundedTimestampMillis,
         upsertMode,
         tableIdentifier,
-        watermarkStrategy);
+        watermarkStrategy,
+        parallelism);
   }
 
   // --------------------------------------------------------------------------------------------
@@ -422,7 +439,7 @@ protected KafkaSource<RowData> createKafkaSource(
       DeserializationSchema<RowData> valueDeserialization,
       TypeInformation<RowData> producedTypeInfo) {
 
-    final KafkaDeserializationSchema<RowData> kafkaDeserializer =
+    final KafkaRecordDeserializationSchema<RowData> kafkaDeserializer =
         createKafkaDeserializationSchema(
             keyDeserialization, valueDeserialization, producedTypeInfo);
 
@@ -455,8 +472,7 @@ protected KafkaSource<RowData> createKafkaSource(
         specificStartupOffsets.forEach(
             (tp, offset) ->
                 offsets.put(
-                    new TopicPartition(tp.getTopic(), tp.getPartition()),
-                    offset));
+                    new TopicPartition(tp.topic(), tp.partition()), offset));
         kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.offsets(offsets));
         break;
       case TIMESTAMP:
@@ -480,18 +496,15 @@
         specificBoundedOffsets.forEach(
            (tp, offset) ->
                offsets.put(
-                    new TopicPartition(tp.getTopic(), tp.getPartition()),
-                    offset));
+                    new TopicPartition(tp.topic(), tp.partition()), offset));
         kafkaSourceBuilder.setBounded(OffsetsInitializer.offsets(offsets));
         break;
       case TIMESTAMP:
         kafkaSourceBuilder.setBounded(OffsetsInitializer.timestamp(boundedTimestampMillis));
        break;
     }
 
-    kafkaSourceBuilder
-        .setProperties(properties)
-        .setDeserializer(KafkaRecordDeserializationSchema.of(kafkaDeserializer));
+    kafkaSourceBuilder.setProperties(properties).setDeserializer(kafkaDeserializer);
 
     return kafkaSourceBuilder.build();
   }
@@ -513,7 +526,7 @@ private OffsetResetStrategy getResetStrategy(String offsetResetConfig) {
                             .collect(Collectors.joining(",")))));
   }
 
-  private KafkaDeserializationSchema<RowData> createKafkaDeserializationSchema(
+  private KafkaRecordDeserializationSchema<RowData> createKafkaDeserializationSchema(
       DeserializationSchema<RowData> keyDeserialization,
      DeserializationSchema<RowData> valueDeserialization,
       TypeInformation<RowData> producedTypeInfo) {
@@ -567,11 +580,30 @@ private KafkaDeserializationSchema<RowData> createKafkaDeserializationSchema(
     }
     DataType physicalFormatDataType = Projection.of(projection).project(this.physicalDataType);
     if (prefix != null) {
-      physicalFormatDataType = DataTypeUtils.stripRowPrefix(physicalFormatDataType, prefix);
+      physicalFormatDataType = stripRowPrefix(physicalFormatDataType, prefix);
     }
     return format.createRuntimeDecoder(context, physicalFormatDataType);
   }
 
+  /** Removes a string prefix from the fields of the given row data type. */
+  private static DataType stripRowPrefix(DataType dataType, String prefix) {
+    Preconditions.checkArgument(dataType.getLogicalType().is(ROW), "Row data type expected.");
+    final RowType rowType = (RowType) dataType.getLogicalType();
+    final List<String> newFieldNames =
+        rowType.getFieldNames().stream()
+            .map(
+                s -> {
+                  if (s.startsWith(prefix)) {
+                    return s.substring(prefix.length());
+                  }
+                  return s;
+                })
+            .collect(Collectors.toList());
+    final LogicalType newRowType = LogicalTypeUtils.renameRowFields(rowType, newFieldNames);
+    return new FieldsDataType(
+        newRowType, dataType.getConversionClass(), dataType.getChildren());
+  }
+
   // --------------------------------------------------------------------------------------------
   // Metadata handling
   // --------------------------------------------------------------------------------------------

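Note: with KafkaTopicPartition gone, specific offsets are keyed directly by Kafka's own TopicPartition, and the KafkaRecordDeserializationSchema is passed to the builder without the KafkaRecordDeserializationSchema.of(...) adapter the old code needed. Below is a standalone sketch of that builder wiring; it is illustrative only, and the bootstrap servers, topic names, and offsets are made up.

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
import org.apache.kafka.common.TopicPartition;

// Sketch: start a KafkaSource from specific offsets keyed by TopicPartition,
// handing a KafkaRecordDeserializationSchema straight to the builder.
class SpecificOffsetsExample {
  static KafkaSource<String> build() {
    Map<TopicPartition, Long> offsets = new HashMap<>();
    offsets.put(new TopicPartition("orders", 0), 42L);
    offsets.put(new TopicPartition("orders", 1), 7L);

    return KafkaSource.<String>builder()
        .setBootstrapServers("localhost:9092")
        .setTopics("orders")
        .setStartingOffsets(OffsetsInitializer.offsets(offsets))
        // The deserializer is set directly; no adapter around a legacy
        // KafkaDeserializationSchema is involved.
        .setDeserializer(
            KafkaRecordDeserializationSchema.valueOnly(new SimpleStringSchema()))
        .build();
  }
}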
0 commit comments
