Skip to content

Commit 9f1259a

Browse files
committed
update
1 parent 49d9ace commit 9f1259a

File tree

3 files changed

+82
-37
lines changed

3 files changed

+82
-37
lines changed

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/ExampleParquetWriter.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ public static Builder builder(OutputFile file) {
9797

9898
public static class Builder extends ParquetWriter.Builder<Group, Builder> {
9999
private MessageType type = null;
100-
private boolean strictUnsignedIntegerValidation = false;
100+
private boolean strictUnsignedIntegerValidation = true;
101101

102102
private Builder(Path file) {
103103
super(file);

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/ValidatingUnsignedIntegerRecordConsumer.java

Lines changed: 38 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.parquet.hadoop.example;
2020

21+
import java.util.Optional;
2122
import java.util.Stack;
2223
import org.apache.parquet.io.InvalidRecordException;
2324
import org.apache.parquet.io.api.RecordConsumer;
@@ -128,50 +129,54 @@ private void validateUnsignedInteger(int value) {
128129
Type currentType = getCurrentFieldType();
129130
if (currentType != null && currentType.isPrimitive()) {
130131
LogicalTypeAnnotation logicalType = currentType.asPrimitiveType().getLogicalTypeAnnotation();
131-
if (logicalType instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
132-
LogicalTypeAnnotation.IntLogicalTypeAnnotation intType =
133-
(LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalType;
134-
if (!intType.isSigned()) {
135-
switch (intType.getBitWidth()) {
136-
case 8:
137-
if (value < 0 || value > 255) {
138-
throw new InvalidRecordException("Value " + value
139-
+ " is out of range for UINT_8 (0-255) in field " + currentType.getName());
140-
}
141-
break;
142-
case 16:
143-
if (value < 0 || value > 65535) {
144-
throw new InvalidRecordException("Value " + value
145-
+ " is out of range for UINT_16 (0-65535) in field " + currentType.getName());
146-
}
147-
break;
148-
case 32:
149-
case 64:
150-
if (value < 0) {
151-
throw new InvalidRecordException("Negative value " + value
152-
+ " is not allowed for unsigned integer type " + currentType.getName());
153-
}
154-
break;
132+
logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Void>() {
133+
@Override
134+
public Optional<Void> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intType) {
135+
if (!intType.isSigned()) {
136+
switch (intType.getBitWidth()) {
137+
case 8:
138+
if (value < 0 || value > 255) {
139+
throw new InvalidRecordException("Value " + value
140+
+ " is out of range for UINT_8 (0-255) in field " + currentType.getName());
141+
}
142+
break;
143+
case 16:
144+
if (value < 0 || value > 65535) {
145+
throw new InvalidRecordException("Value " + value
146+
+ " is out of range for UINT_16 (0-65535) in field " + currentType.getName());
147+
}
148+
break;
149+
case 32:
150+
case 64:
151+
if (value < 0) {
152+
throw new InvalidRecordException("Negative value " + value
153+
+ " is not allowed for unsigned integer type " + currentType.getName());
154+
}
155+
break;
156+
}
155157
}
158+
return Optional.empty();
156159
}
157-
}
160+
});
158161
}
159162
}
160163

161164
private void validateUnsignedLong(long value) {
162165
Type currentType = getCurrentFieldType();
163166
if (currentType != null && currentType.isPrimitive()) {
164167
LogicalTypeAnnotation logicalType = currentType.asPrimitiveType().getLogicalTypeAnnotation();
165-
if (logicalType instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
166-
LogicalTypeAnnotation.IntLogicalTypeAnnotation intType =
167-
(LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalType;
168-
if (!intType.isSigned()) {
169-
if (value < 0) {
170-
throw new InvalidRecordException("Negative value " + value
171-
+ " is not allowed for unsigned integer type " + currentType.getName());
168+
logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Void>() {
169+
@Override
170+
public Optional<Void> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intType) {
171+
if (!intType.isSigned()) {
172+
if (value < 0) {
173+
throw new InvalidRecordException("Negative value " + value
174+
+ " is not allowed for unsigned integer type " + currentType.getName());
175+
}
172176
}
177+
return Optional.empty();
173178
}
174-
}
179+
});
175180
}
176181
}
177182

parquet-hadoop/src/test/java/org/apache/parquet/hadoop/example/TestStrictUnsignedIntegerValidation.java

Lines changed: 43 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,44 @@ public void testValidUnsignedIntegerValues() throws IOException {
9191
}
9292
}
9393

94+
@Test
95+
public void testMaximumUnsignedIntegerValues() throws IOException {
96+
MessageType schema = Types.buildMessage()
97+
.required(INT32)
98+
.as(intType(8, false))
99+
.named("uint8_field")
100+
.required(INT32)
101+
.as(intType(16, false))
102+
.named("uint16_field")
103+
.required(INT32)
104+
.as(intType(32, false))
105+
.named("uint32_field")
106+
.required(INT64)
107+
.as(intType(64, false))
108+
.named("uint64_field")
109+
.named("test_schema");
110+
111+
File tempFile = new File(tempFolder.getRoot(), "max_unsigned.parquet");
112+
Path outputPath = new Path(tempFile.getAbsolutePath());
113+
114+
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(outputPath)
115+
.withType(schema)
116+
.withStrictUnsignedIntegerValidation(true)
117+
.build()) {
118+
119+
SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
120+
121+
Group maxGroup = groupFactory
122+
.newGroup()
123+
.append("uint8_field", 255)
124+
.append("uint16_field", 65535)
125+
.append("uint32_field", Integer.MAX_VALUE)
126+
.append("uint64_field", Long.MAX_VALUE);
127+
128+
writer.write(maxGroup);
129+
}
130+
}
131+
94132
@Test
95133
public void testInvalidUint8Values() throws IOException {
96134
MessageType schema = Types.buildMessage()
@@ -192,14 +230,14 @@ public void testInvalidUint64Values() throws IOException {
192230
}
193231

194232
@Test
195-
public void testValidationDisabledByDefault() throws IOException {
233+
public void testValidationEnabledByDefault() throws IOException {
196234
MessageType schema = Types.buildMessage()
197235
.required(INT32)
198236
.as(intType(8, false))
199237
.named("uint8_field")
200238
.named("test_schema");
201239

202-
File tempFile = new File(tempFolder.getRoot(), "validation_disabled.parquet");
240+
File tempFile = new File(tempFolder.getRoot(), "validation_enabled.parquet");
203241
Path outputPath = new Path(tempFile.getAbsolutePath());
204242

205243
try (ParquetWriter<Group> writer =
@@ -208,7 +246,9 @@ public void testValidationDisabledByDefault() throws IOException {
208246
SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
209247

210248
Group invalidGroup = groupFactory.newGroup().append("uint8_field", -5);
211-
writer.write(invalidGroup);
249+
assertThrows(InvalidRecordException.class, () -> {
250+
writer.write(invalidGroup);
251+
});
212252
}
213253
}
214254

0 commit comments

Comments
 (0)