Skip to content

Commit f82e479

Browse files
committed
fix: Use Guava.Iterables.indexOf to get column index from position in fields list
1 parent 1083c7e commit f82e479

File tree

4 files changed

+40
-39
lines changed

4 files changed

+40
-39
lines changed

glint/src/main/java/co/clflushopt/glint/datasource/CsvDataSource.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,13 @@ private Schema inferSchema() throws FileNotFoundException {
7777
parser.stopParsing();
7878

7979
if (hasHeaders) {
80-
return new Schema(Streams.mapWithIndex(List.of(headers).stream(), (columnName,
81-
columnIndex) -> new Field(columnName, (int) columnIndex, ArrowTypes.StringType))
80+
return new Schema(Streams.mapWithIndex(List.of(headers).stream(),
81+
(columnName, columnIndex) -> new Field(columnName, ArrowTypes.StringType))
8282
.toList());
8383

8484
} else {
85-
return new Schema(Streams.mapWithIndex(List.of(headers).stream(),
86-
(_field, index) -> new Field(String.format("field_%d", index), (int) index,
87-
ArrowTypes.StringType))
85+
return new Schema(Streams.mapWithIndex(List.of(headers).stream(), (_field,
86+
index) -> new Field(String.format("field_%d", index), ArrowTypes.StringType))
8887
.toList());
8988
}
9089

glint/src/main/java/co/clflushopt/glint/types/Field.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Field is a metadata type used to qualify columns in the columnar format with
99
* a name and data type.
1010
*/
11-
public record Field(String name, int index, ArrowType dataType) {
11+
public record Field(String name, ArrowType dataType) {
1212

1313
/**
1414
* Transform an internal `Field` type to Arrow Field type.

glint/src/test/java/co/clflushopt/glint/datasource/CsvDataSourceTest.java

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
import org.junit.Test;
1414

15+
import com.google.common.collect.Iterables;
16+
1517
import co.clflushopt.glint.types.ArrowTypes;
1618
import co.clflushopt.glint.types.Field;
1719
import co.clflushopt.glint.types.RecordBatch;
@@ -22,12 +24,12 @@ public class CsvDataSourceTest {
2224
@Test
2325
public void canProcessSimpleCsvFilesWithoutHeaders() {
2426
String filename = "./testdata/employee_no_header.csv";
25-
List<Field> expectedFields = List.of(new Field("field_0", 0, ArrowTypes.StringType),
26-
new Field("field_1", 1, ArrowTypes.StringType),
27-
new Field("field_2", 2, ArrowTypes.StringType),
28-
new Field("field_3", 3, ArrowTypes.StringType),
29-
new Field("field_4", 4, ArrowTypes.StringType),
30-
new Field("field_5", 5, ArrowTypes.StringType));
27+
List<Field> expectedFields = List.of(new Field("field_0", ArrowTypes.StringType),
28+
new Field("field_1", ArrowTypes.StringType),
29+
new Field("field_2", ArrowTypes.StringType),
30+
new Field("field_3", ArrowTypes.StringType),
31+
new Field("field_4", ArrowTypes.StringType),
32+
new Field("field_5", ArrowTypes.StringType));
3133
Schema expectedSchema = new Schema(expectedFields);
3234

3335
// Try building the schema from the datasource.
@@ -49,12 +51,12 @@ public void canProcessSimpleCsvFilesWithoutHeaders() {
4951
@Test
5052
public void canInferColumnNamesFromHeaders() {
5153
String filename = "./testdata/employee.csv";
52-
List<Field> expectedFields = List.of(new Field("id", 0, ArrowTypes.StringType),
53-
new Field("first_name", 1, ArrowTypes.StringType),
54-
new Field("last_name", 2, ArrowTypes.StringType),
55-
new Field("state", 3, ArrowTypes.StringType),
56-
new Field("job_title", 4, ArrowTypes.StringType),
57-
new Field("salary", 5, ArrowTypes.StringType));
54+
List<Field> expectedFields = List.of(new Field("id", ArrowTypes.StringType),
55+
new Field("first_name", ArrowTypes.StringType),
56+
new Field("last_name", ArrowTypes.StringType),
57+
new Field("state", ArrowTypes.StringType),
58+
new Field("job_title", ArrowTypes.StringType),
59+
new Field("salary", ArrowTypes.StringType));
5860
Schema expectedSchema = new Schema(expectedFields);
5961

6062
// Try building the schema from the datasource.
@@ -76,12 +78,12 @@ public void canInferColumnNamesFromHeaders() {
7678
@Test
7779
public void canBuildDatasourceFromSchema() {
7880
String filename = "./testdata/employee.csv";
79-
List<Field> expectedFields = List.of(new Field("id", 0, ArrowTypes.Int64Type),
80-
new Field("first_name", 1, ArrowTypes.Int64Type),
81-
new Field("last_name", 2, ArrowTypes.StringType),
82-
new Field("state", 3, ArrowTypes.StringType),
83-
new Field("job_title", 4, ArrowTypes.StringType),
84-
new Field("salary", 5, ArrowTypes.Int64Type));
81+
List<Field> expectedFields = List.of(new Field("id", ArrowTypes.StringType),
82+
new Field("first_name", ArrowTypes.StringType), // Changed from Int64Type
83+
new Field("last_name", ArrowTypes.StringType),
84+
new Field("state", ArrowTypes.StringType),
85+
new Field("job_title", ArrowTypes.StringType),
86+
new Field("salary", ArrowTypes.StringType));
8587
Schema expectedSchema = new Schema(expectedFields);
8688

8789
// Try building the schema from the datasource.
@@ -103,12 +105,12 @@ public void canBuildDatasourceFromSchema() {
103105
@Test
104106
public void canProcessSimpleCSvFileWithHeaderAndSchema() {
105107
String filename = "./testdata/employee.csv";
106-
List<Field> expectedFields = List.of(new Field("id", 0, ArrowTypes.Int64Type),
107-
new Field("first_name", 1, ArrowTypes.StringType), // Changed from Int64Type
108-
new Field("last_name", 2, ArrowTypes.StringType),
109-
new Field("state", 3, ArrowTypes.StringType),
110-
new Field("job_title", 4, ArrowTypes.StringType),
111-
new Field("salary", 5, ArrowTypes.Int64Type));
108+
List<Field> expectedFields = List.of(new Field("id", ArrowTypes.Int64Type),
109+
new Field("first_name", ArrowTypes.StringType), // Changed from Int64Type
110+
new Field("last_name", ArrowTypes.StringType),
111+
new Field("state", ArrowTypes.StringType),
112+
new Field("job_title", ArrowTypes.StringType),
113+
new Field("salary", ArrowTypes.Int64Type));
112114
Schema expectedSchema = new Schema(expectedFields);
113115
Optional<Schema> schema = Optional.of(expectedSchema);
114116

@@ -176,12 +178,12 @@ public void canProcessSimpleCSvFileWithHeaderAndSchema() {
176178

177179
// Helper methods to get values from the batch
178180
private String getStringValue(RecordBatch batch, String columnName, int rowIndex) {
179-
var columnIndex = batch.getSchema().select(List.of(columnName)).getFields().get(0).index();
180-
return (String) batch.getField(columnIndex).getValue(rowIndex);
181+
var index = Iterables.indexOf(batch.getSchema().getFields(), f -> f.name() == columnName);
182+
return (String) batch.getField(index).getValue(rowIndex);
181183
}
182184

183185
private long getLongValue(RecordBatch batch, String columnName, int rowIndex) {
184-
var columnIndex = batch.getSchema().select(List.of(columnName)).getFields().get(0).index();
185-
return (Long) batch.getField(columnIndex).getValue(rowIndex);
186+
var index = Iterables.indexOf(batch.getSchema().getFields(), f -> f.name() == columnName);
187+
return (Long) batch.getField(index).getValue(rowIndex);
186188
}
187189
}

glint/src/test/java/co/clflushopt/glint/types/SchemaTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ public class SchemaTest {
1010

1111
@Test
1212
public void shouldNotMutateFields() {
13-
var fields = List.of(new Field("id", 0, ArrowTypes.Int64Type),
14-
new Field("name", 1, ArrowTypes.StringType),
15-
new Field("department", 2, ArrowTypes.StringType),
16-
new Field("on_vacation", 3, ArrowTypes.BooleanType),
17-
new Field("salary", 4, ArrowTypes.Int64Type));
13+
var fields = List.of(new Field("id", ArrowTypes.Int64Type),
14+
new Field("name", ArrowTypes.StringType),
15+
new Field("department", ArrowTypes.StringType),
16+
new Field("on_vacation", ArrowTypes.BooleanType),
17+
new Field("salary", ArrowTypes.Int64Type));
1818

1919
var schema = new Schema(fields);
2020

0 commit comments

Comments
 (0)