@@ -125,94 +125,4 @@ private void close() {
}
}
}
- }
- /*
- * public class ParquetDataSource implements DataSource { private final String
- * filename;
- *
- * public ParquetDataSource(String filename) { this.filename = filename; }
- *
- * @Override public Schema getSchema() { try (ParquetScan scan = new
- * ParquetScan(filename, Collections.emptyList())) {
- * org.apache.arrow.vector.types.pojo.Schema arrowSchema = SchemaConverter
- * .fromParquet(scan.getSchema()).toArrow(); return
- * SchemaConverter.fromArrow(arrowSchema); } catch (IOException e) { throw new
- * RuntimeException("Failed to read schema from Parquet file", e); } }
- *
- * @SuppressWarnings("resource")
- *
- * @Override public Iterable<RecordBatch> scan(List<String> projection) { //
- * Return an Iterable that creates a new ParquetScan each time iterator() is //
- * called return () -> { try { return new ParquetScan(filename,
- * projection).iterator(); } catch (IOException e) { throw new
- * RuntimeException("Failed to create ParquetScan", e); } }; } }
- *
- * class ParquetScan implements AutoCloseable { private final ParquetFileReader
- * reader; private final List<String> columns; private final
- * org.apache.parquet.schema.MessageType schema;
- *
- * public ParquetScan(String filename, List<String> columns) throws IOException
- * { this.columns = columns; this.reader = ParquetFileReader
- * .open(HadoopInputFile.fromPath(new Path(filename), new Configuration()));
- * this.schema = reader.getFooter().getFileMetaData().getSchema(); }
- *
- * public Iterator<RecordBatch> iterator() { return new ParquetIterator(reader,
- * columns); }
- *
- * @Override public void close() throws IOException { reader.close(); }
- *
- * public org.apache.parquet.schema.MessageType getSchema() { return schema; } }
- *
- * class ParquetIterator implements Iterator<RecordBatch> { private final
- * ParquetFileReader reader; private final List<String> projectedColumns;
- * private final org.apache.parquet.schema.MessageType schema; private final
- * org.apache.arrow.vector.types.pojo.Schema arrowSchema; private final
- * org.apache.arrow.vector.types.pojo.Schema projectedArrowSchema; private
- * RecordBatch batch;
- *
- * public ParquetIterator(ParquetFileReader reader, List<String>
- * projectedColumns) { this.reader = reader; this.projectedColumns =
- * projectedColumns; this.schema =
- * reader.getFooter().getFileMetaData().getSchema(); this.arrowSchema =
- * SchemaConverter.fromParquet(schema).toArrow();
- *
- * if (projectedColumns.isEmpty()) { // Project all columns
- * this.projectedArrowSchema = arrowSchema; } else { // Create projected schema
- * List<org.apache.arrow.vector.types.pojo.Field> projectedFields =
- * projectedColumns .stream().map( name -> arrowSchema.getFields().stream()
- * .filter(f -> f.getName().equals(name)).findFirst() .orElseThrow(() -> new
- * IllegalArgumentException( "Column not found: " + name)))
- * .collect(Collectors.toList());
- *
- * this.projectedArrowSchema = new org.apache.arrow.vector.types.pojo.Schema(
- * projectedFields); }
- *
- * }
- *
- * @Override public boolean hasNext() { batch = nextBatch(); return batch !=
- * null; }
- *
- * @Override public RecordBatch next() { if (batch == null) { throw new
- * NoSuchElementException(); } RecordBatch result = batch;
- * System.out.println("ParquetIterator.next: " + result.toCsv()); batch = null;
- * return result; }
- *
- * private RecordBatch nextBatch() { try (PageReadStore pages =
- * reader.readNextRowGroup()) { if (pages == null) {
- * System.out.println("No more pages"); return null; }
- *
- * if (pages.getRowCount() > Integer.MAX_VALUE) { throw new
- * IllegalStateException("Row count exceeds maximum integer value"); }
- *
- * int rows = (int) pages.getRowCount();
- *
- * VectorSchemaRoot root = VectorSchemaRoot.create(projectedArrowSchema, new
- * RootAllocator(Long.MAX_VALUE)); root.allocateNew(); root.setRowCount(rows);
- *
- * Schema convertedSchema = SchemaConverter.fromArrow(projectedArrowSchema);
- *
- * return new RecordBatch(convertedSchema, root.getFieldVectors().stream()
- * .map(ArrowFieldVector::new).collect(Collectors.toList())); } catch
- * (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }
- * return batch; } }
- */
+ }