Skip to content

Commit 629dd67

Browse files
committed
feat: Implement Scan/Projection/Filter and HashJoin operators.
1 parent 32939eb commit 629dd67

File tree

11 files changed

+520
-13
lines changed

11 files changed

+520
-13
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package co.clflushopt.glint.query.physical.expr;
2+
3+
import co.clflushopt.glint.types.ArrowTypes;
4+
import co.clflushopt.glint.types.ColumnVector;
5+
import co.clflushopt.glint.types.LiteralValueVector;
6+
import co.clflushopt.glint.types.RecordBatch;
7+
8+
public class LiteralIntExpr implements Expr {
9+
private int value;
10+
11+
public LiteralIntExpr(int value) {
12+
this.value = value;
13+
}
14+
15+
@Override
16+
public String toString() {
17+
return Long.toString(value);
18+
}
19+
20+
@Override
21+
public ColumnVector eval(RecordBatch input) {
22+
return new LiteralValueVector(ArrowTypes.Int32Type, value, input.getRowSize());
23+
}
24+
25+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package co.clflushopt.glint.query.physical.plan;
2+
3+
import java.util.Iterator;
4+
import java.util.List;
5+
import java.util.stream.Collectors;
6+
import java.util.stream.IntStream;
7+
8+
import org.apache.arrow.memory.RootAllocator;
9+
import org.apache.arrow.vector.BitVector;
10+
import org.apache.arrow.vector.FieldVector;
11+
import org.apache.arrow.vector.VarCharVector;
12+
13+
import co.clflushopt.glint.query.physical.expr.Expr;
14+
import co.clflushopt.glint.types.ArrowFieldVector;
15+
import co.clflushopt.glint.types.ArrowVectorBuilder;
16+
import co.clflushopt.glint.types.ColumnVector;
17+
import co.clflushopt.glint.types.RecordBatch;
18+
import co.clflushopt.glint.types.Schema;
19+
20+
/**
 * Physical operator that applies a boolean filter expression to each input
 * batch and emits only the rows for which the expression evaluates to true.
 */
public class FilterOperator implements PhysicalPlan {
    private final PhysicalPlan input;
    private final Expr expr; // boolean-valued predicate; must evaluate to an Arrow BitVector

    /**
     * Creates a filter over the given child operator.
     *
     * @param input the child operator supplying record batches.
     * @param expr  the predicate evaluated against every batch.
     */
    public FilterOperator(PhysicalPlan input, Expr expr) {
        this.input = input;
        this.expr = expr;
    }

    /**
     * Streams filtered batches lazily: each input batch is filtered only when
     * the caller advances the returned iterator.
     */
    @Override
    public Iterator<RecordBatch> execute() {
        Iterator<RecordBatch> inputIterator = input.execute();

        // Wrap the child's iterator so filtering happens one batch at a time.
        return new Iterator<RecordBatch>() {
            @Override
            public boolean hasNext() {
                return inputIterator.hasNext();
            }

            @Override
            public RecordBatch next() {
                RecordBatch batch = inputIterator.next();
                // The predicate yields one bit per row; a set bit keeps the row.
                BitVector result = (BitVector) ((ArrowFieldVector) expr.eval(batch)).getField();
                Schema schema = batch.getSchema();
                int columnCount = schema.getFields().size();

                // Apply the selection bitmap to every column independently.
                List<FieldVector> filteredFields = IntStream.range(0, columnCount)
                        .mapToObj(i -> filter(batch.getField(i), result))
                        .collect(Collectors.toList());

                // Re-wrap the raw Arrow vectors as ColumnVectors for the batch.
                List<ColumnVector> fields = filteredFields.stream().map(ArrowFieldVector::new)
                        .collect(Collectors.toList());

                return new RecordBatch(schema, fields);
            }
        };
    }

    // Copies the rows of `v` selected by `selection` into a new vector,
    // compacting them to the front (output index `count`).
    // NOTE(review): the output is always a VarCharVector regardless of the
    // source column's type, so non-string columns are materialized as strings
    // — confirm this is intentional. Also, the RootAllocator created per call
    // is never closed; consider reusing a shared allocator.
    private FieldVector filter(ColumnVector v, BitVector selection) {
        VarCharVector filteredVector = new VarCharVector("v", new RootAllocator(Long.MAX_VALUE));
        filteredVector.allocateNew();

        ArrowVectorBuilder builder = new ArrowVectorBuilder(filteredVector);

        int count = 0;
        for (int i = 0; i < selection.getValueCount(); i++) {
            if (selection.get(i) == 1) {
                builder.setValue(count, v.getValue(i));
                count++;
            }
        }
        filteredVector.setValueCount(count);
        return filteredVector;
    }

    /** Filtering removes rows, never columns, so the schema is unchanged. */
    @Override
    public Schema getSchema() {
        return input.getSchema();
    }

    /** Single child: the operator whose output is being filtered. */
    @Override
    public List<PhysicalPlan> getChildren() {
        return List.of(input);
    }
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package co.clflushopt.glint.query.physical.plan;
2+
3+
import java.util.Collections;
4+
import java.util.HashMap;
5+
import java.util.Iterator;
6+
import java.util.List;
7+
import java.util.Map;
8+
import java.util.stream.Collectors;
9+
10+
import org.apache.arrow.memory.RootAllocator;
11+
import org.apache.arrow.vector.VectorSchemaRoot;
12+
13+
import co.clflushopt.glint.query.functional.Accumulator;
14+
import co.clflushopt.glint.query.physical.expr.AggregateExpr;
15+
import co.clflushopt.glint.query.physical.expr.Expr;
16+
import co.clflushopt.glint.types.ArrowFieldVector;
17+
import co.clflushopt.glint.types.ArrowVectorBuilder;
18+
import co.clflushopt.glint.types.ColumnVector;
19+
import co.clflushopt.glint.types.RecordBatch;
20+
import co.clflushopt.glint.types.Schema;
21+
22+
/**
23+
* HashJoinOperator implements the Hash Aggregate Join algorithm where the input
24+
* is consumed from two sources and the join is performed in two phases: build
25+
* and probe.
26+
*
27+
* The build phase consumes the left input and builds a hash table using the
28+
* join key. The probe phase consumes the right input and probes the hash table
29+
* to find matching rows.
30+
*
31+
*/
32+
/**
 * NOTE(review): despite its name, this operator implements hash AGGREGATION
 * (group-by keys feeding per-group accumulators), not a hash join — there is
 * no build/probe phase and no left/right input. The previous Javadoc described
 * a join that does not match the code; consider renaming to
 * HashAggregateOperator.
 *
 * The operator fully consumes its input, groups rows by the group-by
 * expressions, feeds each aggregate's input expression into that group's
 * accumulator, and emits a single output batch with one row per group.
 */
public class HashJoinOperator implements PhysicalPlan {
    private PhysicalPlan input;
    private List<Expr> groupByExpr;
    private List<AggregateExpr> aggregateExpr;
    private Schema schema;

    /**
     * Creates the aggregation operator.
     *
     * @param input         the child operator supplying record batches.
     * @param groupByExpr   expressions producing the grouping key columns.
     * @param aggregateExpr aggregate expressions; each supplies an input
     *                      expression and an accumulator factory.
     * @param schema        output schema: grouping columns first, then one
     *                      column per aggregate, in declaration order.
     */
    public HashJoinOperator(PhysicalPlan input, List<Expr> groupByExpr,
            List<AggregateExpr> aggregateExpr, Schema schema) {
        this.input = input;
        this.groupByExpr = groupByExpr;
        this.aggregateExpr = aggregateExpr;
        this.schema = schema;
    }

    @Override
    public Iterator<RecordBatch> execute() {
        // Grouping key (one Object per group-by expression) -> accumulators
        // for that group, in aggregateExpr order.
        Map<List<Object>, List<Accumulator>> map = new HashMap<>();

        // Accumulation phase: drain the child and fold every row into its
        // group's accumulators.
        Iterator<RecordBatch> inputIter = input.execute();
        while (inputIter.hasNext()) {
            RecordBatch batch = inputIter.next();

            // Evaluate grouping expressions once per batch (columnar).
            List<ColumnVector> groupKeys = groupByExpr.stream().map(expr -> expr.eval(batch))
                    .collect(Collectors.toList());

            // Evaluate aggregate input expressions once per batch.
            List<ColumnVector> aggrInputValues = aggregateExpr.stream()
                    .map(expr -> expr.getInputExpr().eval(batch)).collect(Collectors.toList());

            for (int rowIndex = 0; rowIndex < batch.getRowSize(); rowIndex++) {
                // Effectively-final copy for use inside the lambda below.
                final int currentRow = rowIndex;
                // Materialize this row's grouping key. byte[] values are
                // converted to String so equals/hashCode behave as map keys
                // (raw arrays compare by identity).
                List<Object> rowKey = groupKeys.stream().map(key -> {
                    Object value = key.getValue(currentRow);
                    if (value instanceof byte[]) {
                        return new String((byte[]) value);
                    }
                    return value;
                }).collect(Collectors.toList());

                // Lazily create one accumulator per aggregate for a new group.
                List<Accumulator> accumulators = map.computeIfAbsent(rowKey, k -> aggregateExpr
                        .stream().map(acc -> acc.getAccumulator()).collect(Collectors.toList()));

                // Fold this row's aggregate inputs into the group.
                for (int i = 0; i < accumulators.size(); i++) {
                    Object value = aggrInputValues.get(i).getValue(rowIndex);
                    accumulators.get(i).accumulate(value);
                }
            }
        }

        // Emit phase: build one output row per group.
        // NOTE(review): the RootAllocator created here is never closed — TODO
        // confirm the project's allocator lifetime policy.
        VectorSchemaRoot root = VectorSchemaRoot.create(schema.toArrow(),
                new RootAllocator(Long.MAX_VALUE));
        root.allocateNew();
        root.setRowCount(map.size());

        List<ArrowVectorBuilder> builders = root.getFieldVectors().stream()
                .map(ArrowVectorBuilder::new).collect(Collectors.toList());

        int rowIndex = 0;
        for (Map.Entry<List<Object>, List<Accumulator>> entry : map.entrySet()) {
            List<Object> groupingKey = entry.getKey();
            List<Accumulator> accumulators = entry.getValue();

            // Grouping key columns come first in the output schema.
            for (int i = 0; i < groupByExpr.size(); i++) {
                builders.get(i).setValue(rowIndex, groupingKey.get(i));
            }

            // Aggregate result columns follow the grouping columns.
            for (int i = 0; i < aggregateExpr.size(); i++) {
                builders.get(groupByExpr.size() + i).setValue(rowIndex,
                        accumulators.get(i).getResult());
            }
            rowIndex++;
        }

        RecordBatch outputBatch = new RecordBatch(schema, root.getFieldVectors().stream()
                .map(ArrowFieldVector::new).collect(Collectors.toList()));

        // Blocking operator: the entire result is returned as a single batch.
        return Collections.singletonList(outputBatch).iterator();
    }

    /** Returns the output schema (group-by columns, then aggregates). */
    @Override
    public Schema getSchema() {
        return schema;
    }

    /** Single child: the operator whose output is being aggregated. */
    @Override
    public List<PhysicalPlan> getChildren() {
        return List.of(input);
    }
}

glint/src/main/java/co/clflushopt/glint/query/physical/plan/PhysicalPlan.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package co.clflushopt.glint.query.physical.plan;
22

3+
import java.util.Iterator;
34
import java.util.List;
45
import java.util.stream.IntStream;
56

@@ -25,7 +26,7 @@ public interface PhysicalPlan {
2526
* is equivalent to `next()` in the Volcano paper.
2627
*
2728
*/
28-
public Iterable<RecordBatch> execute();
29+
public Iterator<RecordBatch> execute();
2930

3031
/**
3132
* Returns the pipeline structure of the plan.

glint/src/main/java/co/clflushopt/glint/query/physical/plan/ProjectionOperator.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package co.clflushopt.glint.query.physical.plan;
22

3-
import java.util.ArrayList;
3+
import java.util.Iterator;
44
import java.util.List;
55
import java.util.stream.Collectors;
66

77
import co.clflushopt.glint.query.physical.expr.Expr;
8+
import co.clflushopt.glint.types.ColumnVector;
89
import co.clflushopt.glint.types.RecordBatch;
910
import co.clflushopt.glint.types.Schema;
1011

@@ -31,17 +32,23 @@ public ProjectionOperator(PhysicalPlan input, Schema schema, List<Expr> projecti
3132
}
3233

3334
@Override
34-
public Iterable<RecordBatch> execute() {
35-
var iter = input.execute().iterator();
36-
List<RecordBatch> result = new ArrayList<>();
35+
public Iterator<RecordBatch> execute() {
36+
Iterator<RecordBatch> inputIterator = input.execute();
3737

38-
while (iter.hasNext()) {
39-
var columns = this.projections.stream().map(expr -> expr.eval(iter.next()))
40-
.collect(Collectors.toList());
41-
result.add(new RecordBatch(schema, columns));
42-
}
38+
return new Iterator<RecordBatch>() {
39+
@Override
40+
public boolean hasNext() {
41+
return inputIterator.hasNext();
42+
}
4343

44-
return result;
44+
@Override
45+
public RecordBatch next() {
46+
RecordBatch batch = inputIterator.next();
47+
List<ColumnVector> columns = projections.stream()
48+
.map(expression -> expression.eval(batch)).collect(Collectors.toList());
49+
return new RecordBatch(schema, columns);
50+
}
51+
};
4552
}
4653

4754
@Override

glint/src/main/java/co/clflushopt/glint/query/physical/plan/ScanOperator.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package co.clflushopt.glint.query.physical.plan;
22

3+
import java.util.Iterator;
34
import java.util.List;
45

56
import co.clflushopt.glint.datasource.DataSource;
@@ -30,8 +31,8 @@ public Schema getSchema() {
3031
}
3132

3233
@Override
33-
public Iterable<RecordBatch> execute() {
34-
return dataSource.scan(projection);
34+
public Iterator<RecordBatch> execute() {
35+
return dataSource.scan(projection).iterator();
3536
}
3637

3738
@Override
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package co.clflushopt.glint.query.physical;
2+
3+
import static org.junit.Assert.assertEquals;
4+
import static org.junit.Assert.assertTrue;
5+
6+
import java.util.Collections;
7+
import java.util.Iterator;
8+
9+
import org.junit.Test;
10+
11+
import co.clflushopt.glint.query.physical.expr.BooleanExpr;
12+
import co.clflushopt.glint.query.physical.expr.ColumnExpr;
13+
import co.clflushopt.glint.query.physical.expr.Expr;
14+
import co.clflushopt.glint.query.physical.expr.LiteralIntExpr;
15+
import co.clflushopt.glint.query.physical.plan.FilterOperator;
16+
import co.clflushopt.glint.query.physical.plan.ScanOperator;
17+
import co.clflushopt.glint.types.RecordBatch;
18+
import co.clflushopt.glint.types.Schema;
19+
20+
public class FilterOperatorTest {
21+
@Test
22+
public void testFilter() {
23+
// Create test data
24+
Schema schema = new Schema(TestUtils.createTestSchema());
25+
RecordBatch testBatch = TestUtils.createTestBatch();
26+
SyntheticDataSource dataSource = new SyntheticDataSource(schema,
27+
Collections.singletonList(testBatch));
28+
ScanOperator scan = new ScanOperator(dataSource, Collections.emptyList());
29+
30+
// Filter age > 30
31+
Expr filterExpr = new BooleanExpr.GtExpression(new ColumnExpr(2), new LiteralIntExpr(30));
32+
33+
FilterOperator filter = new FilterOperator(scan, filterExpr);
34+
Iterator<RecordBatch> result = filter.execute();
35+
36+
// Verify results
37+
assertTrue(result.hasNext());
38+
RecordBatch batch = result.next();
39+
assertEquals(1, batch.getRowSize());
40+
assertEquals("Charlie", batch.getField(1).getValue(0));
41+
assertEquals(35, Integer.parseInt((String) batch.getField(2).getValue(0)));
42+
}
43+
}

0 commit comments

Comments
 (0)