diff --git a/README.md b/README.md index 4aa6eeb3a..ad3fbeaaf 100644 --- a/README.md +++ b/README.md @@ -216,3 +216,23 @@ Cask is a trademark of Cask Data, Inc. All rights reserved. Apache, Apache HBase, and HBase are trademarks of The Apache Software Foundation. Used with permission. No endorsement by The Apache Software Foundation is implied by the use of these marks. +## 🆕 New Parsers: Byte Size & Time Duration + +Wrangler now supports parsing: + +- Byte sizes: `10KB`, `1.5MB`, `2GB` +- Time durations: `500ms`, `3s`, `2h` + +### 🔧 Usage + +```text +aggregate-stats :data_transfer :response_time total_size_mb total_time_sec + +--- + +## ✅ Step 10: Commit & Push Your Changes + +```bash +git add . +git commit -m "Add ByteSize, TimeDuration parsers and AggregateStats directive" +git push origin feature/byte-time-parsers diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSize.java new file mode 100644 index 000000000..87dbb0346 --- /dev/null +++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSize.java @@ -0,0 +1,24 @@ +package io.cdap.wrangler.api.parser; + +public class ByteSize implements Token { + private final long bytes; + + public ByteSize(String input) { + input = input.toUpperCase().trim(); + if (input.endsWith("KB")) bytes = (long)(Double.parseDouble(input.replace("KB", "")) * 1024); + else if (input.endsWith("MB")) bytes = (long)(Double.parseDouble(input.replace("MB", "")) * 1024 * 1024); + else if (input.endsWith("GB")) bytes = (long)(Double.parseDouble(input.replace("GB", "")) * 1024 * 1024 * 1024); + else if (input.endsWith("TB")) bytes = (long)(Double.parseDouble(input.replace("TB", "")) * 1024L * 1024 * 1024 * 1024); + else if (input.endsWith("B")) bytes = Long.parseLong(input.replace("B", "")); + else throw new IllegalArgumentException("Invalid byte size format: " + input); + } + + public long getBytes() { + return bytes; + } + + @Override + public String toString() { + return bytes + "B"; + } +} diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDuration.java new file mode 100644 index 000000000..92565cb86 --- /dev/null +++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDuration.java @@ -0,0 +1,23 @@ +package io.cdap.wrangler.api.parser; + +public class TimeDuration implements Token { + private final long millis; + + public TimeDuration(String input) { + input = input.trim(); + if (input.endsWith("ms")) millis = Long.parseLong(input.replace("ms", "")); + else if (input.endsWith("s")) millis = Long.parseLong(input.replace("s", "")) * 1000; + else if (input.endsWith("m")) millis = Long.parseLong(input.replace("m", "")) * 60 * 1000; + else if (input.endsWith("h")) millis = Long.parseLong(input.replace("h", "")) * 3600 * 1000; + else throw new IllegalArgumentException("Invalid time duration format: " + input); + } + + public long getMillis() { + return millis; + } + + @Override + public String toString() { + return millis + "ms"; + } +} diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/UsageDefinitionTest.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/UsageDefinitionTest.java index 8141d57bf..033cd8795 100644 --- a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/UsageDefinitionTest.java +++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/UsageDefinitionTest.java @@ -69,6 +69,40 @@ public void testUsageStringCreation() { usage = builder.build().toString(); Assert.assertEquals("set-columns :cols [,:cols ]*", usage); usages.add(usage); + public class ByteSize implements Token { + private final long bytes; + + public ByteSize(String input) { + input = input.toUpperCase().trim(); + if (input.endsWith("KB")) bytes = (long)(Double.parseDouble(input.replace("KB", "")) * 1024); + else if (input.endsWith("MB")) bytes = (long)(Double.parseDouble(input.replace("MB", "")) * 1024 * 1024); + else if (input.endsWith("GB")) bytes = (long)(Double.parseDouble(input.replace("GB", "")) * 1024 * 1024 * 1024); + else if (input.endsWith("TB")) bytes = (long)(Double.parseDouble(input.replace("TB", "")) * 1024L * 1024 * 1024 * 1024); + else if (input.endsWith("B")) bytes = Long.parseLong(input.replace("B", "")); + else throw new IllegalArgumentException("Invalid byte size format: " + input); + } + + public long getBytes() { + return bytes; + } +} +public class TimeDuration implements Token { + private final long millis; + + public TimeDuration(String input) { + input = input.trim(); + if (input.endsWith("ms")) millis = Long.parseLong(input.replace("ms", "")); + else if (input.endsWith("s")) millis = Long.parseLong(input.replace("s", "")) * 1000; + else if (input.endsWith("m")) millis = Long.parseLong(input.replace("m", "")) * 60 * 1000; + else if (input.endsWith("h")) millis = Long.parseLong(input.replace("h", "")) * 3600 * 1000; + else throw new IllegalArgumentException("Invalid time format: " + input); + } + + public long getMillis() { + return millis; + } +} + Assert.assertTrue(true); } diff --git a/wrangler-core/RecipeVisitor.java b/wrangler-core/RecipeVisitor.java new file mode 100644 index 000000000..34bcf647b --- /dev/null +++ b/wrangler-core/RecipeVisitor.java @@ -0,0 +1,9 @@ +@Override +public Token visitByteSizeArg(DirectivesParser.ByteSizeArgContext ctx) { + return new ByteSize(ctx.getText()); +} + +@Override +public Token visitTimeDurationArg(DirectivesParser.TimeDurationArgContext ctx) { + return new TimeDuration(ctx.getText()); +} diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 index 7c517ed6a..1776028e4 100644 --- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 +++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 @@ -311,3 +311,24 @@ fragment Int fragment Digit : [0-9] ; + +BYTE_SIZE: DIGITS ('.' DIGITS)? BYTE_UNIT; +TIME_DURATION: DIGITS ('.' DIGITS)? TIME_UNIT; + +fragment BYTE_UNIT: ('B' | 'KB' | 'MB' | 'GB' | 'TB'); +fragment TIME_UNIT: ('ms' | 's' | 'm' | 'h'); +fragment DIGITS: [0-9]+; + +byteSizeArg: BYTE_SIZE; +timeDurationArg: TIME_DURATION; + +value + : STRING + | NUMBER + | BOOLEAN + | BYTE_SIZE + | TIME_DURATION + ; + + + diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/parser/CustomDirectiveVisitor.java b/wrangler-core/src/main/java/io/cdap/wrangler/parser/CustomDirectiveVisitor.java new file mode 100644 index 000000000..c54b31994 --- /dev/null +++ b/wrangler-core/src/main/java/io/cdap/wrangler/parser/CustomDirectiveVisitor.java @@ -0,0 +1,28 @@ +package io.cdap.wrangler.parser; + +import io.cdap.wrangler.api.parser.ByteSize; +import io.cdap.wrangler.api.parser.TimeDuration; +import io.cdap.wrangler.api.parser.Token; +import io.cdap.wrangler.grammar.DirectivesBaseVisitor; +import io.cdap.wrangler.grammar.DirectivesParser; + +public class CustomDirectiveVisitor extends DirectivesBaseVisitor { + + @Override + public Token visitByteSizeArg(DirectivesParser.ByteSizeArgContext ctx) { + return new ByteSize(ctx.getText()); + } + + @Override + public Token visitTimeDurationArg(DirectivesParser.TimeDurationArgContext ctx) { + return new TimeDuration(ctx.getText()); + } + + @Override + public Token visitValue(DirectivesParser.ValueContext ctx) { + if (ctx.BYTE_SIZE() != null) return new ByteSize(ctx.getText()); + if (ctx.TIME_DURATION() != null) return new TimeDuration(ctx.getText()); + // return other existing tokens (string, boolean, number) + return super.visitValue(ctx); + } +} diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/AggregateStatsTest.java b/wrangler-core/src/test/java/io/cdap/wrangler/AggregateStatsTest.java new file mode 100644 index 000000000..9060d3890 --- /dev/null +++ b/wrangler-core/src/test/java/io/cdap/wrangler/AggregateStatsTest.java @@ -0,0 +1,21 @@ +public class AggregateStatsTest { + @Test + public void testAggregateStats() throws Exception { + List rows = Arrays.asList( + new Row("data_transfer", "10KB").add("response_time", "2s"), + new Row("data_transfer", "1.5MB").add("response_time", "3s") + ); + + String[] recipe = { + "aggregate-stats :data_transfer :response_time total_size_mb total_time_sec" + }; + + List results = TestingRig.execute(recipe, rows); + + double expectedMB = (10240 + 1572864) / (1024.0 * 1024); + double expectedSec = (2000 + 3000) / 1000.0; + + Assert.assertEquals(expectedMB, results.get(0).getValue("total_size_mb"), 0.01); + Assert.assertEquals(expectedSec, results.get(0).getValue("total_time_sec"), 0.01); + } +}