Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,23 @@ Cask is a trademark of Cask Data, Inc. All rights reserved.

Apache, Apache HBase, and HBase are trademarks of The Apache Software Foundation. Used with
permission. No endorsement by The Apache Software Foundation is implied by the use of these marks.
## 🆕 New Parsers: Byte Size & Time Duration

Wrangler now supports parsing:

- Byte sizes: `10KB`, `1.5MB`, `2GB`
- Time durations: `500ms`, `3s`, `2h`

### 🔧 Usage

```text
aggregate-stats :data_transfer :response_time total_size_mb total_time_sec

---

## ✅ Step 10: Commit & Push Your Changes

```bash
git add .
git commit -m "Add ByteSize, TimeDuration parsers and AggregateStats directive"
git push origin feature/byte-time-parsers
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package io.cdap.wrangler.api.parser;

public class ByteSize implements Token {
private final long bytes;

public ByteSize(String input) {
input = input.toUpperCase().trim();
if (input.endsWith("KB")) bytes = (long)(Double.parseDouble(input.replace("KB", "")) * 1024);
else if (input.endsWith("MB")) bytes = (long)(Double.parseDouble(input.replace("MB", "")) * 1024 * 1024);
else if (input.endsWith("GB")) bytes = (long)(Double.parseDouble(input.replace("GB", "")) * 1024 * 1024 * 1024);
else if (input.endsWith("TB")) bytes = (long)(Double.parseDouble(input.replace("TB", "")) * 1024L * 1024 * 1024 * 1024);
else if (input.endsWith("B")) bytes = Long.parseLong(input.replace("B", ""));
else throw new IllegalArgumentException("Invalid byte size format: " + input);
}

public long getBytes() {
return bytes;
}

@Override
public String toString() {
return bytes + "B";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package io.cdap.wrangler.api.parser;

public class TimeDuration implements Token {
private final long millis;

public TimeDuration(String input) {
input = input.trim();
if (input.endsWith("ms")) millis = Long.parseLong(input.replace("ms", ""));
else if (input.endsWith("s")) millis = Long.parseLong(input.replace("s", "")) * 1000;
else if (input.endsWith("m")) millis = Long.parseLong(input.replace("m", "")) * 60 * 1000;
else if (input.endsWith("h")) millis = Long.parseLong(input.replace("h", "")) * 3600 * 1000;
else throw new IllegalArgumentException("Invalid time duration format: " + input);
}

public long getMillis() {
return millis;
}

@Override
public String toString() {
return millis + "ms";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,40 @@ public void testUsageStringCreation() {
usage = builder.build().toString();
Assert.assertEquals("set-columns :cols [,:cols ]*", usage);
usages.add(usage);
public class ByteSize implements Token {
private final long bytes;

public ByteSize(String input) {
input = input.toUpperCase().trim();
if (input.endsWith("KB")) bytes = (long)(Double.parseDouble(input.replace("KB", "")) * 1024);
else if (input.endsWith("MB")) bytes = (long)(Double.parseDouble(input.replace("MB", "")) * 1024 * 1024);
else if (input.endsWith("GB")) bytes = (long)(Double.parseDouble(input.replace("GB", "")) * 1024 * 1024 * 1024);
else if (input.endsWith("TB")) bytes = (long)(Double.parseDouble(input.replace("TB", "")) * 1024L * 1024 * 1024 * 1024);
else if (input.endsWith("B")) bytes = Long.parseLong(input.replace("B", ""));
else throw new IllegalArgumentException("Invalid byte size format: " + input);
}

public long getBytes() {
return bytes;
}
}
public class TimeDuration implements Token {
private final long millis;

public TimeDuration(String input) {
input = input.trim();
if (input.endsWith("ms")) millis = Long.parseLong(input.replace("ms", ""));
else if (input.endsWith("s")) millis = Long.parseLong(input.replace("s", "")) * 1000;
else if (input.endsWith("m")) millis = Long.parseLong(input.replace("m", "")) * 60 * 1000;
else if (input.endsWith("h")) millis = Long.parseLong(input.replace("h", "")) * 3600 * 1000;
else throw new IllegalArgumentException("Invalid time format: " + input);
}

public long getMillis() {
return millis;
}
}


Assert.assertTrue(true);
}
Expand Down
9 changes: 9 additions & 0 deletions wrangler-core/RecipeVisitor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@Override
public Token visitByteSizeArg(DirectivesParser.ByteSizeArgContext ctx) {
return new ByteSize(ctx.getText());
}

@Override
public Token visitTimeDurationArg(DirectivesParser.TimeDurationArgContext ctx) {
return new TimeDuration(ctx.getText());
}
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,24 @@ fragment Int
fragment Digit
: [0-9]
;

BYTE_SIZE: DIGITS ('.' DIGITS)? BYTE_UNIT;
TIME_DURATION: DIGITS ('.' DIGITS)? TIME_UNIT;

fragment BYTE_UNIT: ('B' | 'KB' | 'MB' | 'GB' | 'TB');
fragment TIME_UNIT: ('ms' | 's' | 'm' | 'h');
fragment DIGITS: [0-9]+;

byteSizeArg: BYTE_SIZE;
timeDurationArg: TIME_DURATION;

value
: STRING
| NUMBER
| BOOLEAN
| BYTE_SIZE
| TIME_DURATION
;



Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package io.cdap.wrangler.parser;

import io.cdap.wrangler.api.parser.ByteSize;
import io.cdap.wrangler.api.parser.TimeDuration;
import io.cdap.wrangler.api.parser.Token;
import io.cdap.wrangler.grammar.DirectivesBaseVisitor;
import io.cdap.wrangler.grammar.DirectivesParser;

public class CustomDirectiveVisitor extends DirectivesBaseVisitor<Token> {

@Override
public Token visitByteSizeArg(DirectivesParser.ByteSizeArgContext ctx) {
return new ByteSize(ctx.getText());
}

@Override
public Token visitTimeDurationArg(DirectivesParser.TimeDurationArgContext ctx) {
return new TimeDuration(ctx.getText());
}

@Override
public Token visitValue(DirectivesParser.ValueContext ctx) {
if (ctx.BYTE_SIZE() != null) return new ByteSize(ctx.getText());
if (ctx.TIME_DURATION() != null) return new TimeDuration(ctx.getText());
// return other existing tokens (string, boolean, number)
return super.visitValue(ctx);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
public class AggregateStatsTest {
@Test
public void testAggregateStats() throws Exception {
List<Row> rows = Arrays.asList(
new Row("data_transfer", "10KB").add("response_time", "2s"),
new Row("data_transfer", "1.5MB").add("response_time", "3s")
);

String[] recipe = {
"aggregate-stats :data_transfer :response_time total_size_mb total_time_sec"
};

List<Row> results = TestingRig.execute(recipe, rows);

double expectedMB = (10240 + 1572864) / (1024.0 * 1024);
double expectedSec = (2000 + 3000) / 1000.0;

Assert.assertEquals(expectedMB, results.get(0).getValue("total_size_mb"), 0.01);
Assert.assertEquals(expectedSec, results.get(0).getValue("total_time_sec"), 0.01);
}
}