Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class SpanParser {
Map.entry("months", "months"),
Map.entry("month", "months"),
Map.entry("mon", "months"),
Map.entry("M", "months"), // Uppercase M for months (case-sensitive)
// Milliseconds
Map.entry("ms", "ms"),
// Microseconds
Expand All @@ -63,7 +64,16 @@ public class SpanParser {

// Build direct lookup map for efficient unit detection
for (String unit : NORMALIZED_UNITS.keySet()) {
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
// Preserve case for case-sensitive units: M (month), m (minute), us, cs, ds
if (unit.equals("M")
|| unit.equals("m")
|| unit.equals("us")
|| unit.equals("cs")
|| unit.equals("ds")) {
UNIT_LOOKUP.put(unit, unit);
} else {
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
}
}
}

Expand Down Expand Up @@ -135,15 +145,27 @@ private static SpanInfo parseNumericSpan(String spanStr) {

/** Extracts time unit from span string (returns original matched unit, not normalized). */
public static String extractTimeUnit(String spanStr) {
String lowerSpanStr = spanStr.toLowerCase(Locale.ROOT);
String longestMatch = null;

// Find the longest unit that matches as a suffix
for (String unit : UNIT_LOOKUP.keySet()) {
if (lowerSpanStr.endsWith(unit)) {
// For case-sensitive units (M, m, us, cs, ds), match case-sensitively
boolean matches;
if (unit.equals("M")
|| unit.equals("m")
|| unit.equals("us")
|| unit.equals("cs")
|| unit.equals("ds")) {
matches = spanStr.endsWith(unit);
} else {
// For other units, match case-insensitively
matches = spanStr.toLowerCase(Locale.ROOT).endsWith(unit.toLowerCase(Locale.ROOT));
}

if (matches) {
// Ensure this is a word boundary (not part of a larger word)
int unitStartPos = lowerSpanStr.length() - unit.length();
if (unitStartPos == 0 || !Character.isLetter(lowerSpanStr.charAt(unitStartPos - 1))) {
int unitStartPos = spanStr.length() - unit.length();
if (unitStartPos == 0 || !Character.isLetter(spanStr.charAt(unitStartPos - 1))) {
// Keep the longest match
if (longestMatch == null || unit.length() > longestMatch.length()) {
longestMatch = unit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ public RexNode createTimeSpanExpression(
/**
 * Decides whether an aligntime modifier should be honoured for the given span string.
 *
 * <p>Quotes and surrounding whitespace are stripped, but the letter case is preserved on
 * purpose: {@code M} (months) and {@code m} (minutes) differ only by case, so lower-casing the
 * span before unit extraction would make them indistinguishable.
 *
 * @param spanStr the raw span text, possibly quoted; may be {@code null}
 * @return {@code false} for a {@code null} span or for day/month spans, {@code true} otherwise
 *     (including spans without a recognisable unit, which are treated as hours)
 */
private boolean shouldApplyAligntime(String spanStr) {
  if (spanStr == null) {
    return false;
  }

  String cleanedSpan = spanStr.replace("'", "").replace("\"", "").trim();
  String timeUnit = SpanParser.extractTimeUnit(cleanedSpan);

  if (timeUnit == null) {
    return true; // Pure number, assume hours
  }

  // Aligntime ignored for days, months, years
  String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
  return !normalizedUnit.equals("d") && !normalizedUnit.equals("months");
}

private RexNode createAlignedTimeSpan(
Expand All @@ -64,7 +64,7 @@ private RexNode createAlignedTimeSpan(
if (timeUnit != null) {
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
intervalValue = Integer.parseInt(valueStr);
normalizedUnit = normalizeTimeUnit(timeUnit);
normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
} else {
intervalValue = Integer.parseInt(spanStr);
normalizedUnit = "h";
Expand All @@ -86,7 +86,7 @@ private RexNode createStandardTimeSpan(
if (timeUnit != null) {
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
int value = Integer.parseInt(valueStr);
String normalizedUnit = normalizeTimeUnit(timeUnit);
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
return BinTimeSpanUtils.createBinTimeSpanExpression(
fieldExpr, value, normalizedUnit, 0, context);
} else {
Expand Down Expand Up @@ -120,39 +120,4 @@ private String extractModifier(String aligntimeStr) {

return null;
}

/**
 * Maps a time-unit spelling to the short name used by the span builders.
 *
 * <p>Matching is case-insensitive and uses {@link java.util.Locale#ROOT} so the result does not
 * depend on the JVM's default locale. Because the input is lower-cased before matching, an
 * uppercase {@code "M"} is treated as {@code "m"} (minutes) here.
 *
 * @param unit the unit text as written in the span (e.g. {@code "sec"}, {@code "Hours"})
 * @return the normalized unit ({@code "s"}, {@code "m"}, {@code "h"}, {@code "d"},
 *     {@code "months"}, {@code "us"}, {@code "ms"}, {@code "cs"}, {@code "ds"}), or the input
 *     unchanged when it is not a recognised spelling
 */
private String normalizeTimeUnit(String unit) {
  return switch (unit.toLowerCase(java.util.Locale.ROOT)) {
    case "s", "sec", "secs", "second", "seconds" -> "s";
    case "m", "min", "mins", "minute", "minutes" -> "m";
    case "h", "hr", "hrs", "hour", "hours" -> "h";
    case "d", "day", "days" -> "d";
    case "mon", "month", "months" -> "months";
    case "us" -> "us";
    case "ms" -> "ms";
    case "cs" -> "cs";
    case "ds" -> "ds";
    // Unknown spelling: hand back the caller's original text, not the lower-cased copy.
    default -> unit;
  };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,36 @@ public class TimeUnitRegistry {
private static final Map<String, TimeUnitConfig> UNIT_MAPPING = new HashMap<>();

static {
// Microseconds
// Microseconds (case-sensitive, lowercase only)
UNIT_MAPPING.put("us", TimeUnitConfig.MICROSECONDS);

// Milliseconds
UNIT_MAPPING.put("ms", TimeUnitConfig.MILLISECONDS);

// Centiseconds
// Centiseconds (case-sensitive, lowercase only)
UNIT_MAPPING.put("cs", TimeUnitConfig.CENTISECONDS);

// Deciseconds
// Deciseconds (case-sensitive, lowercase only)
UNIT_MAPPING.put("ds", TimeUnitConfig.DECISECONDS);

// Seconds
UNIT_MAPPING.put("s", TimeUnitConfig.SECONDS);
UNIT_MAPPING.put("sec", TimeUnitConfig.SECONDS);
UNIT_MAPPING.put("secs", TimeUnitConfig.SECONDS);
UNIT_MAPPING.put("second", TimeUnitConfig.SECONDS);
UNIT_MAPPING.put("seconds", TimeUnitConfig.SECONDS);

// Minutes
// Minutes (case-sensitive lowercase 'm')
UNIT_MAPPING.put("m", TimeUnitConfig.MINUTES);
UNIT_MAPPING.put("min", TimeUnitConfig.MINUTES);
UNIT_MAPPING.put("mins", TimeUnitConfig.MINUTES);
UNIT_MAPPING.put("minute", TimeUnitConfig.MINUTES);
UNIT_MAPPING.put("minutes", TimeUnitConfig.MINUTES);

// Hours
UNIT_MAPPING.put("h", TimeUnitConfig.HOURS);
UNIT_MAPPING.put("hr", TimeUnitConfig.HOURS);
UNIT_MAPPING.put("hrs", TimeUnitConfig.HOURS);
UNIT_MAPPING.put("hour", TimeUnitConfig.HOURS);
UNIT_MAPPING.put("hours", TimeUnitConfig.HOURS);

Expand All @@ -49,7 +52,7 @@ public class TimeUnitRegistry {
UNIT_MAPPING.put("day", TimeUnitConfig.DAYS);
UNIT_MAPPING.put("days", TimeUnitConfig.DAYS);

// Months (case-sensitive M)
// Months (case-sensitive uppercase 'M')
UNIT_MAPPING.put("M", TimeUnitConfig.MONTHS);
UNIT_MAPPING.put("mon", TimeUnitConfig.MONTHS);
UNIT_MAPPING.put("month", TimeUnitConfig.MONTHS);
Expand All @@ -59,15 +62,19 @@ public class TimeUnitRegistry {
/**
* Gets the time unit configuration for the given unit string.
*
* @param unit The unit string (e.g., "h", "hours", "M")
* @param unit The unit string (e.g., "h", "hours", "M", "m")
* @return The time unit configuration, or null if not found
*/
public static TimeUnitConfig getConfig(String unit) {
if (unit.equals("M")) {
// M is case-sensitive for months
// Handle case-sensitive units: M (month), m (minute), and subsecond units (us, cs, ds)
if (unit.equals("M")
|| unit.equals("m")
|| unit.equals("us")
|| unit.equals("cs")
|| unit.equals("ds")) {
return UNIT_MAPPING.get(unit);
} else {
// For all other units, use lowercase lookup
// For all other units, use lowercase lookup for case-insensitive matching
return UNIT_MAPPING.get(unit.toLowerCase());
}
}
Expand Down
19 changes: 16 additions & 3 deletions docs/user/ppl/cmd/bin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,25 @@ Specifies the width of each bin interval with support for multiple span types:
- Creates logarithmic bin boundaries instead of linear

**3. Time Scale Span (comprehensive time units)**
- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds)

The ``bin`` command supports a comprehensive set of time units, including subsecond precision:

- **Subseconds**: ``us`` (microseconds), ``cs`` (centiseconds), ``ds`` (deciseconds) are case-sensitive and must be lowercase; ``ms`` (milliseconds) is matched case-insensitively
- **Seconds**: ``s``, ``sec``, ``secs``, ``second``, ``seconds``
- **Minutes**: ``m``, ``min``, ``mins``, ``minute``, ``minutes``
- **Minutes** (case-sensitive): ``m`` (lowercase), ``min``, ``mins``, ``minute``, ``minutes``
- **Hours**: ``h``, ``hr``, ``hrs``, ``hour``, ``hours``
- **Days**: ``d``, ``day``, ``days`` - **Uses precise daily binning algorithm**
- **Months**: ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm**
- **Months** (case-sensitive): ``M`` (uppercase), ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm**

**Case Sensitivity Note**:
- ``m`` (lowercase) = minute
- ``M`` (uppercase) = month
- Subsecond units ``us``, ``cs`` and ``ds`` are case-sensitive and must be lowercase; ``ms`` is accepted in any letter case

.. note::

The ``bin`` command implements time binning using **Calcite**, which provides full control over the binning logic. This allows it to support **subsecond units** (us, ms, cs, ds) that are not available in the ``stats`` command, which relies on OpenSearch's calendar intervals.

- **Examples**:
- ``span=30seconds``
- ``span=15minutes``
Expand Down
10 changes: 10 additions & 0 deletions docs/user/ppl/cmd/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ stats [bucket_nullable=bool] <aggregation>... [by-clause]
| year (y) |
+----------------------------+

**Case Sensitivity Note**:
- ``m`` (lowercase) = minute
- ``M`` (uppercase) = month

.. note::

The ``stats`` command uses **OpenSearch calendar intervals** for time-based aggregations. **Subsecond units** (us, cs, ds) are **not supported** by the stats command due to OpenSearch limitations.

If you need subsecond precision for time binning, use the ``bin`` command instead, which implements time binning using Calcite and supports all time units including subsecond precision.

Configuration
=============
Some aggregation functions require Calcite to be enabled for proper functionality. To enable Calcite, use the following command:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -984,4 +984,86 @@ public void testStatsWithBinsOnTimeAndTermField_Avg() throws IOException {
rows(50, "us-east", "2024-07-01 00:05:00"),
rows(40.25, "us-west", "2024-07-01 00:01:00"));
}

@Test
public void testBinCaseSensitivity_mon_vs_M() throws IOException {
  // Uppercase 'M' and the abbreviation 'mon' both denote months, so a one-month span written
  // either way is expected to put the earliest row into the same month bucket.
  for (String monthSpan : new String[] {"1M", "1mon"}) {
    String query =
        String.format(
            "source=%s | bin @timestamp span=%s | fields `@timestamp` | sort `@timestamp` |"
                + " head 1",
            TEST_INDEX_TIME_DATA, monthSpan);

    JSONObject monthResult = executeQuery(query);

    verifySchema(monthResult, schema("@timestamp", null, "string"));
    verifyDataRows(monthResult, rows("2025-07"));
  }
}

@Test
public void testBinWithSubsecondUnits() throws IOException {
  // Spans exercised, in order: 100 milliseconds, 500 microseconds, 10 centiseconds (100ms)
  // and 5 deciseconds (500ms). Each is expected to yield the same first three rows.
  String[] subsecondSpans = {"100ms", "500us", "10cs", "5ds"};

  for (String subsecondSpan : subsecondSpans) {
    String query =
        String.format(
            "source=%s | bin @timestamp span=%s | fields `@timestamp` | sort `@timestamp` |"
                + " head 3",
            TEST_INDEX_TIME_DATA, subsecondSpan);

    JSONObject binned = executeQuery(query);

    verifySchema(binned, schema("@timestamp", null, "timestamp"));
    verifyDataRows(
        binned,
        rows("2025-07-28 00:15:23"),
        rows("2025-07-28 01:42:15"),
        rows("2025-07-28 02:28:45"));
  }
}
}
Loading