diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java index 4212c05644d..2d7e29d8b5d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java @@ -52,6 +52,7 @@ public class SpanParser { Map.entry("months", "months"), Map.entry("month", "months"), Map.entry("mon", "months"), + Map.entry("M", "months"), // Uppercase M for months (case-sensitive) // Milliseconds Map.entry("ms", "ms"), // Microseconds @@ -63,7 +64,16 @@ public class SpanParser { // Build direct lookup map for efficient unit detection for (String unit : NORMALIZED_UNITS.keySet()) { - UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit); + // Preserve case for case-sensitive units: M (month), m (minute), us, cs, ds + if (unit.equals("M") + || unit.equals("m") + || unit.equals("us") + || unit.equals("cs") + || unit.equals("ds")) { + UNIT_LOOKUP.put(unit, unit); + } else { + UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit); + } } } @@ -135,15 +145,27 @@ private static SpanInfo parseNumericSpan(String spanStr) { /** Extracts time unit from span string (returns original matched unit, not normalized). 
*/ public static String extractTimeUnit(String spanStr) { - String lowerSpanStr = spanStr.toLowerCase(Locale.ROOT); String longestMatch = null; // Find the longest unit that matches as a suffix for (String unit : UNIT_LOOKUP.keySet()) { - if (lowerSpanStr.endsWith(unit)) { + // For case-sensitive units (M, m, us, cs, ds), match case-sensitively + boolean matches; + if (unit.equals("M") + || unit.equals("m") + || unit.equals("us") + || unit.equals("cs") + || unit.equals("ds")) { + matches = spanStr.endsWith(unit); + } else { + // For other units, match case-insensitively + matches = spanStr.toLowerCase(Locale.ROOT).endsWith(unit.toLowerCase(Locale.ROOT)); + } + + if (matches) { // Ensure this is a word boundary (not part of a larger word) - int unitStartPos = lowerSpanStr.length() - unit.length(); - if (unitStartPos == 0 || !Character.isLetter(lowerSpanStr.charAt(unitStartPos - 1))) { + int unitStartPos = spanStr.length() - unit.length(); + if (unitStartPos == 0 || !Character.isLetter(spanStr.charAt(unitStartPos - 1))) { // Keep the longest match if (longestMatch == null || unit.length() > longestMatch.length()) { longestMatch = unit; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java index d70b5fe4667..a7a1d326c6d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java @@ -31,14 +31,14 @@ public RexNode createTimeSpanExpression( private boolean shouldApplyAligntime(String spanStr) { if (spanStr == null) return false; - spanStr = spanStr.replace("'", "").replace("\"", "").trim().toLowerCase(); + spanStr = spanStr.replace("'", "").replace("\"", "").trim(); String timeUnit = SpanParser.extractTimeUnit(spanStr); if (timeUnit == null) return true; // Pure number, assume hours // Aligntime 
ignored for days, months, years - String normalizedUnit = normalizeTimeUnit(timeUnit); - return !normalizedUnit.equals("d") && !normalizedUnit.equals("M"); + String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit); + return !normalizedUnit.equals("d") && !normalizedUnit.equals("months"); } private RexNode createAlignedTimeSpan( @@ -64,7 +64,7 @@ private RexNode createAlignedTimeSpan( if (timeUnit != null) { String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length()); intervalValue = Integer.parseInt(valueStr); - normalizedUnit = normalizeTimeUnit(timeUnit); + normalizedUnit = SpanParser.getNormalizedUnit(timeUnit); } else { intervalValue = Integer.parseInt(spanStr); normalizedUnit = "h"; @@ -86,7 +86,7 @@ private RexNode createStandardTimeSpan( if (timeUnit != null) { String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length()); int value = Integer.parseInt(valueStr); - String normalizedUnit = normalizeTimeUnit(timeUnit); + String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit); return BinTimeSpanUtils.createBinTimeSpanExpression( fieldExpr, value, normalizedUnit, 0, context); } else { @@ -120,39 +120,4 @@ private String extractModifier(String aligntimeStr) { return null; } - - private String normalizeTimeUnit(String unit) { - switch (unit.toLowerCase()) { - case "s", "sec", "secs", "second", "seconds" -> { - return "s"; - } - case "m", "min", "mins", "minute", "minutes" -> { - return "m"; - } - case "h", "hr", "hrs", "hour", "hours" -> { - return "h"; - } - case "d", "day", "days" -> { - return "d"; - } - case "mon", "month", "months" -> { - return "months"; - } - case "us" -> { - return "us"; - } - case "ms" -> { - return "ms"; - } - case "cs" -> { - return "cs"; - } - case "ds" -> { - return "ds"; - } - default -> { - return unit; - } - } - } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java 
b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java index fc721c5896b..60df1c5c1df 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java @@ -14,33 +14,36 @@ public class TimeUnitRegistry { private static final Map UNIT_MAPPING = new HashMap<>(); static { - // Microseconds + // Microseconds (case-sensitive, lowercase only) UNIT_MAPPING.put("us", TimeUnitConfig.MICROSECONDS); // Milliseconds UNIT_MAPPING.put("ms", TimeUnitConfig.MILLISECONDS); - // Centiseconds + // Centiseconds (case-sensitive, lowercase only) UNIT_MAPPING.put("cs", TimeUnitConfig.CENTISECONDS); - // Deciseconds + // Deciseconds (case-sensitive, lowercase only) UNIT_MAPPING.put("ds", TimeUnitConfig.DECISECONDS); // Seconds UNIT_MAPPING.put("s", TimeUnitConfig.SECONDS); UNIT_MAPPING.put("sec", TimeUnitConfig.SECONDS); + UNIT_MAPPING.put("secs", TimeUnitConfig.SECONDS); UNIT_MAPPING.put("second", TimeUnitConfig.SECONDS); UNIT_MAPPING.put("seconds", TimeUnitConfig.SECONDS); - // Minutes + // Minutes (case-sensitive lowercase 'm') UNIT_MAPPING.put("m", TimeUnitConfig.MINUTES); UNIT_MAPPING.put("min", TimeUnitConfig.MINUTES); + UNIT_MAPPING.put("mins", TimeUnitConfig.MINUTES); UNIT_MAPPING.put("minute", TimeUnitConfig.MINUTES); UNIT_MAPPING.put("minutes", TimeUnitConfig.MINUTES); // Hours UNIT_MAPPING.put("h", TimeUnitConfig.HOURS); UNIT_MAPPING.put("hr", TimeUnitConfig.HOURS); + UNIT_MAPPING.put("hrs", TimeUnitConfig.HOURS); UNIT_MAPPING.put("hour", TimeUnitConfig.HOURS); UNIT_MAPPING.put("hours", TimeUnitConfig.HOURS); @@ -49,7 +52,7 @@ public class TimeUnitRegistry { UNIT_MAPPING.put("day", TimeUnitConfig.DAYS); UNIT_MAPPING.put("days", TimeUnitConfig.DAYS); - // Months (case-sensitive M) + // Months (case-sensitive uppercase 'M') UNIT_MAPPING.put("M", TimeUnitConfig.MONTHS); UNIT_MAPPING.put("mon", TimeUnitConfig.MONTHS); 
UNIT_MAPPING.put("month", TimeUnitConfig.MONTHS); @@ -59,15 +62,19 @@ public class TimeUnitRegistry { /** * Gets the time unit configuration for the given unit string. * - * @param unit The unit string (e.g., "h", "hours", "M") + * @param unit The unit string (e.g., "h", "hours", "M", "m") * @return The time unit configuration, or null if not found */ public static TimeUnitConfig getConfig(String unit) { - if (unit.equals("M")) { - // M is case-sensitive for months + // Handle case-sensitive units: M (month), m (minute), and subsecond units (us, cs, ds) + if (unit.equals("M") + || unit.equals("m") + || unit.equals("us") + || unit.equals("cs") + || unit.equals("ds")) { return UNIT_MAPPING.get(unit); } else { - // For all other units, use lowercase lookup + // For all other units, use lowercase lookup for case-insensitive matching return UNIT_MAPPING.get(unit.toLowerCase()); } } diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 1ebdc3f897e..d3bae5b4abf 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -75,12 +75,25 @@ Specifies the width of each bin interval with support for multiple span types: - Creates logarithmic bin boundaries instead of linear **3. 
Time Scale Span (comprehensive time units)** -- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds) + +The ``bin`` command supports a comprehensive set of time units, including subsecond precision: + +- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds); ``us``, ``cs``, and ``ds`` are case-sensitive and must be lowercase - **Seconds**: ``s``, ``sec``, ``secs``, ``second``, ``seconds`` -- **Minutes**: ``m``, ``min``, ``mins``, ``minute``, ``minutes`` +- **Minutes**: ``m`` (case-sensitive, lowercase only), ``min``, ``mins``, ``minute``, ``minutes`` - **Hours**: ``h``, ``hr``, ``hrs``, ``hour``, ``hours`` - **Days**: ``d``, ``day``, ``days`` - **Uses precise daily binning algorithm** -- **Months**: ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm** +- **Months**: ``M`` (case-sensitive, uppercase only), ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm** + +**Case Sensitivity Note**: + - ``m`` (lowercase) = minute + - ``M`` (uppercase) = month + - Subsecond units ``us``, ``cs``, and ``ds`` are case-sensitive and must be lowercase; ``ms`` is matched case-insensitively + +.. note:: + + The ``bin`` command implements time binning using **Calcite**, which provides full control over the binning logic. This allows it to support **subsecond units** (us, cs, ds) that are not available in the ``stats`` command, which relies on OpenSearch's calendar intervals. + - **Examples**: - ``span=30seconds`` - ``span=15minutes`` diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index e61b4120410..fb18a896ef1 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -84,6 +84,16 @@ stats [bucket_nullable=bool] ... [by-clause] | year (y) | +----------------------------+ +**Case Sensitivity Note**: + - ``m`` (lowercase) = minute + - ``M`` (uppercase) = month + +.. note:: + + The ``stats`` command uses **OpenSearch calendar intervals** for time-based aggregations. 
**Subsecond units** (us, cs, ds) are **not supported** by the stats command due to OpenSearch limitations. + + If you need subsecond precision for time binning, use the ``bin`` command instead, which implements time binning using Calcite and supports all time units including subsecond precision. + Configuration ============= Some aggregation functions require Calcite to be enabled for proper functionality. To enable Calcite, use the following command: diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index 13e6b4a47e1..66ae49a040e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -984,4 +984,86 @@ public void testStatsWithBinsOnTimeAndTermField_Avg() throws IOException { rows(50, "us-east", "2024-07-01 00:05:00"), rows(40.25, "us-west", "2024-07-01 00:01:00")); } + + @Test + public void testBinCaseSensitivity_mon_vs_M() throws IOException { + // Test uppercase 'M' for months - bin by 1 month + JSONObject monthResultM = + executeQuery( + String.format( + "source=%s | bin @timestamp span=1M | fields `@timestamp` | sort `@timestamp` |" + + " head 1", + TEST_INDEX_TIME_DATA)); + verifySchema(monthResultM, schema("@timestamp", null, "string")); + verifyDataRows(monthResultM, rows("2025-07")); + + // Test full name 'mon' for months - should produce same result as 'M' + JSONObject monthResultMon = + executeQuery( + String.format( + "source=%s | bin @timestamp span=1mon | fields `@timestamp` | sort `@timestamp` |" + + " head 1", + TEST_INDEX_TIME_DATA)); + verifySchema(monthResultMon, schema("@timestamp", null, "string")); + verifyDataRows(monthResultMon, rows("2025-07")); + } + + @Test + public void testBinWithSubsecondUnits() throws IOException { + // Test milliseconds (ms) - bin by 100 milliseconds + 
JSONObject msResult = + executeQuery( + String.format( + "source=%s | bin @timestamp span=100ms | fields `@timestamp` | sort `@timestamp` |" + + " head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(msResult, schema("@timestamp", null, "timestamp")); + verifyDataRows( + msResult, + rows("2025-07-28 00:15:23"), + rows("2025-07-28 01:42:15"), + rows("2025-07-28 02:28:45")); + + // Test microseconds (us) - bin by 500 microseconds + JSONObject usResult = + executeQuery( + String.format( + "source=%s | bin @timestamp span=500us | fields `@timestamp` | sort `@timestamp` |" + + " head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(usResult, schema("@timestamp", null, "timestamp")); + verifyDataRows( + usResult, + rows("2025-07-28 00:15:23"), + rows("2025-07-28 01:42:15"), + rows("2025-07-28 02:28:45")); + + // Test centiseconds (cs) - bin by 10 centiseconds (100ms) + JSONObject csResult = + executeQuery( + String.format( + "source=%s | bin @timestamp span=10cs | fields `@timestamp` | sort `@timestamp` |" + + " head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(csResult, schema("@timestamp", null, "timestamp")); + verifyDataRows( + csResult, + rows("2025-07-28 00:15:23"), + rows("2025-07-28 01:42:15"), + rows("2025-07-28 02:28:45")); + + // Test deciseconds (ds) - bin by 5 deciseconds (500ms) + JSONObject dsResult = + executeQuery( + String.format( + "source=%s | bin @timestamp span=5ds | fields `@timestamp` | sort `@timestamp` |" + + " head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(dsResult, schema("@timestamp", null, "timestamp")); + verifyDataRows( + dsResult, + rows("2025-07-28 00:15:23"), + rows("2025-07-28 01:42:15"), + rows("2025-07-28 02:28:45")); + } }