def visit_TemplateSQL(
    self,
    op: ops.TemplateSQL,
    *,
    strings: tuple[str, ...],
    values: tuple[sge.Expression, ...],
    dialect: str,
):
    """Compile a ``TemplateSQL`` op into a single sqlglot expression.

    The static ``strings`` and the already-compiled ``values`` are
    interleaved (string, value, string, value, ...), every value is rendered
    to SQL text in ``dialect``, and the concatenated text is parsed back
    into a sqlglot expression, reading it as ``dialect``.

    Parameters
    ----------
    op
        The operation being compiled. Unused here; it is part of the
        visitor signature.
    strings
        The static SQL fragments of the template.
    values
        The compiled sqlglot expressions for the interpolated values.
    dialect
        The SQL dialect the template text was written in.

    Returns
    -------
    The sqlglot expression parsed from the assembled SQL text.
    """
    # NOTE(review): values are spliced in as rendered, without adding
    # parentheses. If a rendered value is a bare binary expression, the
    # operators of the surrounding template text decide how it associates
    # (e.g. a template like "{a + b} * 2") -- confirm whether interpolated
    # values should be wrapped in parentheses.

    def rendered_fragments():
        # zip_longest pads the shorter sequence with None; test for that
        # sentinel explicitly instead of relying on object truthiness.
        for text, compiled in itertools.zip_longest(strings, values):
            if text:
                yield text
            if compiled is not None:
                yield compiled.sql(dialect)

    return sg.parse_one("".join(rendered_fragments()), read=dialect)
def test_dialect():
    """Check that the template's source dialect, not the backend, fixes the type.

    The same ``CAST(... AS REAL)`` template is built twice: once read as
    sqlite SQL and once with the default dialect. Each expression is then
    executed on both a sqlite and a duckdb connection.
    """
    pa = pytest.importorskip("pyarrow")
    five = ibis.literal(5)  # noqa: F841
    template = t("CAST({five} AS REAL)")

    expr_sqlite = ibis.sql_value(template, dialect="sqlite")
    expr_default = ibis.sql_value(template)

    # (expression, arrow type expected regardless of the executing backend)
    expectations = (
        (expr_default, pa.float32()),
        (expr_sqlite, pa.float64()),
    )

    for backend_name in ("sqlite", "duckdb"):
        con = getattr(ibis, backend_name).connect()
        for expr, expected_type in expectations:
            scalar = con.to_pyarrow(expr)
            assert scalar.type == expected_type
            assert scalar.as_py() == 5.0
def sql_value(template: IntoTemplate, /, *, dialect: str | None = None) -> ir.Value:
    """Create an ibis value from a t-string.

    t-strings, or Template Strings, were added as builtin syntax in Python 3.14.
    See https://docs.python.org/3.14/library/string.templatelib.html
    for more information.

    This function allows you to create an ibis value expression from a t-string.
    It does NOT support generic SELECT statements, only expressions that
    represent a single value.

    Only the `value` of each interpolation is used when building the
    expression; format specs (`{x:.2f}`) and conversions (`{x!r}`) in the
    template are ignored.

    Parameters
    ----------
    template
        The template to use for creating the SQL expression.
    dialect
        The SQL dialect the template is written in.
        Defaults to "duckdb".

    Returns
    -------
    Value
        An ibis value expression wrapping the template.

    Raises
    ------
    TypeError
        If the datatype of the templated SQL cannot be inferred.

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> con = ibis.duckdb.connect()
    >>> table = con.create_table("my_table", {"a": [1, 2, 3], "b": [4, 5, 6]})

    If you are using python 3.14+, you can replace the lines
    below with `template = t"{table.b} + 3 - {table.a / 10}"`.
    Here, since we are testing on older versions,
    we use a tiny implementation of t-strings included in ibis that works as a replacement.
    If you are on python < 3.14, you should use a backport such as
    https://pypi.org/project/tstrings-backport and do `from tstrings import t`.

    >>> from ibis.tests.tstring import t
    >>> template = t("{table.b} + 3 - {table.a / 10}")

    Now create an ibis expression based on this.

    >>> expr = ibis.sql_value(template)
    >>> print(expr.to_sql())
    SELECT
      "t0"."b" + 3 - "t0"."a" / 10 AS "TemplateSQL((), (b, Divide(a, 10)))"
    FROM "memory"."main"."my_table" AS "t0"
    >>> table.mutate(expr=expr, s=expr.cast(str) + "!")
    ┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓
    ┃ a     ┃ b     ┃ expr    ┃ s      ┃
    ┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩
    │ int64 │ int64 │ float64 │ string │
    ├───────┼───────┼─────────┼────────┤
    │     1 │     4 │     6.9 │ 6.9!   │
    │     2 │     5 │     7.8 │ 7.8!   │
    │     3 │     6 │     8.7 │ 8.7!   │
    └───────┴───────┴─────────┴────────┘

    You can provide a `dialect` parameter if you pass in a template written in
    a specific SQL dialect, and then this will be transpiled to
    the correct dialect upon execution.

    For example, write a template in sqlite syntax (with datatype REAL)
    and then execute it on duckdb (where REAL will be interpreted as DOUBLE).

    >>> template = t("CAST({table.a} AS REAL)")
    >>> expr = ibis.sql_value(template, dialect="sqlite")
    >>> arr = con.to_pyarrow(expr)
    >>> arr.type
    DataType(double)
    >>> arr.to_pylist()
    [1.0, 2.0, 3.0]
    """
    from ibis.expr.operations.template import TemplateSQL

    return TemplateSQL.from_template(template, dialect=dialect).to_expr()
@runtime_checkable
class IntoInterpolation(Protocol):
    """Protocol for an object that can be interpreted as a PEP 750 t-string Interpolation.

    This is a structural type: any object exposing the attributes below
    satisfies it, so both the builtin interpolation objects and backported
    implementations can be accepted without a shared base class.
    """

    # The evaluated object that was interpolated into the template.
    # NOTE(review): annotated as an ibis value expression; confirm whether
    # plain Python scalars are also meant to be accepted here.
    value: ExprValue
    # The source text of the interpolated expression.
    expression: str
@public
class TemplateSQL(Value):
    """A value defined by SQL text with ibis values interpolated into it.

    The SQL is stored as static ``strings`` interleaved with ``values``
    (string, value, string, value, ...).

    The datatype is inferred with sqlglot: every interpolated value is
    replaced by ``CAST(NULL AS <its type>)``, the resulting SQL is parsed in
    ``dialect`` and type-annotated, and the annotated type is mapped back to
    an ibis datatype. Construction raises ``TypeError`` when that inferred
    datatype is unknown.

    The shape is scalar when there are no interpolated values, otherwise the
    highest-precedence shape among the values.
    """

    strings: VarTuple[str]
    values: VarTuple[Value]
    dialect: Optional[str] = None
    """The SQL dialect the template was written in.

    eg if t'CAST({val} AS REAL)', you should use 'sqlite',
    since REAL is a sqlite-specific concept.
    """

    def __init__(self, strings, values, dialect: str | None = None):
        # A missing (or empty) dialect falls back to "duckdb".
        super().__init__(strings=strings, values=values, dialect=dialect or "duckdb")
        # Fail at construction time if the datatype cannot be inferred.
        if self.dtype.is_unknown():
            raise TypeError(
                f"Could not infer the dtype of the template expression with sql:\n{self.sql_for_inference}"
            )

    @classmethod
    def from_template(
        cls, template: IntoTemplate, /, *, dialect: str | None = None
    ) -> TemplateSQL:
        """Build the operation from a t-string-like ``template``.

        Only the ``value`` of each interpolation is read; the other
        attributes of the interpolations are not used.
        """
        return cls(
            strings=template.strings,
            values=tuple(interp.value for interp in template.interpolations),
            dialect=dialect,
        )

    @attribute
    def shape(self):
        """Scalar without interpolated values, else their highest-precedence shape."""
        if not self.values:
            return ds.scalar
        return rlz.highest_precedence_shape(self.values)

    @attribute
    def dtype(self) -> dt.DataType:
        """The ibis datatype inferred by sqlglot from ``sql_for_inference``."""
        parsed = sg.parse_one(self.sql_for_inference, dialect=self.dialect)
        annotated = annotate_types(parsed, dialect=self.dialect)
        return self.type_mapper.to_ibis(annotated.type)

    @attribute
    def relations(self) -> frozenset[Relation]:
        """The union of the relations of all interpolated values."""
        return frozenset().union(*(value.relations for value in self.values))

    @property
    def sql_for_inference(self) -> str:
        """The template SQL with every value replaced by a typed NULL.

        Type inference only needs the datatype of each interpolated value,
        so each one is rendered as ``CAST(NULL AS <type>)`` in ``dialect``.
        """
        # Resolve the loop invariants once instead of once per part.
        mapper = self.type_mapper
        dialect = self.dialect
        fragments: list[str] = []
        for part in self.parts:
            if isinstance(part, str):
                fragments.append(part)
            else:
                typed_null = sge.cast(sge.null(), mapper.from_ibis(part.dtype))
                fragments.append(typed_null.sql(dialect))
        return "".join(fragments)

    @property
    def type_mapper(self) -> SqlglotType:
        """The sqlglot <-> ibis type mapper for this template's dialect."""
        return get_type_mapper(self.dialect)

    @property
    def parts(self) -> tuple[str | Value, ...]:
        """The strings and values interleaved in template order.

        Empty strings are omitted.
        """

        def interleave() -> Iterator[str | Value]:
            # zip_longest pads the shorter sequence with None; test for that
            # sentinel explicitly instead of relying on object truthiness.
            for text, value in zip_longest(self.strings, self.values):
                if text:
                    yield text
                if value is not None:
                    yield value

        return tuple(interleave())
test_no_interpolations(): + template = t("5 + 4") + op = TemplateSQL.from_template(template) + assert op.dialect == "duckdb" + assert op.shape.is_scalar() + assert op.dtype == dt.int32 + + +def test_select_errors(): + five = ibis.literal(5) # noqa: F841 + template = t("SELECT {five}") + with pytest.raises(TypeError, match=r".*SELECT CAST\(NULL AS TINYINT\)"): + TemplateSQL.from_template(template) + + +def test_api(): + five = ibis.literal(5) # noqa: F841 + template = t("{five} + 4") + expr = ibis.sql_value(template) + assert isinstance(expr, ibis.Value) + assert expr.type().is_integer() + assert expr.type().nullable + + +def test_name(): + five = ibis.literal(5) # noqa: F841 + template = t("{five} + 4") + expr = ibis.sql_value(template) + actual = expr.get_name() + assert actual + # explicitly not tested + # expected_name = "TemplateSQL((), (5,))" + # assert actual == expected_name diff --git a/ibis/tests/tstring.py b/ibis/tests/tstring.py new file mode 100644 index 000000000000..0db517f86dae --- /dev/null +++ b/ibis/tests/tstring.py @@ -0,0 +1,243 @@ +"""A Backport of PEP 750 Template Strings (t-strings).""" + +from __future__ import annotations + +import re +import sys +from dataclasses import dataclass +from itertools import zip_longest +from typing import TYPE_CHECKING, Literal, NoReturn + +if TYPE_CHECKING: + from collections.abc import Iterator + +__all__ = [ + "Interpolation", + "Template", + "t", +] + +# Regex to find and parse an f-string-like interpolation. +# It captures: +# 1. The main expression. +# 2. An optional debug specifier (=). +# 3. An optional conversion specifier (!r, !s, or !a). +# 4. An optional format specifier (:...). +_INTERPOLATION_RE = re.compile( + r""" + \{ + # The core expression, non-greedy + (?P.+?) + # Optional debug specifier + (?P=)? + # Optional conversion, one of !r, !s, or !a + (?P![rsa])? + # Optional format spec, starting with a colon, non-greedy until } + (?P:[^}]*)? 
+ } + """, + re.VERBOSE | re.DOTALL, +) + +if sys.version_info >= (3, 10): + dataclass_extra_args = {"slots": True} +else: + dataclass_extra_args = {} + + +@dataclass(frozen=True, eq=False, **dataclass_extra_args) +class Interpolation: + """Emulates the string.templatelib.Interpolation class from PEP 750. + + Represents an expression inside a template string. + """ + + value: object + expression: str + conversion: Literal["a", "r", "s"] | None = None + format_spec: str = "" + + def __eq__(self, value: object) -> bool: + """Template and Interpolation instances compare with object identity (is).""" + return self is value + + def __hash__(self) -> int: + """Hash based on identity.""" + return id(self) + + +@dataclass(frozen=True, eq=False, **dataclass_extra_args) +class Template: + """Emulates the string.templatelib.Template class from PEP 750. + + Represents a parsed t-string literal. + """ + + strings: tuple[str, ...] + """ + A non-empty tuple of the string parts of the template, + with N+1 items, where N is the number of interpolations + in the template. + """ + interpolations: tuple[Interpolation, ...] + """ + A tuple of the interpolation parts of the template. + This will be an empty tuple if there are no interpolations. + """ + + @property + def values(self) -> tuple[object, ...]: + """A tuple of the `value` attributes of each Interpolation in the template. + + This will be an empty tuple if there are no interpolations. + """ + return tuple(interp.value for interp in self.interpolations) + + def __iter__(self) -> Iterator[str | Interpolation]: + """Iterate over the string parts and interpolations in the template. + + These may appear in any order. Empty strings will not be included. 
def t(template_string: str, /) -> Template:
    """Emulates a PEP 750 t-string literal for Python < 3.14.

    This function parses a string with f-string-like syntax and returns
    a `Template` object, evaluating each interpolated expression in the
    caller's scope.

    Expressions are evaluated with `eval` against the globals and locals of
    the calling frame, so only pass template strings you trust.

    Args:
        template_string: The string to parse, e.g., "Hello {name!r}".

    Returns:
        A `Template` instance containing the parsed static strings and
        evaluated interpolations.

    Raises:
        SyntaxError: If an interpolation combines the debug specifier (`=`)
            with an explicit conversion.
        Exception: An error raised while evaluating an expression is
            re-raised as the same exception type, with the failing
            expression added to the message.

    Example:
        >>> temp, unit = 22.43, "C"
        >>> template = t("Temperature: {temp:.1f} degrees {unit!s}")
        >>> template.strings
        ('Temperature: ', ' degrees ', '')
        >>> len(template.interpolations)
        2
        >>> template.interpolations[0]
        Interpolation(value=22.43, expression='temp', conversion=None, format_spec='.1f')
        >>> template.interpolations[1]
        Interpolation(value='C', expression='unit', conversion='s', format_spec='')
    """
    # Imported once per call rather than once per interpolation.
    import textwrap

    # Get the execution frame of the caller to evaluate expressions in their scope.
    # sys._getframe(0) is the frame of t()
    # sys._getframe(1) is the frame of the caller of t()
    caller_frame = sys._getframe(1)
    caller_globals = caller_frame.f_globals
    caller_locals = caller_frame.f_locals

    strings = []
    interpolations = []
    last_end = 0

    for match in _INTERPOLATION_RE.finditer(template_string):
        # Add the static string part before this interpolation
        strings.append(template_string[last_end : match.start()])
        last_end = match.end()

        groups = match.groupdict()
        raw_expression = groups["expression"]

        # The debug specifier is syntactic sugar. It modifies both the
        # preceding string part and the interpolation itself.
        if groups["debug"]:
            # t'{value=}' becomes t'value={value!r}'
            # t'{value=:fmt}' becomes t'value={value!s:fmt}'
            if groups["conversion"]:
                raise SyntaxError("f-string: cannot specify both conversion and '='")

            # The trailing '=' is captured by the separate "debug" group, so
            # the expression text never contains it. Any '=' that does appear
            # in the expression (``a == b``, ``f(key=1)``, a string literal)
            # belongs to the expression and must not be used as a split point.
            # Prepend 'expression=' (whitespace preserved) to the *current*
            # static string.
            strings[-1] += raw_expression + "="

            # If a format spec is present, conversion becomes 's'. Otherwise, 'r'.
            conv_char = "s" if groups["format_spec"] else "r"
            expression_to_eval = raw_expression.strip()
        else:
            # Drop the leading "!" of the conversion, if there is one.
            conv_char = groups["conversion"][1] if groups["conversion"] else None
            expression_to_eval = raw_expression

        # Drop the leading ":" of the format spec, if there is one.
        fmt_spec = groups["format_spec"][1:] if groups["format_spec"] else ""

        # Dedent multiline expressions for evaluation
        expr_eval_str = textwrap.dedent(expression_to_eval)

        # Evaluate the expression to get its value using the caller's context
        try:
            value = eval(expr_eval_str, caller_globals, caller_locals)  # noqa: S307
        except Exception as e:
            # Re-raise with more context
            msg = f"Failed to evaluate expression '{expression_to_eval}': {e}"
            raise type(e)(msg) from e

        interpolations.append(
            Interpolation(
                value=value,
                expression=expression_to_eval,
                conversion=conv_char,
                format_spec=fmt_spec,
            )
        )

    # Add the final static string part after the last interpolation
    strings.append(template_string[last_end:])

    return Template(strings=tuple(strings), interpolations=tuple(interpolations))