Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
6c0f474
Interpret strings with respect to schema when conforming properties
ReubenFrankel Apr 22, 2025
a17bffb
`interpret` -> `transform`
ReubenFrankel Apr 22, 2025
cdaf620
Handle both nullable and non-nullable property schema types
ReubenFrankel Apr 22, 2025
a00f7b8
Add specific test to address incomplete patch coverage
ReubenFrankel Apr 25, 2025
6650795
Merge branch 'main' into feat/conform-primitive-interpret-string
ReubenFrankel Apr 25, 2025
cc23559
Use finite check over explicit NaN and infinite checks
ReubenFrankel Apr 25, 2025
e7396fc
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon Apr 28, 2025
f5aa93c
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon Apr 28, 2025
9c176a5
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon Apr 28, 2025
a8658f4
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon Apr 30, 2025
ae69a16
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon Apr 30, 2025
24cb6e2
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon May 5, 2025
e2aac19
Merge branch 'main' into feat/conform-primitive-interpret-string
edgarrmondragon May 6, 2025
9c7be41
Merge branch 'main' into feat/conform-primitive-interpret-string
ReubenFrankel May 12, 2025
97eed5a
Merge branch 'main' into feat/conform-primitive-interpret-string
ReubenFrankel May 12, 2025
2b14c55
Merge branch 'main' into feat/conform-primitive-interpret-string
ReubenFrankel May 23, 2025
c391924
Try using `cachetools` to cache schema type-check function calls
ReubenFrankel May 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ requires-python = ">=3.9"
dependencies = [
'backoff>=2.0.0; python_version<"4"',
'backports-datetime-fromisoformat>=2.0.1; python_version<"3.11"',
"cachetools>=5.5.2",
"click~=8.0",
"fsspec>=2024.9.0",
'importlib-metadata>=5.0; python_version<"3.12"',
Expand Down
53 changes: 50 additions & 3 deletions singer_sdk/helpers/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
import copy
import datetime
import decimal
import functools
import json
import logging
import math
import typing as t
import uuid
from enum import Enum
from functools import lru_cache

from cachetools import cached

_MAX_TIMESTAMP = "9999-12-31 23:59:59.999999"
_MAX_TIME = "23:59:59.999999"
JSONSCHEMA_ANNOTATION_SECRET = "secret" # noqa: S105
Expand All @@ -21,6 +25,11 @@
logger = logging.getLogger(__name__)


def cached_schema(func: t.Callable[[dict], t.Any]) -> t.Callable[[dict], t.Any]:
    """Memoize a single-argument schema check, keyed on the schema's content.

    Meant to be applied as a bare decorator (``@cached_schema``). Schema dicts
    are unhashable, so the cache key is the schema serialized to JSON with
    sorted keys; schemas with equal content therefore share one cache entry
    regardless of key order.

    Args:
        func: A function that takes one positional JSON schema dict.

    Returns:
        ``func`` wrapped by ``cachetools.cached`` with a cache of its own.
    """
    # A fresh dict per decorated function keeps the results of different
    # checks from colliding on the same schema key.
    # NOTE(review): the dict is unbounded - confirm the set of distinct
    # schemas seen at runtime is small enough for that to be acceptable.
    memoize = cached({}, key=lambda schema: json.dumps(schema, sort_keys=True))
    return memoize(func)

Check warning on line 30 in singer_sdk/helpers/_typing.py

View check run for this annotation

Codecov / codecov/patch

singer_sdk/helpers/_typing.py#L30

Added line #L30 was not covered by tests


class DatetimeErrorTreatmentEnum(Enum):
"""Enum for treatment options for date parsing error."""

Expand Down Expand Up @@ -78,6 +87,7 @@
return result


@cached_schema
def is_secret_type(type_dict: dict) -> bool:
"""Return True if JSON Schema type definition appears to be a secret.

Expand Down Expand Up @@ -108,6 +118,7 @@
return False


@cached_schema
def is_object_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is an object or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -121,6 +132,7 @@
)


@cached_schema
def is_uniform_list(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is an array with a single schema.

Expand All @@ -135,6 +147,7 @@
)


@cached_schema
def is_datetime_type(type_dict: dict) -> bool:
"""Return True if JSON Schema type definition is a 'date-time' type.

Expand All @@ -154,6 +167,7 @@
raise ValueError(msg)


@cached_schema
def is_date_or_datetime_type(type_dict: dict) -> bool:
"""Return True if JSON Schema type definition is a 'date'/'date-time' type.

Expand Down Expand Up @@ -185,6 +199,7 @@
raise ValueError(msg)


@cached_schema
def get_datelike_property_type(property_schema: dict) -> str | None:
"""Return one of 'date-time', 'time', or 'date' if property is date-like.

Expand All @@ -199,6 +214,7 @@
return None


@cached_schema
def _is_string_with_format(type_dict: dict[str, t.Any]) -> bool | None:
if "string" in type_dict.get("type", []) and type_dict.get("format") in {
"date-time",
Expand Down Expand Up @@ -235,6 +251,7 @@
raise ValueError(msg)


@cached_schema
def is_string_array_type(type_dict: dict) -> bool:
"""Return True if JSON Schema type definition is a string array."""
if not type_dict:
Expand All @@ -253,6 +270,7 @@
return "array" in type_dict["type"] and bool(is_string_type(type_dict["items"]))


@cached_schema
def is_array_type(type_dict: dict) -> bool:
"""Return True if JSON Schema type is an array."""
if not type_dict:
Expand All @@ -271,6 +289,7 @@
return "array" in type_dict["type"]


@cached_schema
def is_boolean_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -286,6 +305,7 @@
return False


@cached_schema
def _is_exclusive_boolean_type(property_schema: dict) -> bool:
if "type" not in property_schema:
return False
Expand All @@ -297,6 +317,7 @@
)


@cached_schema
def is_integer_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is an integer or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -312,6 +333,7 @@
return False


@cached_schema
def is_string_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is a string or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -327,6 +349,7 @@
return False


@cached_schema
def is_null_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is a null or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -342,6 +365,7 @@
return False


@cached_schema
def is_number_type(property_schema: dict) -> bool | None:
"""Return true if the JSON Schema type is a number or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down Expand Up @@ -542,9 +566,32 @@
# for BIT value, treat 0 as False and anything else as True
return elem != b"\x00" if is_boolean_type(property_schema) else elem.hex()
if isinstance(elem, (float, decimal.Decimal)):
if math.isnan(elem) or math.isinf(elem):
return None
return elem
return elem if math.isfinite(elem) else None
if isinstance(elem, str) and not is_string_type(property_schema):
return _transform_string_property(elem, property_schema)
if _is_exclusive_boolean_type(property_schema):
return None if elem is None else elem != 0
return elem


def _transform_string_property(  # noqa: PLR0911
    elem: str,
    property_schema: dict,
) -> t.Any:  # noqa: ANN401
    """Convert a string value to the type declared by ``property_schema``.

    Checks are tried in order: nullable (empty strings only), boolean,
    integer, number, array, object. The first matching schema type decides
    the result; if none match, the string is returned unchanged.

    Args:
        elem: The string value to convert.
        property_schema: JSON schema of the property the value belongs to.

    Returns:
        ``None`` for an empty string under a nullable schema; otherwise a bool
        (``True`` only for a case-insensitive ``"true"``), an ``int``, a finite
        ``decimal.Decimal`` (``None`` when not finite), or the JSON-decoded
        value for array/object schemas. Empty strings yield ``False``, ``0``,
        ``Decimal(0)``, ``[]`` and ``{}`` respectively.
    """
    is_empty = not elem

    if is_empty and is_null_type(property_schema):
        return None

    if is_boolean_type(property_schema):
        # Anything other than "true" (case-insensitive) is False, "" included.
        return elem.lower() == "true"

    if is_integer_type(property_schema):
        return 0 if is_empty else int(elem)

    if is_number_type(property_schema):
        number = decimal.Decimal(0) if is_empty else decimal.Decimal(elem)
        return number if number.is_finite() else None

    if is_array_type(property_schema):
        return [] if is_empty else json.loads(elem)

    if is_object_type(property_schema):
        return {} if is_empty else json.loads(elem)

    return elem
57 changes: 57 additions & 0 deletions tests/core/test_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from singer_sdk.helpers._typing import (
TypeConformanceLevel,
_conform_primitive_property,
_transform_string_property,
conform_record_data_types,
)
from singer_sdk.typing import (
Expand Down Expand Up @@ -357,12 +358,68 @@ def test_conform_object_additional_properties():
pytest.param(
decimal.Decimal("nan"), {"type": "number"}, None, id="decimal_nan_to_number"
),
pytest.param("", {"type": "string"}, "", id="string_empty_to_string"),
pytest.param(
"",
{"type": ["boolean", "null"]},
None,
id="string_empty_to_any_nullable_non_string",
),
pytest.param("true", {"type": "boolean"}, True, id="string_true_to_boolean"),
pytest.param(
"TRUE",
{"type": "boolean"},
True,
id="string_true_uppercase_to_boolean",
),
pytest.param("false", {"type": "boolean"}, False, id="string_false_to_boolean"),
pytest.param(
"something else",
{"type": "boolean"},
False,
id="string_not_true_to_boolean",
),
pytest.param("", {"type": "boolean"}, False, id="string_empty_to_boolean"),
pytest.param("3", {"type": "integer"}, 3, id="string_integer_to_integer"),
pytest.param("", {"type": "integer"}, 0, id="string_empty_to_integer"),
pytest.param(
"3.14",
{"type": "number"},
decimal.Decimal("3.14"),
id="string_float_to_number",
),
pytest.param("inf", {"type": "number"}, None, id="string_inf_to_number"),
pytest.param("nan", {"type": "number"}, None, id="string_nan_to_number"),
pytest.param(
"",
{"type": "number"},
decimal.Decimal(0),
id="string_empty_to_number",
),
pytest.param(
"[1, 2, 3]",
{"type": "array"},
[1, 2, 3],
id="string_json_array_to_array",
),
pytest.param("", {"type": "array"}, [], id="string_empty_to_array"),
pytest.param(
'{"a": 1, "b": true, "c": 3.14}',
{"type": "object"},
{"a": 1, "b": True, "c": 3.14},
id="string_json_object_to_object",
),
pytest.param("", {"type": "object"}, {}, id="string_empty_to_object"),
],
)
def test_conform_primitives(value: t.Any, type_dict: dict, expected: t.Any):
    """Conforming ``value`` against ``type_dict`` yields ``expected``."""
    conformed = _conform_primitive_property(value, type_dict)
    assert conformed == expected


def test_transform_string_to_string():
    """A string value under a string schema is returned unchanged."""
    result = _transform_string_property("test", {"type": "string"})
    assert result == "test"


@pytest.mark.filterwarnings("ignore:Use `JSONSchemaToSQL` instead.:DeprecationWarning")
@pytest.mark.parametrize(
"jsonschema_type,expected",
Expand Down
Loading
Loading