Skip to content

#1820 Add Geography/Geometry type #2224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pyiceberg/avro/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,16 @@ def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_bytes()


class GeographyReader(BinaryReader):
"""Reads a geography from the stream.
"""


class GeometryReader(BinaryReader):
"""Reads a geometry from the stream.
"""


@dataclass(frozen=True, init=False)
class DecimalReader(Reader):
"""Reads a value as a decimal.
Expand Down
24 changes: 24 additions & 0 deletions pyiceberg/avro/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
DoubleReader,
FixedReader,
FloatReader,
GeographyReader,
GeometryReader,
IntegerReader,
ListReader,
MapReader,
Expand All @@ -60,6 +62,8 @@
DoubleWriter,
FixedWriter,
FloatWriter,
GeographyWriter,
GeometryWriter,
IntegerWriter,
ListWriter,
MapWriter,
Expand Down Expand Up @@ -96,6 +100,8 @@
FloatType,
IcebergType,
IntegerType,
GeographyType,
GeometryType,
ListType,
LongType,
MapType,
Expand Down Expand Up @@ -181,6 +187,12 @@ def visit_float(self, float_type: FloatType) -> Writer:
def visit_double(self, double_type: DoubleType) -> Writer:
return DoubleWriter()

def visit_geography(self, geography_type: GeographyType) -> Writer:
return GeographyWriter()

def visit_geometry(self, geometry_type: GeometryType) -> Writer:
return GeometryWriter()

def visit_date(self, date_type: DateType) -> Writer:
return DateWriter()

Expand Down Expand Up @@ -338,6 +350,12 @@ def visit_decimal(self, decimal_type: DecimalType, partner: Optional[IcebergType
def visit_date(self, date_type: DateType, partner: Optional[IcebergType]) -> Writer:
return DateWriter()

def visit_geography(self, geography_type: GeographyType, partner: Optional[IcebergType]) -> Writer:
return GeographyWriter()

def visit_geometry(self, geometry_type: GeometryType, partner: Optional[IcebergType]) -> Writer:
return GeometryWriter()

def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Writer:
return TimeWriter()

Expand Down Expand Up @@ -504,6 +522,12 @@ def visit_fixed(self, fixed_type: FixedType, partner: Optional[IcebergType]) ->
def visit_binary(self, binary_type: BinaryType, partner: Optional[IcebergType]) -> Reader:
return BinaryReader()

def visit_geography(self, geography_type: GeographyType, partner: Optional[IcebergType]) -> Reader:
return GeographyReader()

def visit_geometry(self, geometry_type: GeometryType, partner: Optional[IcebergType]) -> Reader:
return GeometryReader()

def visit_unknown(self, unknown_type: UnknownType, partner: Optional[IcebergType]) -> Reader:
return UnknownReader()

Expand Down
12 changes: 12 additions & 0 deletions pyiceberg/avro/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ def write(self, encoder: BinaryEncoder, val: Any) -> None:
encoder.write_bytes(val)


@dataclass(frozen=True)
class GeographyWriter(Writer):
def write(self, encoder: BinaryEncoder, val: Any) -> None:
encoder.write_bytes(val)


@dataclass(frozen=True)
class GeometryWriter(Writer):
def write(self, encoder: BinaryEncoder, val: Any) -> None:
encoder.write_bytes(val)


@dataclass(frozen=True)
class DecimalWriter(Writer):
precision: int = dataclassfield()
Expand Down
20 changes: 20 additions & 0 deletions pyiceberg/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
DoubleType,
FixedType,
FloatType,
GeometryType,
GeographyType,
IntegerType,
LongType,
PrimitiveType,
Expand Down Expand Up @@ -167,6 +169,8 @@ def _(_: UUIDType, value_str: str) -> uuid.UUID:

@partition_to_py.register(FixedType)
@partition_to_py.register(BinaryType)
@partition_to_py.register(GeographyType)
@partition_to_py.register(GeometryType)
@handle_none
def _(_: PrimitiveType, value_str: str) -> bytes:
return bytes(value_str, UTF8)
Expand Down Expand Up @@ -275,6 +279,8 @@ def _(_: UUIDType, value: Union[uuid.UUID, bytes]) -> bytes:

@to_bytes.register(BinaryType)
@to_bytes.register(FixedType)
@to_bytes.register(GeographyType)
@to_bytes.register(GeometryType)
def _(_: PrimitiveType, value: bytes) -> bytes:
return value

Expand Down Expand Up @@ -355,6 +361,8 @@ def _(_: StringType, b: bytes) -> str:

@from_bytes.register(BinaryType)
@from_bytes.register(FixedType)
@from_bytes.register(GeographyType)
@from_bytes.register(GeometryType)
@from_bytes.register(UUIDType)
def _(_: PrimitiveType, b: bytes) -> bytes:
return b
Expand Down Expand Up @@ -453,6 +461,8 @@ def _(t: FixedType, b: bytes) -> str:


@to_json.register(BinaryType)
@to_json.register(GeographyType)
@to_json.register(GeometryType)
def _(_: BinaryType, b: bytes) -> str:
"""Python bytes serializes into hexadecimal encoded string."""
return codecs.encode(b, "hex").decode(UTF8)
Expand Down Expand Up @@ -580,6 +590,16 @@ def _(_: BinaryType, val: Union[bytes, str]) -> bytes:
return val


@from_json.register(GeographyType)
def _(_: GeographyType, val: bytes) -> bytes:
return val


@from_json.register(GeometryType)
def _(_: GeometryType, val: bytes) -> bytes:
return val


@from_json.register(DecimalType)
def _(_: DecimalType, val: str) -> Decimal:
"""Convert JSON string into a Python Decimal."""
Expand Down
14 changes: 14 additions & 0 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@
DoubleType,
FixedType,
FloatType,
GeographyType,
GeometryType,
IcebergType,
IntegerType,
ListType,
Expand Down Expand Up @@ -754,6 +756,12 @@ def visit_unknown(self, _: UnknownType) -> pa.DataType:
def visit_binary(self, _: BinaryType) -> pa.DataType:
return pa.large_binary()

def visit_geography(self, _: GeographyType) -> pa.DataType:
return pa.large_binary()

def visit_geometry(self, _: GeometryType) -> pa.DataType:
return pa.large_binary()


def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
if not isinstance(iceberg_type, PrimitiveType):
Expand Down Expand Up @@ -1977,6 +1985,12 @@ def visit_uuid(self, uuid_type: UUIDType) -> str:
def visit_binary(self, binary_type: BinaryType) -> str:
return "BYTE_ARRAY"

def visit_geography(self, geography_type: GeographyType) -> str:
return "BYTE_ARRAY"

def visit_geometry(self, geometry_type: GeometryType) -> str:
return "BYTE_ARRAY"

def visit_unknown(self, unknown_type: UnknownType) -> str:
return "UNKNOWN"

Expand Down
30 changes: 30 additions & 0 deletions pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
DoubleType,
FixedType,
FloatType,
GeographyType,
GeometryType,
IcebergType,
IntegerType,
ListType,
Expand Down Expand Up @@ -553,6 +555,10 @@ def primitive(self, primitive: PrimitiveType, primitive_partner: Optional[P]) ->
return self.visit_fixed(primitive, primitive_partner)
elif isinstance(primitive, BinaryType):
return self.visit_binary(primitive, primitive_partner)
elif isinstance(primitive, GeographyType):
return self.visit_geography(primitive, primitive_partner)
elif isinstance(primitive, GeometryType):
return self.visit_geometry(primitive, primitive_partner)
elif isinstance(primitive, UnknownType):
return self.visit_unknown(primitive, primitive_partner)
else:
Expand Down Expand Up @@ -622,6 +628,14 @@ def visit_fixed(self, fixed_type: FixedType, partner: Optional[P]) -> T:
def visit_binary(self, binary_type: BinaryType, partner: Optional[P]) -> T:
"""Visit a BinaryType."""

@abstractmethod
def visit_geography(self, date_type: GeographyType, partner: Optional[P]) -> T:
"""Visit a GeographyType."""

@abstractmethod
def visit_geometry(self, date_type: GeometryType, partner: Optional[P]) -> T:
"""Visit a GeometryType."""

@abstractmethod
def visit_unknown(self, unknown_type: UnknownType, partner: Optional[P]) -> T:
"""Visit a UnknownType."""
Expand Down Expand Up @@ -747,6 +761,10 @@ def primitive(self, primitive: PrimitiveType) -> T:
return self.visit_uuid(primitive)
elif isinstance(primitive, BinaryType):
return self.visit_binary(primitive)
elif isinstance(primitive, GeographyType):
return self.visit_geography(primitive)
elif isinstance(primitive, GeometryType):
return self.visit_geometry(primitive)
elif isinstance(primitive, UnknownType):
return self.visit_unknown(primitive)
else:
Expand Down Expand Up @@ -816,6 +834,14 @@ def visit_uuid(self, uuid_type: UUIDType) -> T:
def visit_binary(self, binary_type: BinaryType) -> T:
"""Visit a BinaryType."""

@abstractmethod
def visit_geography(self, geography_type: GeographyType) -> T:
"""Visit a GeographyType."""

@abstractmethod
def visit_geometry(self, geometry_type: GeometryType) -> T:
"""Visit a GeometryType."""

@abstractmethod
def visit_unknown(self, unknown_type: UnknownType) -> T:
"""Visit a UnknownType."""
Expand Down Expand Up @@ -1645,9 +1671,13 @@ def _(file_type: StringType, read_type: IcebergType) -> IcebergType:


@promote.register(BinaryType)
@promote.register(GeographyType)
@promote.register(GeometryType)
def _(file_type: BinaryType, read_type: IcebergType) -> IcebergType:
if isinstance(read_type, StringType):
return read_type
elif isinstance(read_type, (GeographyType, GeometryType)):
return read_type
else:
raise ResolveError(f"Cannot promote an binary to {read_type}")

Expand Down
4 changes: 3 additions & 1 deletion pyiceberg/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
DateType,
DecimalType,
FixedType,
GeographyType,
GeometryType,
IcebergType,
IntegerType,
LongType,
Expand Down Expand Up @@ -372,7 +374,7 @@ def hash_func(v: Any) -> int:
def hash_func(v: Any) -> int:
return mmh3.hash(decimal_to_bytes(v))

elif isinstance(source, (StringType, FixedType, BinaryType)):
elif isinstance(source, (StringType, FixedType, BinaryType, GeographyType, GeometryType)):

def hash_func(v: Any) -> int:
return mmh3.hash(v)
Expand Down
Loading
Loading