Commit 5b20362

fix: tests for Athena category were not executing at all (#2702)
1 parent e7ecd81 commit 5b20362

File tree

2 files changed (+65 -24 lines)

tests/unit/test_athena.py

Lines changed: 40 additions & 24 deletions
@@ -576,8 +576,19 @@ def test_athena_time_zone(glue_database):
     assert df["value"][0].year == datetime.datetime.utcnow().year


-@pytest.mark.xfail(raises=NotImplementedError, reason="Unable to create pandas categorical from pyarrow table")
-def test_category(path, glue_table, glue_database):
+@pytest.mark.parametrize(
+    "ctas_approach",
+    [
+        pytest.param(False),
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError, reason="Unable to create pandas categorical from pyarrow table"
+            ),
+        ),
+    ],
+)
+def test_category(path: str, glue_table: str, glue_database: str, ctas_approach: bool) -> None:
     df = get_df_category()
     wr.s3.to_parquet(
         df=df,
@@ -588,37 +599,42 @@ def test_category(path, glue_table, glue_database):
         mode="overwrite",
         partition_cols=["par0", "par1"],
     )
-    df2 = wr.s3.read_parquet(
-        path=path,
-        dataset=True,
-        pyarrow_additional_kwargs={
-            "categories": [c for c in df.columns if c not in ["par0", "par1"]],
-            "strings_to_categorical": True,
-        },
-    )
-    ensure_data_types_category(df2)
-    df2 = wr.athena.read_sql_query(f"SELECT * FROM {glue_table}", database=glue_database, categories=list(df.columns))
-    ensure_data_types_category(df2)
-    df2 = wr.athena.read_sql_table(table=glue_table, database=glue_database, categories=list(df.columns))
-    ensure_data_types_category(df2)
+
     df2 = wr.athena.read_sql_query(
-        f"SELECT * FROM {glue_table}", database=glue_database, categories=list(df.columns), ctas_approach=False
+        f"SELECT * FROM {glue_table}", database=glue_database, categories=list(df.columns), ctas_approach=ctas_approach
     )
     ensure_data_types_category(df2)
-    dfs = wr.athena.read_sql_query(
-        f"SELECT * FROM {glue_table}",
+
+
+@pytest.mark.parametrize(
+    "ctas_approach",
+    [
+        pytest.param(False),
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError, reason="Unable to create pandas categorical from pyarrow table"
+            ),
+        ),
+    ],
+)
+def test_category_chunked(path: str, glue_table: str, glue_database: str, ctas_approach: bool) -> None:
+    df = get_df_category()
+    wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
         database=glue_database,
-        categories=list(df.columns),
-        ctas_approach=False,
-        chunksize=1,
+        table=glue_table,
+        mode="overwrite",
+        partition_cols=["par0", "par1"],
     )
-    for df2 in dfs:
-        ensure_data_types_category(df2)
+
     dfs = wr.athena.read_sql_query(
         f"SELECT * FROM {glue_table}",
         database=glue_database,
         categories=list(df.columns),
-        ctas_approach=True,
+        ctas_approach=ctas_approach,
         chunksize=1,
     )
     for df2 in dfs:
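
The key change above is that the blanket @pytest.mark.xfail on test_category is replaced by a parametrized ctas_approach, with the xfail mark attached only to the True case via pytest.param(..., marks=...). The False case therefore runs as a normal test and must pass, while the known pyarrow limitation stays documented as an expected failure. A minimal standalone sketch of that pattern (the convert function and its behavior are invented here purely for illustration):

import pytest


def convert(use_fast_path: bool) -> str:
    # Hypothetical stand-in for the code under test: the "fast" path is not implemented yet.
    if use_fast_path:
        raise NotImplementedError("fast path not implemented")
    return "ok"


@pytest.mark.parametrize(
    "use_fast_path",
    [
        pytest.param(False),  # runs as a normal test and must pass
        pytest.param(
            True,
            # the xfail mark applies to this parameter only, so the other case still gates the suite
            marks=pytest.mark.xfail(raises=NotImplementedError, reason="fast path not implemented"),
        ),
    ],
)
def test_convert(use_fast_path: bool) -> None:
    assert convert(use_fast_path) == "ok"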

tests/unit/test_s3_parquet.py

Lines changed: 25 additions & 0 deletions
@@ -20,6 +20,8 @@
 from .._utils import (
     assert_pandas_equals,
     ensure_data_types,
+    ensure_data_types_category,
+    get_df_category,
     get_df_list,
     is_ray_modin,
     to_pandas,
@@ -888,6 +890,29 @@ def test_chunked_columns(path, columns, chunked):
     assert df[columns].shape if columns else df.shape == df2.shape


+@pytest.mark.xfail(raises=NotImplementedError, reason="Unable to create pandas categorical from pyarrow table")
+def test_category_s3_read_parquet(path: str, glue_table: str, glue_database: str) -> None:
+    df = get_df_category()
+    wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=glue_database,
+        table=glue_table,
+        mode="overwrite",
+        partition_cols=["par0", "par1"],
+    )
+    df2 = wr.s3.read_parquet(
+        path=path,
+        dataset=True,
+        pyarrow_additional_kwargs={
+            "categories": [c for c in df.columns if c not in ["par0", "par1"]],
+            "strings_to_categorical": True,
+        },
+    )
+    ensure_data_types_category(df2)
+
+
 @pytest.mark.xfail(
     is_ray_modin,
     raises=TypeError,
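
The new test_category_s3_read_parquet covers the wr.s3.read_parquet path on its own. The "categories" and "strings_to_categorical" keys passed through pyarrow_additional_kwargs mirror the arguments of pyarrow.Table.to_pandas(), which is presumably where they end up; a minimal pyarrow-only sketch of that conversion (no S3, Glue, or awswrangler involved, column names invented for illustration):

import pyarrow as pa

# A small Arrow table with one string column and one numeric column.
table = pa.table({"name": ["a", "b", "a"], "value": [1, 2, 1]})

# strings_to_categorical=True converts string columns to pandas Categorical on the way out;
# categories=[...] does the same for the columns listed explicitly.
df = table.to_pandas(strings_to_categorical=True, categories=["name"])

print(df.dtypes)  # "name" comes back with dtype category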

0 commit comments
