@@ -576,8 +576,19 @@ def test_athena_time_zone(glue_database):
576
576
assert df ["value" ][0 ].year == datetime .datetime .utcnow ().year
577
577
578
578
579
- @pytest .mark .xfail (raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table" )
580
- def test_category (path , glue_table , glue_database ):
579
+ @pytest .mark .parametrize (
580
+ "ctas_approach" ,
581
+ [
582
+ pytest .param (False ),
583
+ pytest .param (
584
+ True ,
585
+ marks = pytest .mark .xfail (
586
+ raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table"
587
+ ),
588
+ ),
589
+ ],
590
+ )
591
+ def test_category (path : str , glue_table : str , glue_database : str , ctas_approach : bool ) -> None :
581
592
df = get_df_category ()
582
593
wr .s3 .to_parquet (
583
594
df = df ,
@@ -588,37 +599,42 @@ def test_category(path, glue_table, glue_database):
588
599
mode = "overwrite" ,
589
600
partition_cols = ["par0" , "par1" ],
590
601
)
591
- df2 = wr .s3 .read_parquet (
592
- path = path ,
593
- dataset = True ,
594
- pyarrow_additional_kwargs = {
595
- "categories" : [c for c in df .columns if c not in ["par0" , "par1" ]],
596
- "strings_to_categorical" : True ,
597
- },
598
- )
599
- ensure_data_types_category (df2 )
600
- df2 = wr .athena .read_sql_query (f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ))
601
- ensure_data_types_category (df2 )
602
- df2 = wr .athena .read_sql_table (table = glue_table , database = glue_database , categories = list (df .columns ))
603
- ensure_data_types_category (df2 )
602
+
604
603
df2 = wr .athena .read_sql_query (
605
- f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ), ctas_approach = False
604
+ f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ), ctas_approach = ctas_approach
606
605
)
607
606
ensure_data_types_category (df2 )
608
- dfs = wr .athena .read_sql_query (
609
- f"SELECT * FROM { glue_table } " ,
607
+
608
+
609
+ @pytest .mark .parametrize (
610
+ "ctas_approach" ,
611
+ [
612
+ pytest .param (False ),
613
+ pytest .param (
614
+ True ,
615
+ marks = pytest .mark .xfail (
616
+ raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table"
617
+ ),
618
+ ),
619
+ ],
620
+ )
621
+ def test_category_chunked (path : str , glue_table : str , glue_database : str , ctas_approach : bool ) -> None :
622
+ df = get_df_category ()
623
+ wr .s3 .to_parquet (
624
+ df = df ,
625
+ path = path ,
626
+ dataset = True ,
610
627
database = glue_database ,
611
- categories = list ( df . columns ) ,
612
- ctas_approach = False ,
613
- chunksize = 1 ,
628
+ table = glue_table ,
629
+ mode = "overwrite" ,
630
+ partition_cols = [ "par0" , "par1" ] ,
614
631
)
615
- for df2 in dfs :
616
- ensure_data_types_category (df2 )
632
+
617
633
dfs = wr .athena .read_sql_query (
618
634
f"SELECT * FROM { glue_table } " ,
619
635
database = glue_database ,
620
636
categories = list (df .columns ),
621
- ctas_approach = True ,
637
+ ctas_approach = ctas_approach ,
622
638
chunksize = 1 ,
623
639
)
624
640
for df2 in dfs :
0 commit comments