@@ -781,16 +781,22 @@ def test_read_sql_athena_with_time_zone(session, bucket, database):
 
 def test_normalize_columns_names_athena():
     dataframe = pandas.DataFrame({
-        "CammelCase": [1, 2, 3],
+        "CamelCase": [1, 2, 3],
         "With Spaces": [4, 5, 6],
         "With-Dash": [7, 8, 9],
         "Ãccént": [10, 11, 12],
+        "with.dot": [10, 11, 12],
+        "Camel_Case2": [13, 14, 15],
+        "Camel___Case3": [16, 17, 18]
     })
     Pandas.normalize_columns_names_athena(dataframe=dataframe, inplace=True)
-    assert dataframe.columns[0] == "cammel_case"
+    assert dataframe.columns[0] == "camel_case"
     assert dataframe.columns[1] == "with_spaces"
     assert dataframe.columns[2] == "with_dash"
     assert dataframe.columns[3] == "accent"
+    assert dataframe.columns[4] == "with_dot"
+    assert dataframe.columns[5] == "camel_case2"
+    assert dataframe.columns[6] == "camel_case3"
 
 
 def test_to_parquet_with_normalize(
@@ -799,11 +805,13 @@ def test_to_parquet_with_normalize(
         database,
 ):
     dataframe = pandas.DataFrame({
-        "CammelCase": [1, 2, 3],
+        "CamelCase": [1, 2, 3],
         "With Spaces": [4, 5, 6],
         "With-Dash": [7, 8, 9],
         "Ãccént": [10, 11, 12],
         "with.dot": [10, 11, 12],
+        "Camel_Case2": [13, 14, 15],
+        "Camel___Case3": [16, 17, 18]
     })
     session.pandas.to_parquet(dataframe=dataframe,
                               database=database,
@@ -818,11 +826,57 @@ def test_to_parquet_with_normalize(
         sleep(2)
     assert len(dataframe.index) == len(dataframe2.index)
     assert (len(list(dataframe.columns)) + 1) == len(list(dataframe2.columns))
-    assert dataframe2.columns[0] == "cammel_case"
+    assert dataframe2.columns[0] == "camel_case"
     assert dataframe2.columns[1] == "with_spaces"
     assert dataframe2.columns[2] == "with_dash"
     assert dataframe2.columns[3] == "accent"
     assert dataframe2.columns[4] == "with_dot"
+    assert dataframe2.columns[5] == "camel_case2"
+    assert dataframe2.columns[6] == "camel_case3"
+
+
+def test_to_parquet_with_normalize_and_cast(
+        session,
+        bucket,
+        database,
+):
+    dataframe = pandas.DataFrame({
+        "CamelCase": [1, 2, 3],
+        "With Spaces": [4, 5, 6],
+        "With-Dash": [7, 8, 9],
+        "Ãccént": [10, 11, 12],
+        "with.dot": [10, 11, 12],
+        "Camel_Case2": [13, 14, 15],
+        "Camel___Case3": [16, 17, 18]
+    })
+    session.pandas.to_parquet(dataframe=dataframe,
+                              database=database,
+                              path=f"s3://{bucket}/TestTable-with.dot/",
+                              mode="overwrite",
+                              partition_cols=["CamelCase"],
+                              cast_columns={
+                                  "Camel_Case2": "double",
+                                  "Camel___Case3": "float"
+                              })
+    dataframe2 = None
+    for counter in range(10):
+        dataframe2 = session.pandas.read_sql_athena(
+            sql="select * from test_table_with_dot", database=database)
+        if len(dataframe.index) == len(dataframe2.index):
+            break
+        sleep(2)
+    assert len(dataframe.index) == len(dataframe2.index)
+    assert (len(list(dataframe.columns)) + 1) == len(list(dataframe2.columns))
+    assert dataframe2.columns[0] == "with_spaces"
+    assert dataframe2.columns[1] == "with_dash"
+    assert dataframe2.columns[2] == "accent"
+    assert dataframe2.columns[3] == "with_dot"
+    assert dataframe2.columns[4] == "camel_case2"
+    assert dataframe2.columns[5] == "camel_case3"
+    assert dataframe2.columns[6] == "__index_level_0__"
+    assert dataframe2.columns[7] == "camel_case"
+    assert dataframe2[dataframe2.columns[4]].dtype == "float64"
+    assert dataframe2[dataframe2.columns[5]].dtype == "float64"
 
 
 def test_drop_duplicated_columns():
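Note: the normalization rule these tests pin down can be approximated with the sketch below. It is a minimal illustrative sketch, assuming only the behavior the assertions above imply (CamelCase split into snake_case, accents stripped, spaces/dashes/dots mapped to underscores, runs of underscores collapsed, result lowercased). `normalize_column_name` is a hypothetical helper, not awswrangler's actual `Pandas.normalize_columns_names_athena` implementation.

```python
# Illustrative sketch only -- not awswrangler's actual implementation.
import re
import unicodedata


def normalize_column_name(name: str) -> str:
    """Approximate the Athena column normalization asserted in the tests."""
    # Strip accents: "Ãccént" -> "Accent"
    name = unicodedata.normalize("NFKD", name).encode("ascii", "ignore").decode("ascii")
    # Split CamelCase: "CamelCase" -> "Camel_Case"
    name = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name)
    # Map separators (space, dash, dot) to underscores
    name = re.sub(r"[ .\-]", "_", name)
    # Collapse runs of underscores: "Camel___Case3" -> "Camel_Case3"
    name = re.sub(r"_+", "_", name)
    return name.strip("_").lower()


# Mirrors the expectations in test_normalize_columns_names_athena
assert normalize_column_name("CamelCase") == "camel_case"
assert normalize_column_name("With Spaces") == "with_spaces"
assert normalize_column_name("With-Dash") == "with_dash"
assert normalize_column_name("Ãccént") == "accent"
assert normalize_column_name("with.dot") == "with_dot"
assert normalize_column_name("Camel_Case2") == "camel_case2"
assert normalize_column_name("Camel___Case3") == "camel_case3"
```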