@@ -2369,6 +2369,7 @@ def test_s3_overall_nan(bucket, database):
2369
2369
2370
2370
def test_aurora_postgres_load_varchar (bucket , postgres_parameters ):
2371
2371
df = pd .DataFrame ({"id" : [1 , 2 , 3 ], "varchar3" : ["foo" , "boo" , "bar" ], "varchar1" : ["a" , "b" , "c" ]})
2372
+ df ["varchar3" ] = df ["varchar3" ].astype ("string" )
2372
2373
path = f"s3://{ bucket } /test_aurora_postgres_load_varchar"
2373
2374
wr .pandas .to_aurora (dataframe = df ,
2374
2375
connection = "aws-data-wrangler-postgres" ,
@@ -2404,6 +2405,7 @@ def test_aurora_postgres_load_varchar(bucket, postgres_parameters):
2404
2405
2405
2406
def test_aurora_mysql_load_varchar (bucket ):
2406
2407
df = pd .DataFrame ({"id" : [1 , 2 , 3 ], "varchar3" : ["foo" , "boo" , "bar" ], "varchar1" : ["a" , "b" , "c" ]})
2408
+ df ["varchar3" ] = df ["varchar3" ].astype ("string" )
2407
2409
path = f"s3://{ bucket } /test_aurora_mysql_load_varchar"
2408
2410
wr .pandas .to_aurora (dataframe = df ,
2409
2411
connection = "aws-data-wrangler-mysql" ,
@@ -2430,3 +2432,51 @@ def test_aurora_mysql_load_varchar(bucket):
2430
2432
assert rows [1 ][2 ] == "b"
2431
2433
assert rows [2 ][2 ] == "c"
2432
2434
conn .close ()
2435
+
2436
+
2437
def test_to_parquet_string(bucket, database):
    """Round-trip nullable ``Int64`` and ``string`` dtypes through Parquet + Athena.

    Writes a DataFrame containing pandas extension dtypes (nullable integers
    and the ``string`` dtype with missing values) to S3 as Parquet registered
    in the Glue catalog, reads it back via Athena, and asserts the frames are
    identical — dtypes and NA positions included.
    """
    path = f"s3://{bucket}/test_to_parquet_string"
    # Start from a clean S3 prefix so stale objects can't leak into the read.
    wr.s3.delete_objects(path=path)
    df = pd.DataFrame({"id": [1, 2, 3, 4, 5], "c_str": ["foo", None, None, "bar", None]})
    # Use pandas nullable extension dtypes so None survives the round trip.
    df = df.astype({"id": "Int64", "c_str": "string"})
    wr.pandas.to_parquet(
        dataframe=df,
        database=database,
        path=path,
        mode="overwrite",
        preserve_index=False,
        procs_cpu_bound=5,
        inplace=False,
    )
    # Give Glue/Athena time to pick up the new table metadata before querying.
    sleep(15)
    df2 = wr.pandas.read_sql_athena(
        database=database,
        sql="SELECT * FROM test_to_parquet_string ORDER BY id",
        ctas_approach=False,
    )
    # Clean up the S3 objects regardless of how the comparison goes below.
    wr.s3.delete_objects(path=path)
    assert df.equals(df2)
2459
+
2460
+
2461
def test_to_csv_string(bucket, database):
    """Round-trip nullable ``Int64`` and ``string`` dtypes through CSV + Athena.

    Writes a DataFrame containing pandas extension dtypes to S3 as CSV
    registered in the Glue catalog, reads it back via Athena, and asserts the
    frames are identical.

    BUG FIX: the original body called ``wr.pandas.to_parquet`` — a copy-paste
    from ``test_to_parquet_string`` — so the CSV writer was never exercised
    despite the test's name, path, and table name all saying CSV. The writer
    call is now ``wr.pandas.to_csv``.

    NOTE(review): CSV has no native NULL marker; confirm the library's
    CSV SerDe settings round-trip ``None`` values so ``df.equals(df2)``
    still holds for the NA rows.
    """
    path = f"s3://{bucket}/test_to_csv_string"
    # Start from a clean S3 prefix so stale objects can't leak into the read.
    wr.s3.delete_objects(path=path)
    df = pd.DataFrame({
        "id": [1, 2, 3, 4, 5],
        "c_str": ["foo", None, None, "bar", None],
    })
    # Use pandas nullable extension dtypes so None survives the round trip.
    df["id"] = df["id"].astype("Int64")
    df["c_str"] = df["c_str"].astype("string")
    wr.pandas.to_csv(dataframe=df,
                     database=database,
                     path=path,
                     mode="overwrite",
                     preserve_index=False,
                     procs_cpu_bound=5,
                     inplace=False)
    # Give Glue/Athena time to pick up the new table metadata before querying.
    sleep(5)
    df2 = wr.pandas.read_sql_athena(database=database,
                                    sql="SELECT * FROM test_to_csv_string ORDER BY id",
                                    ctas_approach=False)
    # Clean up the S3 objects regardless of how the comparison goes below.
    wr.s3.delete_objects(path=path)
    assert df.equals(df2)
0 commit comments