|
1 | 1 | import json
|
2 | 2 | import logging
|
3 | 3 | from datetime import date, datetime
|
| 4 | +from decimal import Decimal |
4 | 5 |
|
5 | 6 | import pytest
|
6 | 7 | import boto3
|
@@ -421,3 +422,90 @@ def test_connection_with_different_port_types(redshift_parameters):
|
421 | 422 | password=redshift_parameters.get("RedshiftPassword"),
|
422 | 423 | )
|
423 | 424 | conn.close()
|
| 425 | + |
| 426 | + |
| 427 | +def test_to_redshift_pandas_decimal(session, bucket, redshift_parameters): |
| 428 | + df = pd.DataFrame({ |
| 429 | + "id": [1, 2, 3], |
| 430 | + "decimal_2": [Decimal((0, (1, 9, 9), -2)), None, Decimal((0, (1, 9, 0), -2))], |
| 431 | + "decimal_5": [Decimal((0, (1, 9, 9, 9, 9, 9), -5)), None, Decimal((0, (1, 9, 0, 0, 0, 0), -5))], |
| 432 | + }) |
| 433 | + con = Redshift.generate_connection( |
| 434 | + database="test", |
| 435 | + host=redshift_parameters.get("RedshiftAddress"), |
| 436 | + port=redshift_parameters.get("RedshiftPort"), |
| 437 | + user="test", |
| 438 | + password=redshift_parameters.get("RedshiftPassword"), |
| 439 | + ) |
| 440 | + path = f"s3://{bucket}/redshift-load/" |
| 441 | + session.pandas.to_redshift( |
| 442 | + dataframe=df, |
| 443 | + path=path, |
| 444 | + schema="public", |
| 445 | + table="test", |
| 446 | + connection=con, |
| 447 | + iam_role=redshift_parameters.get("RedshiftRole"), |
| 448 | + mode="overwrite", |
| 449 | + preserve_index=False, |
| 450 | + ) |
| 451 | + cursor = con.cursor() |
| 452 | + cursor.execute("SELECT * from public.test") |
| 453 | + rows = cursor.fetchall() |
| 454 | + cursor.close() |
| 455 | + con.close() |
| 456 | + assert len(df.index) == len(rows) |
| 457 | + assert len(list(df.columns)) == len(list(rows[0])) |
| 458 | + print(rows) |
| 459 | + for row in rows: |
| 460 | + if row[0] == 1: |
| 461 | + assert row[1] == Decimal((0, (1, 9, 9), -2)) |
| 462 | + assert row[2] == Decimal((0, (1, 9, 9, 9, 9, 9), -5)) |
| 463 | + elif row[1] == 2: |
| 464 | + assert row[1] is None |
| 465 | + assert row[2] is None |
| 466 | + elif row[2] == 3: |
| 467 | + assert row[1] == Decimal((0, (1, 9, 0), -2)) |
| 468 | + assert row[2] == Decimal((0, (1, 9, 0, 0, 0, 0), -5)) |
| 469 | + |
| 470 | + |
| 471 | +def test_to_redshift_spark_decimal(session, bucket, redshift_parameters): |
| 472 | + df = session.spark_session.createDataFrame(pd.DataFrame({ |
| 473 | + "id": [1, 2, 3], |
| 474 | + "decimal_2": [Decimal((0, (1, 9, 9), -2)), None, Decimal((0, (1, 9, 0), -2))], |
| 475 | + "decimal_5": [Decimal((0, (1, 9, 9, 9, 9, 9), -5)), None, Decimal((0, (1, 9, 0, 0, 0, 0), -5))]}), |
| 476 | + schema="id INTEGER, decimal_2 DECIMAL(3,2), decimal_5 DECIMAL(6,5)") |
| 477 | + con = Redshift.generate_connection( |
| 478 | + database="test", |
| 479 | + host=redshift_parameters.get("RedshiftAddress"), |
| 480 | + port=redshift_parameters.get("RedshiftPort"), |
| 481 | + user="test", |
| 482 | + password=redshift_parameters.get("RedshiftPassword"), |
| 483 | + ) |
| 484 | + path = f"s3://{bucket}/redshift-load2/" |
| 485 | + session.spark.to_redshift( |
| 486 | + dataframe=df, |
| 487 | + path=path, |
| 488 | + schema="public", |
| 489 | + table="test2", |
| 490 | + connection=con, |
| 491 | + iam_role=redshift_parameters.get("RedshiftRole"), |
| 492 | + mode="overwrite", |
| 493 | + ) |
| 494 | + cursor = con.cursor() |
| 495 | + cursor.execute("SELECT * from public.test2") |
| 496 | + rows = cursor.fetchall() |
| 497 | + cursor.close() |
| 498 | + con.close() |
| 499 | + assert df.count() == len(rows) |
| 500 | + assert len(list(df.columns)) == len(list(rows[0])) |
| 501 | + print(rows) |
| 502 | + for row in rows: |
| 503 | + if row[0] == 1: |
| 504 | + assert row[1] == Decimal((0, (1, 9, 9), -2)) |
| 505 | + assert row[2] == Decimal((0, (1, 9, 9, 9, 9, 9), -5)) |
| 506 | + elif row[1] == 2: |
| 507 | + assert row[1] is None |
| 508 | + assert row[2] is None |
| 509 | + elif row[2] == 3: |
| 510 | + assert row[1] == Decimal((0, (1, 9, 0), -2)) |
| 511 | + assert row[2] == Decimal((0, (1, 9, 0, 0, 0, 0), -5)) |
0 commit comments