Skip to content

Commit 2796305

Browse files
authored
Merge pull request #189 from awslabs/create-table-append
Add append mode to create_*_table
2 parents 0002fe6 + 21b7955 commit 2796305

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

awswrangler/catalog.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def create_parquet_table(
8787
table: str,
8888
path: str,
8989
columns_types: Dict[str, str],
90-
partitions_types: Optional[Dict[str, str]],
90+
partitions_types: Optional[Dict[str, str]] = None,
9191
compression: Optional[str] = None,
9292
description: Optional[str] = None,
9393
parameters: Optional[Dict[str, str]] = None,
@@ -120,7 +120,7 @@ def create_parquet_table(
120120
columns_comments: Dict[str, str], optional
121121
Columns names and the related comments (e.g. {'col0': 'Column 0.', 'col1': 'Column 1.', 'col2': 'Partition.'}).
122122
mode: str
123-
Only 'overwrite' available by now.
123+
'overwrite' to recreate any possible axisting table or 'append' to keep any possible axisting table.
124124
boto3_session : boto3.Session(), optional
125125
Boto3 Session. The default boto3 session will be used if boto3_session receive None.
126126
@@ -859,7 +859,7 @@ def create_csv_table(
859859
table: str,
860860
path: str,
861861
columns_types: Dict[str, str],
862-
partitions_types: Optional[Dict[str, str]],
862+
partitions_types: Optional[Dict[str, str]] = None,
863863
compression: Optional[str] = None,
864864
description: Optional[str] = None,
865865
parameters: Optional[Dict[str, str]] = None,
@@ -893,7 +893,7 @@ def create_csv_table(
893893
columns_comments: Dict[str, str], optional
894894
Columns names and the related comments (e.g. {'col0': 'Column 0.', 'col1': 'Column 1.', 'col2': 'Partition.'}).
895895
mode: str
896-
Only 'overwrite' available by now.
896+
'overwrite' to recreate any possible axisting table or 'append' to keep any possible axisting table.
897897
sep : str
898898
String of length 1. Field delimiter for the output file.
899899
boto3_session : boto3.Session(), optional
@@ -967,10 +967,11 @@ def _create_table(
967967
if name in columns_comments:
968968
par["Comment"] = columns_comments[name]
969969
session: boto3.Session = _utils.ensure_session(session=boto3_session)
970-
if mode == "overwrite":
970+
exist: bool = does_table_exist(database=database, table=table, boto3_session=session)
971+
if (mode == "overwrite") or (exist is False):
971972
delete_table_if_exists(database=database, table=table, boto3_session=session)
972-
client_glue: boto3.client = _utils.client(service_name="glue", session=session)
973-
client_glue.create_table(DatabaseName=database, TableInput=table_input)
973+
client_glue: boto3.client = _utils.client(service_name="glue", session=session)
974+
client_glue.create_table(DatabaseName=database, TableInput=table_input)
974975

975976

976977
def _csv_table_definition(

testing/test_awswrangler/test_data_lake.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,9 @@ def test_catalog(bucket, database):
443443
partitions_types={"y": "int", "m": "int"},
444444
compression="snappy",
445445
)
446+
wr.catalog.create_parquet_table(
447+
database=database, table="test_catalog", path=path, columns_types={"col0": "string"}, mode="append"
448+
)
446449
assert wr.catalog.does_table_exist(database=database, table="test_catalog") is True
447450
assert wr.catalog.delete_table_if_exists(database=database, table="test_catalog") is True
448451
assert wr.catalog.delete_table_if_exists(database=database, table="test_catalog") is False
@@ -853,6 +856,9 @@ def test_athena_types(bucket, database):
853856
partitions_types=partitions_types,
854857
columns_types=columns_types,
855858
)
859+
wr.catalog.create_csv_table(
860+
database=database, table="test_athena_types", path=path, columns_types={"col0": "string"}, mode="append"
861+
)
856862
wr.athena.repair_table("test_athena_types", database)
857863
assert len(wr.catalog.get_csv_partitions(database, "test_athena_types")) == 3
858864
df2 = wr.athena.read_sql_table("test_athena_types", database)

0 commit comments

Comments
 (0)