
Commit 2b6935f

Insert multiple rows at once in to_sql (#600)

1 parent 93aca5d · commit 2b6935f

11 files changed (+394, −315 lines)

awswrangler/_config.py

Lines changed: 11 additions & 1 deletion

```diff
@@ -34,6 +34,7 @@ class _ConfigArg(NamedTuple):
     "max_local_cache_entries": _ConfigArg(dtype=int, nullable=False),
     "s3_block_size": _ConfigArg(dtype=int, nullable=False, enforced=True),
     "workgroup": _ConfigArg(dtype=str, nullable=False, enforced=True),
+    "chunksize": _ConfigArg(dtype=int, nullable=False, enforced=True),
     # Endpoints URLs
     "s3_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True),
     "athena_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True),
@@ -47,7 +48,7 @@ class _ConfigArg(NamedTuple):
 }
 
 
-class _Config:  # pylint: disable=too-many-instance-attributes
+class _Config:  # pylint: disable=too-many-instance-attributes, too-many-public-methods
     """Wrangler's Configuration class."""
 
     def __init__(self) -> None:
@@ -279,6 +280,15 @@ def workgroup(self) -> Optional[str]:
     def workgroup(self, value: Optional[str]) -> None:
         self._set_config_value(key="workgroup", value=value)
 
+    @property
+    def chunksize(self) -> int:
+        """Property chunksize."""
+        return cast(int, self["chunksize"])
+
+    @chunksize.setter
+    def chunksize(self, value: int) -> None:
+        self._set_config_value(key="chunksize", value=value)
+
     @property
     def s3_endpoint_url(self) -> Optional[str]:
         """Property s3_endpoint_url."""
```

awswrangler/_databases.py

Lines changed: 20 additions & 11 deletions

```diff
@@ -1,7 +1,7 @@
 """Databases Utilities."""
 
 import logging
-from typing import Any, Dict, Iterator, List, NamedTuple, Optional, Tuple, Union, cast
+from typing import Any, Dict, Generator, Iterator, List, NamedTuple, Optional, Tuple, Union, cast
 
 import boto3
 import pandas as pd
@@ -219,13 +219,22 @@ def read_sql_query(
         raise
 
 
-def extract_parameters(df: pd.DataFrame) -> List[List[Any]]:
-    """Extract Parameters."""
-    parameters: List[List[Any]] = df.values.tolist()
-    for i, row in enumerate(parameters):
-        for j, value in enumerate(row):
-            if pd.isna(value):
-                parameters[i][j] = None
-            elif hasattr(value, "to_pydatetime"):
-                parameters[i][j] = value.to_pydatetime()
-    return parameters
+def generate_placeholder_parameter_pairs(
+    df: pd.DataFrame, column_placeholders: str, chunksize: int
+) -> Generator[Tuple[str, List[Any]], None, None]:
+    """Extract Placeholder and Parameter pairs."""
+
+    def convert_value_to_native_python_type(value: Any) -> Any:
+        if pd.isna(value):
+            return None
+        if hasattr(value, "to_pydatetime"):
+            return value.to_pydatetime()
+
+        return value
+
+    parameters = df.values.tolist()
+    for i in range(0, len(df.index), chunksize):
+        parameters_chunk = parameters[i : i + chunksize]
+        chunk_placeholders = ", ".join([f"({column_placeholders})" for _ in range(len(parameters_chunk))])
+        flattened_chunk = [convert_value_to_native_python_type(value) for row in parameters_chunk for value in row]
+        yield chunk_placeholders, flattened_chunk
```
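
A worked example of what the new generator yields (a sketch with made-up data):

```python
import pandas as pd

from awswrangler._databases import generate_placeholder_parameter_pairs

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", None]})

# Two columns -> column_placeholders "%s, %s"; chunksize=2 -> at most
# two rows per yielded pair, values flattened into one parameter list.
for placeholders, parameters in generate_placeholder_parameter_pairs(
    df=df, column_placeholders="%s, %s", chunksize=2
):
    print(placeholders, "|", parameters)
# (%s, %s), (%s, %s) | [1, 'x', 2, 'y']
# (%s, %s) | [3, None]
```

Each yielded pair becomes exactly one multi-row INSERT downstream, so a 1,000-row frame with the default chunksize of 200 costs five statements instead of 1,000.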

awswrangler/mysql.py

Lines changed: 14 additions & 6 deletions

```diff
@@ -12,6 +12,7 @@
 from awswrangler import _data_types
 from awswrangler import _databases as _db_utils
 from awswrangler import exceptions
+from awswrangler._config import apply_configs
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -257,6 +258,7 @@ def read_sql_table(
     )
 
 
+@apply_configs
 def to_sql(
     df: pd.DataFrame,
     con: pymysql.connections.Connection,
@@ -267,6 +269,7 @@ def to_sql(
     dtype: Optional[Dict[str, str]] = None,
     varchar_lengths: Optional[Dict[str, int]] = None,
     use_column_names: bool = False,
+    chunksize: int = 200,
 ) -> None:
     """Write records stored in a DataFrame into MySQL.
 
@@ -295,6 +298,8 @@ def to_sql(
         If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
         E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
         inserted into the database columns `col1` and `col3`.
+    chunksize: int
+        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.
 
     Returns
     -------
@@ -308,7 +313,7 @@ def to_sql(
     >>> import awswrangler as wr
     >>> con = wr.mysql.connect("MY_GLUE_CONNECTION")
     >>> wr.mysql.to_sql(
-    ...     df=df
+    ...     df=df,
     ...     table="my_table",
     ...     schema="test",
     ...     con=con
@@ -333,14 +338,17 @@ def to_sql(
             )
             if index:
                 df.reset_index(level=df.index.names, inplace=True)
-            placeholders: str = ", ".join(["%s"] * len(df.columns))
+            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
             insertion_columns = ""
             if use_column_names:
                 insertion_columns = f"({', '.join(df.columns)})"
-            sql: str = f"INSERT INTO `{schema}`.`{table}` {insertion_columns} VALUES ({placeholders})"
-            _logger.debug("sql: %s", sql)
-            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
-            cursor.executemany(sql, parameters)
+            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
+                df=df, column_placeholders=column_placeholders, chunksize=chunksize
+            )
+            for placeholders, parameters in placeholder_parameter_pair_generator:
+                sql: str = f"INSERT INTO `{schema}`.`{table}` {insertion_columns} VALUES {placeholders}"
+                _logger.debug("sql: %s", sql)
+                cursor.executemany(sql, (parameters,))
             con.commit()
     except Exception as ex:
         con.rollback()
```
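
The effect on the generated SQL: instead of one `INSERT ... VALUES (%s, %s)` per row, each chunk produces a single statement with the placeholder tuple repeated. A sketch of the shape for three rows of two columns (illustrative schema/table names):

```python
column_placeholders = ", ".join(["%s"] * 2)   # "%s, %s"
placeholders = ", ".join(["(%s, %s)"] * 3)    # "(%s, %s), (%s, %s), (%s, %s)"
sql = f"INSERT INTO `test`.`my_table`  VALUES {placeholders}"

# The chunk's rows arrive flattened into one parameter list; wrapping it
# in a one-element tuple makes executemany() run the statement just once:
# cursor.executemany(sql, ([1, "x", 2, "y", 3, None],))
```

Passing `(parameters,)` rather than `parameters` is deliberate: the rows are already expanded into the VALUES clause, so `executemany` only needs a single parameter set per chunk.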

awswrangler/postgresql.py

Lines changed: 13 additions & 5 deletions

```diff
@@ -11,6 +11,7 @@
 from awswrangler import _data_types
 from awswrangler import _databases as _db_utils
 from awswrangler import exceptions
+from awswrangler._config import apply_configs
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -263,6 +264,7 @@ def read_sql_table(
     )
 
 
+@apply_configs
 def to_sql(
     df: pd.DataFrame,
     con: pg8000.Connection,
@@ -273,6 +275,7 @@ def to_sql(
     dtype: Optional[Dict[str, str]] = None,
     varchar_lengths: Optional[Dict[str, int]] = None,
     use_column_names: bool = False,
+    chunksize: int = 200,
 ) -> None:
     """Write records stored in a DataFrame into PostgreSQL.
 
@@ -301,6 +304,8 @@ def to_sql(
         If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
         E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
         inserted into the database columns `col1` and `col3`.
+    chunksize: int
+        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.
 
     Returns
     -------
@@ -339,14 +344,17 @@ def to_sql(
             )
             if index:
                 df.reset_index(level=df.index.names, inplace=True)
-            placeholders: str = ", ".join(["%s"] * len(df.columns))
+            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
             insertion_columns = ""
             if use_column_names:
                 insertion_columns = f"({', '.join(df.columns)})"
-            sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES ({placeholders})'
-            _logger.debug("sql: %s", sql)
-            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
-            cursor.executemany(sql, parameters)
+            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
+                df=df, column_placeholders=column_placeholders, chunksize=chunksize
+            )
+            for placeholders, parameters in placeholder_parameter_pair_generator:
+                sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES {placeholders}'
+                _logger.debug("sql: %s", sql)
+                cursor.executemany(sql, (parameters,))
             con.commit()
     except Exception as ex:
         con.rollback()
```

awswrangler/redshift.py

Lines changed: 14 additions & 6 deletions

```diff
@@ -13,6 +13,7 @@
 from awswrangler import _data_types
 from awswrangler import _databases as _db_utils
 from awswrangler import _utils, exceptions, s3
+from awswrangler._config import apply_configs
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -629,6 +630,7 @@ def read_sql_table(
     )
 
 
+@apply_configs
 def to_sql(
     df: pd.DataFrame,
     con: redshift_connector.Connection,
@@ -645,6 +647,7 @@ def to_sql(
     varchar_lengths_default: int = 256,
     varchar_lengths: Optional[Dict[str, int]] = None,
     use_column_names: bool = False,
+    chunksize: int = 200,
 ) -> None:
     """Write records stored in a DataFrame into Redshift.
 
@@ -693,6 +696,8 @@ def to_sql(
         If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
         E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
         inserted into the database columns `col1` and `col3`.
+    chunksize: int
+        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.
 
     Returns
     -------
@@ -706,7 +711,7 @@ def to_sql(
     >>> import awswrangler as wr
     >>> con = wr.redshift.connect("MY_GLUE_CONNECTION")
     >>> wr.redshift.to_sql(
-    ...     df=df
+    ...     df=df,
     ...     table="my_table",
     ...     schema="public",
     ...     con=con
@@ -740,15 +745,18 @@ def to_sql(
             )
             if index:
                 df.reset_index(level=df.index.names, inplace=True)
-            placeholders: str = ", ".join(["%s"] * len(df.columns))
+            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
             schema_str = f'"{created_schema}".' if created_schema else ""
             insertion_columns = ""
             if use_column_names:
                 insertion_columns = f"({', '.join(df.columns)})"
-            sql: str = f'INSERT INTO {schema_str}"{created_table}" {insertion_columns} VALUES ({placeholders})'
-            _logger.debug("sql: %s", sql)
-            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
-            cursor.executemany(sql, parameters)
+            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
+                df=df, column_placeholders=column_placeholders, chunksize=chunksize
+            )
+            for placeholders, parameters in placeholder_parameter_pair_generator:
+                sql: str = f'INSERT INTO {schema_str}"{created_table}" {insertion_columns} VALUES {placeholders}'
+                _logger.debug("sql: %s", sql)
+                cursor.executemany(sql, (parameters,))
             if table != created_table:  # upsert
                 _upsert(cursor=cursor, schema=schema, table=table, temp_table=created_table, primary_keys=primary_keys)
             con.commit()
```

awswrangler/sqlserver.py

Lines changed: 17 additions & 5 deletions

```diff
@@ -2,6 +2,7 @@
 
 
 import importlib.util
+import inspect
 import logging
 from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union
 
@@ -12,6 +13,7 @@
 from awswrangler import _data_types
 from awswrangler import _databases as _db_utils
 from awswrangler import exceptions
+from awswrangler._config import apply_configs
 
 __all__ = ["connect", "read_sql_query", "read_sql_table", "to_sql"]
 
@@ -32,6 +34,9 @@ def inner(*args: Any, **kwargs: Any) -> Any:
             )
         return func(*args, **kwargs)
 
+    inner.__doc__ = func.__doc__
+    inner.__name__ = func.__name__
+    inner.__setattr__("__signature__", inspect.signature(func))  # pylint: disable=no-member
     return inner  # type: ignore
 
 
@@ -281,6 +286,7 @@ def read_sql_table(
 
 
 @_check_for_pyodbc
+@apply_configs
 def to_sql(
     df: pd.DataFrame,
     con: "pyodbc.Connection",
@@ -291,6 +297,7 @@ def to_sql(
     dtype: Optional[Dict[str, str]] = None,
     varchar_lengths: Optional[Dict[str, int]] = None,
     use_column_names: bool = False,
+    chunksize: int = 200,
 ) -> None:
     """Write records stored in a DataFrame into Microsoft SQL Server.
 
@@ -319,6 +326,8 @@ def to_sql(
         If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
         E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
         inserted into the database columns `col1` and `col3`.
+    chunksize: int
+        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.
 
     Returns
     -------
@@ -357,15 +366,18 @@ def to_sql(
             )
             if index:
                 df.reset_index(level=df.index.names, inplace=True)
-            placeholders: str = ", ".join(["?"] * len(df.columns))
+            column_placeholders: str = ", ".join(["?"] * len(df.columns))
             table_identifier = _get_table_identifier(schema, table)
             insertion_columns = ""
             if use_column_names:
                 insertion_columns = f"({', '.join(df.columns)})"
-            sql: str = f"INSERT INTO {table_identifier} {insertion_columns} VALUES ({placeholders})"
-            _logger.debug("sql: %s", sql)
-            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
-            cursor.executemany(sql, parameters)
+            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
+                df=df, column_placeholders=column_placeholders, chunksize=chunksize
+            )
+            for placeholders, parameters in placeholder_parameter_pair_generator:
+                sql: str = f"INSERT INTO {table_identifier} {insertion_columns} VALUES {placeholders}"
+                _logger.debug("sql: %s", sql)
+                cursor.executemany(sql, (parameters,))
             con.commit()
     except Exception as ex:
         con.rollback()
```
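
Copying `__doc__`, `__name__`, and `__signature__` onto `inner` matters because `@apply_configs` sits below `@_check_for_pyodbc` and decides what to inject by inspecting the callable it receives. A minimal sketch of that dependency (hypothetical internals, not Wrangler's actual `apply_configs`):

```python
import inspect
from typing import Any, Callable


def apply_configs_sketch(function: Callable[..., Any]) -> Callable[..., Any]:
    # Without the forwarded __signature__, a bare wrapper would expose only
    # (*args, **kwargs) and the chunksize parameter would stay invisible here.
    params = inspect.signature(function).parameters

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        if "chunksize" in params and "chunksize" not in kwargs:
            kwargs["chunksize"] = 200  # in Wrangler this comes from wr.config
        return function(*args, **kwargs)

    return wrapper
```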

tests/test_config.py

Lines changed: 22 additions & 1 deletion

```diff
@@ -1,6 +1,6 @@
 import logging
 import os
-from unittest.mock import patch
+from unittest.mock import create_autospec, patch
 
 import boto3
 import botocore
@@ -9,6 +9,7 @@
 import pytest
 
 import awswrangler as wr
+from awswrangler._config import apply_configs
 from awswrangler.s3._fs import open_s3_object
 
 logging.getLogger("awswrangler").setLevel(logging.DEBUG)
@@ -180,3 +181,23 @@ def wrapper(self, **kwarg):
         s3obj.write(b"foo")
 
     wr.config.reset()
+
+
+def test_chunk_size():
+    expected_chunksize = 123
+
+    wr.config.chunksize = expected_chunksize
+
+    for function_to_mock in [wr.postgresql.to_sql, wr.mysql.to_sql, wr.sqlserver.to_sql, wr.redshift.to_sql]:
+        mock = create_autospec(function_to_mock)
+        apply_configs(mock)(df=None, con=None, table=None, schema=None)
+        mock.assert_called_with(df=None, con=None, table=None, schema=None, chunksize=expected_chunksize)
+
+    expected_chunksize = 456
+    os.environ["WR_CHUNKSIZE"] = str(expected_chunksize)
+    wr.config.reset()
+
+    for function_to_mock in [wr.postgresql.to_sql, wr.mysql.to_sql, wr.sqlserver.to_sql, wr.redshift.to_sql]:
+        mock = create_autospec(function_to_mock)
+        apply_configs(mock)(df=None, con=None, table=None, schema=None)
+        mock.assert_called_with(df=None, con=None, table=None, schema=None, chunksize=expected_chunksize)
```
