From d0e7d869fa35348b85c5499d7948bdd1b7f80453 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 25 Jun 2023 13:16:21 -0700 Subject: [PATCH 01/22] ENH: Add arrow engine to to_csv --- pandas/core/generic.py | 11 ++ pandas/io/formats/csvs.py | 55 +++++- pandas/io/formats/format.py | 2 + pandas/tests/io/formats/test_to_csv.py | 246 ++++++++++++++----------- 4 files changed, 199 insertions(+), 115 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9084395871675..e4c16aedb2430 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3658,6 +3658,7 @@ def to_csv( path_or_buf: None = ..., sep: str = ..., na_rep: str = ..., + engine: str = "python", float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3685,6 +3686,7 @@ def to_csv( path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], sep: str = ..., na_rep: str = ..., + engine: str = "python", float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3716,6 +3718,7 @@ def to_csv( path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, sep: str = ",", na_rep: str = "", + engine: str = "python", float_format: str | Callable | None = None, columns: Sequence[Hashable] | None = None, header: bool_t | list[str] = True, @@ -3755,6 +3758,13 @@ def to_csv( String of length 1. Field delimiter for the output file. na_rep : str, default '' Missing data representation. + engine : str, default 'python' + The engine to use. Available options are "pyarrow" or "python". + The pyarrow engine requires the pyarrow library to be installed + and is generally faster than the python engine. + + However, the python engine may be more feature complete than the + pyarrow engine. float_format : str, Callable, default None Format string for floating point numbers. 
If a Callable is given, it takes precedence over other numeric formatting parameters, like decimal. @@ -3890,6 +3900,7 @@ def to_csv( return DataFrameRenderer(formatter).to_csv( path_or_buf, + engine=engine, lineterminator=lineterminator, sep=sep, encoding=encoding, diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 39abb0bf127d9..db9fd8783d9eb 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -20,6 +20,7 @@ import numpy as np from pandas._libs import writers as libwriters +from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import cache_readonly from pandas.core.dtypes.generic import ( @@ -57,6 +58,7 @@ def __init__( self, formatter: DataFrameFormatter, path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "", + engine: str = "python", sep: str = ",", cols: Sequence[Hashable] | None = None, index_label: IndexLabel | None = None, @@ -78,6 +80,7 @@ def __init__( self.obj = self.fmt.frame self.filepath_or_buffer = path_or_buf + self.engine = engine self.encoding = encoding self.compression: CompressionOptions = compression self.mode = mode @@ -252,8 +255,48 @@ def save(self) -> None: storage_options=self.storage_options, ) as handles: # Note: self.encoding is irrelevant here + self._save(handles.handle) + + def _save_pyarrow(self, handle) -> None: + pa = import_optional_dependency("pyarrow") + pa_csv = import_optional_dependency("pyarrow.csv") + # Convert index to column and rename name to empty string + # since we serialize the index as basically a column with no name + # TODO: this won't work for multi-indexes + obj = self.obj.reset_index(names=[""]) + + table = pa.Table.from_pandas(obj) + + # Map quoting arg to pyarrow equivalents + pa_quoting = None + if self.quoting == csvlib.QUOTE_MINIMAL: + pa_quoting = "needed" + elif self.quoting == csvlib.QUOTE_ALL: + # TODO: Is this a 1-1 mapping? 
+ # This doesn't quote nulls, check if Python does this + pa_quoting = "all_valid" + elif self.quoting == csvlib.QUOTE_NONE: + pa_quoting = "none" + else: + raise ValueError( + f"Quoting option {self.quoting} is not supported with engine='pyarrow'" + ) + + write_options = pa_csv.WriteOptions( + include_header=self._need_to_save_header, + batch_size=self.chunksize, + delimiter=self.sep, + quoting_style=pa_quoting, + ) + # pa_csv.write_csv(table, handle, write_options) + pa_csv.write_csv(table, self.filepath_or_buffer, write_options) + + def _save(self, handle) -> None: + if self.engine == "pyarrow": + self._save_pyarrow(handle) + else: self.writer = csvlib.writer( - handles.handle, + handle, lineterminator=self.lineterminator, delimiter=self.sep, quoting=self.quoting, @@ -261,13 +304,9 @@ def save(self) -> None: escapechar=self.escapechar, quotechar=self.quotechar, ) - - self._save() - - def _save(self) -> None: - if self._need_to_save_header: - self._save_header() - self._save_body() + if self._need_to_save_header: + self._save_header() + self._save_body() def _save_header(self) -> None: if not self.has_mi_columns or self._has_aliases: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a7a6f481ebdde..b89f3400675db 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1103,6 +1103,7 @@ def to_string( def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + engine: str = "python", encoding: str | None = None, sep: str = ",", columns: Sequence[Hashable] | None = None, @@ -1132,6 +1133,7 @@ def to_csv( csv_formatter = CSVFormatter( path_or_buf=path_or_buf, + engine=engine, lineterminator=lineterminator, sep=sep, encoding=encoding, diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 32509a799fa69..eeb2a1b8a2c56 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -15,8 +15,14 @@ import 
pandas._testing as tm +@pytest.fixture(params=["python", "pyarrow"]) +def engine(request): + # TODO: Skip if pyarrow not found + return request.param + + class TestToCSV: - def test_to_csv_with_single_column(self): + def test_to_csv_with_single_column(self, engine): # see gh-18676, https://bugs.python.org/issue32255 # # Python's CSV library adds an extraneous '""' @@ -30,7 +36,7 @@ def test_to_csv_with_single_column(self): 1.0 """ with tm.ensure_clean("test.csv") as path: - df1.to_csv(path, header=None, index=None) + df1.to_csv(path, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected1 @@ -40,20 +46,20 @@ def test_to_csv_with_single_column(self): "" """ with tm.ensure_clean("test.csv") as path: - df2.to_csv(path, header=None, index=None) + df2.to_csv(path, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected2 - def test_to_csv_default_encoding(self): + def test_to_csv_default_encoding(self, engine): # GH17097 df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]}) with tm.ensure_clean("test.csv") as path: - # the default to_csv encoding is uft-8. - df.to_csv(path) + # the default to_csv encoding is utf-8. 
+ df.to_csv(path, engine=engine) tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) - def test_to_csv_quotechar(self): + def test_to_csv_quotechar(self, engine): df = DataFrame({"col": [1, 2]}) expected = """\ "","col" @@ -62,7 +68,7 @@ def test_to_csv_quotechar(self): """ with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1) # 1=QUOTE_ALL + df.to_csv(path, quoting=1, engine=engine) # 1=QUOTE_ALL with open(path, encoding="utf-8") as f: assert f.read() == expected @@ -73,13 +79,13 @@ def test_to_csv_quotechar(self): """ with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1, quotechar="$") + df.to_csv(path, quoting=1, quotechar="$", engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: with pytest.raises(TypeError, match="quotechar"): - df.to_csv(path, quoting=1, quotechar=None) + df.to_csv(path, quoting=1, quotechar=None, engine=engine) def test_to_csv_doublequote(self): df = DataFrame({"col": ['a"a', '"bb"']}) @@ -90,15 +96,15 @@ def test_to_csv_doublequote(self): ''' with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + df.to_csv(path, quoting=1, doublequote=True, engine=engine) # QUOTE_ALL with open(path, encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: with pytest.raises(Error, match="escapechar"): - df.to_csv(path, doublequote=False) # no escapechar set + df.to_csv(path, doublequote=False, engine=engine) # no escapechar set - def test_to_csv_escapechar(self): + def test_to_csv_escapechar(self, engine=engine): df = DataFrame({"col": ['a"a', '"bb"']}) expected = """\ "","col" @@ -107,7 +113,9 @@ def test_to_csv_escapechar(self): """ with tm.ensure_clean("test.csv") as path: # QUOTE_ALL - df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") + df.to_csv( + path, quoting=1, doublequote=False, escapechar="\\", engine=engine + ) with open(path, 
encoding="utf-8") as f: assert f.read() == expected @@ -119,36 +127,39 @@ def test_to_csv_escapechar(self): """ with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE + df.to_csv(path, quoting=3, escapechar="\\", engine=engine) # QUOTE_NONE with open(path, encoding="utf-8") as f: assert f.read() == expected - def test_csv_to_string(self): + def test_csv_to_string(self, engine): df = DataFrame({"col": [1, 2]}) expected_rows = [",col", "0,1", "1,2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv() == expected + assert df.to_csv(engine=engine) == expected - def test_to_csv_decimal(self): + def test_to_csv_decimal(self, engine): # see gh-781 df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) expected_rows = [",col1,col2,col3", "0,1,a,10.1"] expected_default = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv() == expected_default + assert df.to_csv(engine=engine) == expected_default expected_rows = [";col1;col2;col3", "0;1;a;10,1"] expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(decimal=",", sep=";") == expected_european_excel + assert df.to_csv(engine=engine, decimal=",", sep=";") == expected_european_excel expected_rows = [",col1,col2,col3", "0,1,a,10.10"] expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(float_format="%.2f") == expected_float_format_default + assert ( + df.to_csv(engine=engine, float_format="%.2f") + == expected_float_format_default + ) expected_rows = [";col1;col2;col3", "0;1;a;10,10"] expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) assert ( - df.to_csv(decimal=",", sep=";", float_format="%.2f") + df.to_csv(engine=engine, decimal=",", sep=";", float_format="%.2f") == expected_float_format ) @@ -157,13 +168,13 @@ def test_to_csv_decimal(self): expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"] expected = 
tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(index=False, decimal="^") == expected + assert df.to_csv(engine=engine, index=False, decimal="^") == expected # same but for an index - assert df.set_index("a").to_csv(decimal="^") == expected + assert df.set_index("a").to_csv(engine=engine, decimal="^") == expected # same for a multi-index - assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected + assert df.set_index(["a", "b"]).to_csv(engine=engine, decimal="^") == expected def test_to_csv_float_format(self): # testing if float_format is taken into account for the index @@ -172,10 +183,13 @@ def test_to_csv_float_format(self): expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(float_format="%.2f") == expected + assert df.set_index("a").to_csv(engine=engine, float_format="%.2f") == expected # same for a multi-index - assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, float_format="%.2f") + == expected + ) def test_to_csv_na_rep(self): # see gh-11553 @@ -185,7 +199,7 @@ def test_to_csv_na_rep(self): expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected # now with an index containing only NaNs @@ -193,31 +207,31 @@ def test_to_csv_na_rep(self): expected_rows = ["a,b,c", "_,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(na_rep="_") == expected - assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected # check if na_rep 
parameter does not break anything when no NaN df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "0,0,2", "0,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(na_rep="_") == expected - assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected - csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") + csv = pd.Series(["a", pd.NA, "c"]).to_csv(engine=engine, na_rep="ZZZZZ") expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) assert expected == csv - def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype): + def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype, engine): # GH 29975 # Make sure full na_rep shows up when a dtype is provided expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv( - na_rep="ZZZZZ" + engine=engine, na_rep="ZZZZZ" ) assert expected == csv - def test_to_csv_date_format(self): + def test_to_csv_date_format(self, engine): # GH 10209 df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")}) @@ -231,7 +245,7 @@ def test_to_csv_date_format(self): "4,2013-01-01 00:00:04", ] expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_sec.to_csv() == expected_default_sec + assert df_sec.to_csv(engine=engine) == expected_default_sec expected_rows = [ ",A", @@ -242,7 +256,10 @@ def test_to_csv_date_format(self): "4,2013-01-05 00:00:00", ] expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day + assert ( + df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S", engine=engine) + == 
expected_ymdhms_day + ) expected_rows = [ ",A", @@ -253,7 +270,7 @@ def test_to_csv_date_format(self): "4,2013-01-01", ] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + assert df_sec.to_csv(date_format="%Y-%m-%d", engine=engine) == expected_ymd_sec expected_rows = [ ",A", @@ -264,8 +281,10 @@ def test_to_csv_date_format(self): "4,2013-01-05", ] expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_day.to_csv() == expected_default_day - assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day + assert df_day.to_csv(engine=engine) == expected_default_day + assert ( + df_day.to_csv(date_format="%Y-%m-%d", engine=engine) == expected_default_day + ) # see gh-7791 # @@ -278,9 +297,12 @@ def test_to_csv_date_format(self): expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) - assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + assert ( + df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d", engine=engine) + == expected_ymd_sec + ) - def test_to_csv_different_datetime_formats(self): + def test_to_csv_different_datetime_formats(self, engine): # GH#21734 df = DataFrame( { @@ -294,14 +316,14 @@ def test_to_csv_different_datetime_formats(self): "1970-01-01,1970-01-01 01:00:00", ] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(index=False) == expected + assert df.to_csv(index=False, engine=engine) == expected - def test_to_csv_date_format_in_categorical(self): + def test_to_csv_date_format_in_categorical(self, engine): # GH#40754 ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d")) ser = ser.astype("category") expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""']) - assert ser.to_csv(index=False) == expected + assert ser.to_csv(index=False, engine=engine) == expected ser = 
pd.Series( pd.date_range( @@ -309,39 +331,41 @@ def test_to_csv_date_format_in_categorical(self): ).append(pd.DatetimeIndex([pd.NaT])) ) ser = ser.astype("category") - assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected + assert ( + ser.to_csv(index=False, engine=engine, date_format="%Y-%m-%d") == expected + ) - def test_to_csv_float_ea_float_format(self): + def test_to_csv_float_ea_float_format(self, engine): # GH#45991 df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"}) df["a"] = df["a"].astype("Float64") - result = df.to_csv(index=False, float_format="%.5f") + result = df.to_csv(index=False, engine=engine, float_format="%.5f") expected = tm.convert_rows_list_to_csv_str( ["a,b", "1.10000,c", "2.02000,c", ",c", "6.00001,c"] ) assert result == expected - def test_to_csv_float_ea_no_float_format(self): + def test_to_csv_float_ea_no_float_format(self, engine): # GH#45991 df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"}) df["a"] = df["a"].astype("Float64") - result = df.to_csv(index=False) + result = df.to_csv(index=False, engine=engine) expected = tm.convert_rows_list_to_csv_str( ["a,b", "1.1,c", "2.02,c", ",c", "6.000006,c"] ) assert result == expected - def test_to_csv_multi_index(self): + def test_to_csv_multi_index(self, engine): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp_rows = [",1", ",2", "0,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert df.to_csv() == exp + assert df.to_csv(engine=engine) == exp exp_rows = ["1", "2", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert df.to_csv(index=False) == exp + assert df.to_csv(index=False, engine=engine) == exp df = DataFrame( [1], @@ -351,21 +375,21 @@ def test_to_csv_multi_index(self): exp_rows = [",,1", ",,2", "1,2,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert df.to_csv() == exp + assert df.to_csv(engine=engine) == exp exp_rows = ["1", "2", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert 
df.to_csv(index=False) == exp + assert df.to_csv(index=False, engine=engine) == exp df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]])) exp_rows = [",foo", ",bar", "0,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert df.to_csv() == exp + assert df.to_csv(engine=engine) == exp exp_rows = ["foo", "bar", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) - assert df.to_csv(index=False) == exp + assert df.to_csv(index=False, engine=engine) == exp @pytest.mark.parametrize( "ind,expected", @@ -382,14 +406,16 @@ def test_to_csv_multi_index(self): ), ], ) - def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series): + def test_to_csv_single_level_multi_index( + self, ind, expected, frame_or_series, engine + ): # see gh-19589 obj = frame_or_series(pd.Series([1], ind, name="data")) - result = obj.to_csv(lineterminator="\n", header=True) + result = obj.to_csv(lineterminator="\n", header=True, engine=engine) assert result == expected - def test_to_csv_string_array_ascii(self): + def test_to_csv_string_array_ascii(self, engine): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] df = DataFrame(str_array) @@ -399,11 +425,11 @@ def test_to_csv_string_array_ascii(self): 1,"['baz', 'qux']" """ with tm.ensure_clean("str_test.csv") as path: - df.to_csv(path, encoding="ascii") + df.to_csv(path, encoding="ascii", engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected_ascii - def test_to_csv_string_array_utf8(self): + def test_to_csv_string_array_utf8(self, engine): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] df = DataFrame(str_array) @@ -413,11 +439,11 @@ def test_to_csv_string_array_utf8(self): 1,"['baz', 'qux']" """ with tm.ensure_clean("unicode_test.csv") as path: - df.to_csv(path, encoding="utf-8") + df.to_csv(path, encoding="utf-8", engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected_utf8 - def 
test_to_csv_string_with_lf(self): + def test_to_csv_string_with_lf(self, engine): # GH 20353 data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} df = DataFrame(data) @@ -434,24 +460,24 @@ def test_to_csv_string_with_lf(self): + b'3,"g\nh\n\ni"' + os_linesep ) - df.to_csv(path, index=False) + df.to_csv(path, index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_noarg with tm.ensure_clean("lf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) + df.to_csv(path, lineterminator="\n", index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_lf with tm.ensure_clean("lf_test.csv") as path: # case 3: CRLF as line terminator # 'lineterminator' should not change inner element expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' - df.to_csv(path, lineterminator="\r\n", index=False) + df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_crlf - def test_to_csv_string_with_crlf(self): + def test_to_csv_string_with_crlf(self, engine): # GH 20353 data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} df = DataFrame(data) @@ -468,13 +494,13 @@ def test_to_csv_string_with_crlf(self): + b'3,"g\r\nh\r\n\r\ni"' + os_linesep ) - df.to_csv(path, index=False) + df.to_csv(path, index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_noarg with tm.ensure_clean("crlf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) + df.to_csv(path, lineterminator="\n", index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_lf with tm.ensure_clean("crlf_test.csv") as path: @@ -486,17 +512,17 @@ def test_to_csv_string_with_crlf(self): 
b'2,"d\r\nef"\r\n' b'3,"g\r\nh\r\n\r\ni"\r\n' ) - df.to_csv(path, lineterminator="\r\n", index=False) + df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) with open(path, "rb") as f: assert f.read() == expected_crlf - def test_to_csv_stdout_file(self, capsys): + def test_to_csv_stdout_file(self, capsys, engine): # GH 21561 df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]) expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"] expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) - df.to_csv(sys.stdout, encoding="ascii") + df.to_csv(sys.stdout, encoding="ascii", engine=engine) captured = capsys.readouterr() assert captured.out == expected_ascii @@ -510,7 +536,7 @@ def test_to_csv_stdout_file(self, capsys): "(https://docs.python.org/3/library/csv.html#csv.writer)" ), ) - def test_to_csv_write_to_open_file(self): + def test_to_csv_write_to_open_file(self, engine): # GH 21696 df = DataFrame({"a": ["x", "y", "z"]}) expected = """\ @@ -522,11 +548,11 @@ def test_to_csv_write_to_open_file(self): with tm.ensure_clean("test.txt") as path: with open(path, "w", encoding="utf-8") as f: f.write("manual header\n") - df.to_csv(f, header=None, index=None) + df.to_csv(f, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected - def test_to_csv_write_to_open_file_with_newline_py3(self): + def test_to_csv_write_to_open_file_with_newline_py3(self, engine): # see gh-21696 # see gh-20353 df = DataFrame({"a": ["x", "y", "z"]}) @@ -535,7 +561,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self): with tm.ensure_clean("test.txt") as path: with open(path, "w", newline="", encoding="utf-8") as f: f.write("manual header\n") - df.to_csv(f, header=None, index=None) + df.to_csv(f, header=None, index=None, engine=engine) with open(path, "rb") as f: assert f.read() == bytes(expected, "utf-8") @@ -543,7 +569,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self): 
@pytest.mark.parametrize("to_infer", [True, False]) @pytest.mark.parametrize("read_infer", [True, False]) def test_to_csv_compression( - self, compression_only, read_infer, to_infer, compression_to_extension + self, compression_only, read_infer, to_infer, compression_to_extension, engine ): # see gh-15008 compression = compression_only @@ -558,11 +584,11 @@ def test_to_csv_compression( read_compression = "infer" if read_infer else compression with tm.ensure_clean(filename) as path: - df.to_csv(path, compression=to_compression) + df.to_csv(path, compression=to_compression, engine=engine) result = pd.read_csv(path, index_col=0, compression=read_compression) tm.assert_frame_equal(result, df) - def test_to_csv_compression_dict(self, compression_only): + def test_to_csv_compression_dict(self, compression_only, engine): # GH 26023 method = compression_only df = DataFrame({"ABC": [1]}) @@ -573,11 +599,11 @@ def test_to_csv_compression_dict(self, compression_only): }.get(method, method) filename += extension with tm.ensure_clean(filename) as path: - df.to_csv(path, compression={"method": method}) + df.to_csv(path, compression={"method": method}, engine=engine) read_df = pd.read_csv(path, index_col=0) tm.assert_frame_equal(read_df, df) - def test_to_csv_compression_dict_no_method_raises(self): + def test_to_csv_compression_dict_no_method_raises(self, engine): # GH 26023 df = DataFrame({"ABC": [1]}) compression = {"some_option": True} @@ -585,16 +611,18 @@ def test_to_csv_compression_dict_no_method_raises(self): with tm.ensure_clean("out.zip") as path: with pytest.raises(ValueError, match=msg): - df.to_csv(path, compression=compression) + df.to_csv(path, compression=compression, engine=engine) @pytest.mark.parametrize("compression", ["zip", "infer"]) @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"]) - def test_to_csv_zip_arguments(self, compression, archive_name): + def test_to_csv_zip_arguments(self, compression, archive_name, engine): # GH 
26023 df = DataFrame({"ABC": [1]}) with tm.ensure_clean("to_csv_archive_name.zip") as path: df.to_csv( - path, compression={"method": compression, "archive_name": archive_name} + path, + compression={"method": compression, "archive_name": archive_name}, + engine=engine, ) with ZipFile(path) as zp: assert len(zp.filelist) == 1 @@ -611,33 +639,35 @@ def test_to_csv_zip_arguments(self, compression, archive_name): ("archive.zip", "archive"), ], ) - def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname): + def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname, engine): # GH 39465 df = DataFrame({"ABC": [1]}) path = tmp_path / filename - df.to_csv(path, compression="zip") + df.to_csv(path, compression="zip", engine=engine) with ZipFile(path) as zp: assert len(zp.filelist) == 1 archived_file = zp.filelist[0].filename assert archived_file == expected_arcname @pytest.mark.parametrize("df_new_type", ["Int64"]) - def test_to_csv_na_rep_long_string(self, df_new_type): + def test_to_csv_na_rep_long_string(self, df_new_type, engine): # see gh-25099 df = DataFrame({"c": [float("nan")] * 3}) df = df.astype(df_new_type) expected_rows = ["c", "mynull", "mynull", "mynull"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - result = df.to_csv(index=False, na_rep="mynull", encoding="ascii") + result = df.to_csv( + index=False, na_rep="mynull", encoding="ascii", engine=engine + ) assert expected == result - def test_to_csv_timedelta_precision(self): + def test_to_csv_timedelta_precision(self, engine): # GH 6783 s = pd.Series([1, 1]).astype("timedelta64[ns]") buf = io.StringIO() - s.to_csv(buf) + s.to_csv(buf, engine=engine) result = buf.getvalue() expected_rows = [ ",0", @@ -647,32 +677,32 @@ def test_to_csv_timedelta_precision(self): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected - def test_na_rep_truncated(self): + def test_na_rep_truncated(self, engine): # 
https://github.com/pandas-dev/pandas/issues/31447 - result = pd.Series(range(8, 12)).to_csv(na_rep="-") + result = pd.Series(range(8, 12)).to_csv(na_rep="-", engine=engine) expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"]) assert result == expected - result = pd.Series([True, False]).to_csv(na_rep="nan") + result = pd.Series([True, False]).to_csv(na_rep="nan", engine=engine) expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) assert result == expected - result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") + result = pd.Series([1.1, 2.2]).to_csv(na_rep=".", engine=engine) expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) assert result == expected @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) - def test_to_csv_errors(self, errors): + def test_to_csv_errors(self, errors, engine): # GH 22610 data = ["\ud800foo"] ser = pd.Series(data, index=pd.Index(data)) with tm.ensure_clean("test.csv") as path: - ser.to_csv(path, errors=errors) + ser.to_csv(path, errors=errors, engine=engine) # No use in reading back the data as it is not the same anymore # due to the error handling @pytest.mark.parametrize("mode", ["wb", "w"]) - def test_to_csv_binary_handle(self, mode): + def test_to_csv_binary_handle(self, mode, engine): """ Binary file objects should work (if 'mode' contains a 'b') or even without it in most cases. @@ -682,11 +712,11 @@ def test_to_csv_binary_handle(self, mode): df = tm.makeDataFrame() with tm.ensure_clean() as path: with open(path, mode="w+b") as handle: - df.to_csv(handle, mode=mode) + df.to_csv(handle, mode=mode, engine=engine) tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) @pytest.mark.parametrize("mode", ["wb", "w"]) - def test_to_csv_encoding_binary_handle(self, mode): + def test_to_csv_encoding_binary_handle(self, mode, engine): """ Binary file objects should honor a specified encoding. 
@@ -698,34 +728,36 @@ def test_to_csv_encoding_binary_handle(self, mode): df = pd.read_csv(buffer, encoding="utf-8-sig") buffer = io.BytesIO() - df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False) + df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False, engine=engine) buffer.seek(0) # tests whether file handle wasn't closed assert buffer.getvalue().startswith(content) # example from GH 13068 with tm.ensure_clean() as path: with open(path, "w+b") as handle: - DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig") + DataFrame().to_csv( + handle, mode=mode, encoding="utf-8-sig", engine=engine + ) handle.seek(0) assert handle.read().startswith(b'\xef\xbb\xbf""') -def test_to_csv_iterative_compression_name(compression): +def test_to_csv_iterative_compression_name(compression, engine): # GH 38714 df = tm.makeDataFrame() with tm.ensure_clean() as path: - df.to_csv(path, compression=compression, chunksize=1) + df.to_csv(path, compression=compression, chunksize=1, engine=engine) tm.assert_frame_equal( pd.read_csv(path, compression=compression, index_col=0), df ) -def test_to_csv_iterative_compression_buffer(compression): +def test_to_csv_iterative_compression_buffer(compression, engine): # GH 38714 df = tm.makeDataFrame() with io.BytesIO() as buffer: - df.to_csv(buffer, compression=compression, chunksize=1) + df.to_csv(buffer, compression=compression, chunksize=1, engine=engine) buffer.seek(0) tm.assert_frame_equal( pd.read_csv(buffer, compression=compression, index_col=0), df From 8328120b215b416c743247c92b1aa07617b95729 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 22 Jul 2023 17:17:28 -0700 Subject: [PATCH 02/22] pass more --- pandas/core/generic.py | 11 +++++++-- pandas/io/formats/csvs.py | 34 +++++++++++++++++--------- pandas/io/formats/format.py | 12 ++++++--- pandas/tests/io/formats/test_to_csv.py | 4 ++- 4 files changed, 43 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py 
b/pandas/core/generic.py index 79edbf9c1eebe..f8d7b2481f13b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3729,7 +3729,7 @@ def to_csv( header: bool_t | list[str] = True, index: bool_t = True, index_label: IndexLabel | None = None, - mode: str = "w", + mode: str | None = None, encoding: str | None = None, compression: CompressionOptions = "infer", quoting: int | None = None, @@ -3786,7 +3786,7 @@ def to_csv( sequence should be given if the object uses MultiIndex. If False do not print fields for index names. Use index_label=False for easier importing in R. - mode : {{'w', 'x', 'a'}}, default 'w' + mode : {{'w', 'x', 'a'}}, default 'w' (Python engine) or 'wb' (Pyarrow engine) Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control the file opening. Typical values include: @@ -3794,6 +3794,8 @@ def to_csv( - 'x', exclusive creation, failing if the file already exists. - 'a', append to the end of file if it exists. + NOTE: The pyarrow engine can only handle binary buffers. + encoding : str, optional A string representing the encoding to use in the output file, defaults to 'utf-8'. 
`encoding` is not supported if `path_or_buf` @@ -3903,6 +3905,11 @@ def to_csv( decimal=decimal, ) + if mode is None: + mode = "w" + if engine == "pyarrow": + mode += "b" + return DataFrameRenderer(formatter).to_csv( path_or_buf, engine=engine, diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index db9fd8783d9eb..6970e36ebff54 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -20,6 +20,7 @@ import numpy as np from pandas._libs import writers as libwriters +from pandas.compat import pa_version_under11p0 from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import cache_readonly @@ -253,6 +254,8 @@ def save(self) -> None: errors=self.errors, compression=self.compression, storage_options=self.storage_options, + # pyarrow engine exclusively writes bytes + is_text=self.engine == "python", ) as handles: # Note: self.encoding is irrelevant here self._save(handles.handle) @@ -262,13 +265,17 @@ def _save_pyarrow(self, handle) -> None: pa_csv = import_optional_dependency("pyarrow.csv") # Convert index to column and rename name to empty string # since we serialize the index as basically a column with no name - # TODO: this won't work for multi-indexes - obj = self.obj.reset_index(names=[""]) + # TODO: this won't work for multi-indexes (without names) + obj = self.obj + if self.index: + new_names = [ + label if label is not None else "" for label in self.obj.index.names + ] + obj = self.obj.reset_index(names=new_names) table = pa.Table.from_pandas(obj) # Map quoting arg to pyarrow equivalents - pa_quoting = None if self.quoting == csvlib.QUOTE_MINIMAL: pa_quoting = "needed" elif self.quoting == csvlib.QUOTE_ALL: @@ -278,18 +285,21 @@ def _save_pyarrow(self, handle) -> None: elif self.quoting == csvlib.QUOTE_NONE: pa_quoting = "none" else: - raise ValueError( + raise NotImplementedError( f"Quoting option {self.quoting} is not supported with engine='pyarrow'" ) - write_options = pa_csv.WriteOptions( 
- include_header=self._need_to_save_header, - batch_size=self.chunksize, - delimiter=self.sep, - quoting_style=pa_quoting, - ) - # pa_csv.write_csv(table, handle, write_options) - pa_csv.write_csv(table, self.filepath_or_buffer, write_options) + kwargs = { + "include_header": self._need_to_save_header, + "batch_size": self.chunksize, + "delimiter": self.sep, + } + + if not pa_version_under11p0: + kwargs["quoting_style"] = pa_quoting + + write_options = pa_csv.WriteOptions(**kwargs) + pa_csv.write_csv(table, handle, write_options) def _save(self, handle) -> None: if self.engine == "pyarrow": diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b89f3400675db..5ede0e080302f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -18,7 +18,10 @@ ) from decimal import Decimal from functools import partial -from io import StringIO +from io import ( + BytesIO, + StringIO, +) import math import re from shutil import get_terminal_size @@ -1127,7 +1130,7 @@ def to_csv( if path_or_buf is None: created_buffer = True - path_or_buf = StringIO() + path_or_buf = StringIO() if engine == "python" else BytesIO() else: created_buffer = False @@ -1154,8 +1157,11 @@ def to_csv( csv_formatter.save() if created_buffer: - assert isinstance(path_or_buf, StringIO) content = path_or_buf.getvalue() + if isinstance(path_or_buf, BytesIO): + # Need to decode into string since the + # pyarrow engine only writes binary data + content = content.decode("utf-8") path_or_buf.close() return content diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index eeb2a1b8a2c56..bd17cc59c3c5a 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -546,7 +546,8 @@ def test_to_csv_write_to_open_file(self, engine): z """ with tm.ensure_clean("test.txt") as path: - with open(path, "w", encoding="utf-8") as f: + # TODO: open in bytes mode for pyarrow + with open(path, encoding="utf-8") 
as f: f.write("manual header\n") df.to_csv(f, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: @@ -559,6 +560,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self, engine): expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("test.txt") as path: + # TODO: Open in bytes mode for pyarrow with open(path, "w", newline="", encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None, engine=engine) From a889ebf5fcfd730215ace4124ff180052504afb9 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 3 Aug 2023 11:36:25 -0700 Subject: [PATCH 03/22] xfail everything --- pandas/tests/io/formats/test_to_csv.py | 44 ++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index bd17cc59c3c5a..a8926c3cf1ccf 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -21,7 +21,22 @@ def engine(request): return request.param +@pytest.fixture +def pyarrow_xfail(request): + """ + Fixture that xfails a test if the engine is pyarrow. 
+ """ + engine = request.getfixturevalue("engine") + if engine == "pyarrow": + mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") + request.node.add_marker(mark) + + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + + class TestToCSV: + @xfail_pyarrow def test_to_csv_with_single_column(self, engine): # see gh-18676, https://bugs.python.org/issue32255 # @@ -59,6 +74,7 @@ def test_to_csv_default_encoding(self, engine): df.to_csv(path, engine=engine) tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + @xfail_pyarrow def test_to_csv_quotechar(self, engine): df = DataFrame({"col": [1, 2]}) expected = """\ @@ -131,12 +147,14 @@ def test_to_csv_escapechar(self, engine=engine): with open(path, encoding="utf-8") as f: assert f.read() == expected + @xfail_pyarrow def test_csv_to_string(self, engine): df = DataFrame({"col": [1, 2]}) expected_rows = [",col", "0,1", "1,2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(engine=engine) == expected + @xfail_pyarrow def test_to_csv_decimal(self, engine): # see gh-781 df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) @@ -176,7 +194,8 @@ def test_to_csv_decimal(self, engine): # same for a multi-index assert df.set_index(["a", "b"]).to_csv(engine=engine, decimal="^") == expected - def test_to_csv_float_format(self): + @xfail_pyarrow + def test_to_csv_float_format(self, engine): # testing if float_format is taken into account for the index # GH 11553 df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) @@ -191,7 +210,8 @@ def test_to_csv_float_format(self): == expected ) - def test_to_csv_na_rep(self): + @xfail_pyarrow + def test_to_csv_na_rep(self, engine): # see gh-11553 # # Testing if NaN values are correctly represented in the index. 
@@ -222,6 +242,7 @@ def test_to_csv_na_rep(self): expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) assert expected == csv + @xfail_pyarrow def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype, engine): # GH 29975 # Make sure full na_rep shows up when a dtype is provided @@ -231,6 +252,7 @@ def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype, engine): ) assert expected == csv + @xfail_pyarrow def test_to_csv_date_format(self, engine): # GH 10209 df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) @@ -302,6 +324,7 @@ def test_to_csv_date_format(self, engine): == expected_ymd_sec ) + @xfail_pyarrow def test_to_csv_different_datetime_formats(self, engine): # GH#21734 df = DataFrame( @@ -318,6 +341,7 @@ def test_to_csv_different_datetime_formats(self, engine): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(index=False, engine=engine) == expected + @xfail_pyarrow def test_to_csv_date_format_in_categorical(self, engine): # GH#40754 ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d")) @@ -335,6 +359,7 @@ def test_to_csv_date_format_in_categorical(self, engine): ser.to_csv(index=False, engine=engine, date_format="%Y-%m-%d") == expected ) + @xfail_pyarrow def test_to_csv_float_ea_float_format(self, engine): # GH#45991 df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"}) @@ -345,6 +370,7 @@ def test_to_csv_float_ea_float_format(self, engine): ) assert result == expected + @xfail_pyarrow def test_to_csv_float_ea_no_float_format(self, engine): # GH#45991 df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"}) @@ -355,6 +381,7 @@ def test_to_csv_float_ea_no_float_format(self, engine): ) assert result == expected + @xfail_pyarrow def test_to_csv_multi_index(self, engine): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) @@ -391,6 +418,7 @@ def test_to_csv_multi_index(self, engine): exp = 
tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False, engine=engine) == exp + @xfail_pyarrow @pytest.mark.parametrize( "ind,expected", [ @@ -415,6 +443,7 @@ def test_to_csv_single_level_multi_index( result = obj.to_csv(lineterminator="\n", header=True, engine=engine) assert result == expected + @xfail_pyarrow def test_to_csv_string_array_ascii(self, engine): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] @@ -429,6 +458,7 @@ def test_to_csv_string_array_ascii(self, engine): with open(path, encoding="utf-8") as f: assert f.read() == expected_ascii + @xfail_pyarrow def test_to_csv_string_array_utf8(self, engine): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] @@ -443,6 +473,7 @@ def test_to_csv_string_array_utf8(self, engine): with open(path, encoding="utf-8") as f: assert f.read() == expected_utf8 + @xfail_pyarrow def test_to_csv_string_with_lf(self, engine): # GH 20353 data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} @@ -477,6 +508,7 @@ def test_to_csv_string_with_lf(self, engine): with open(path, "rb") as f: assert f.read() == expected_crlf + @xfail_pyarrow def test_to_csv_string_with_crlf(self, engine): # GH 20353 data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} @@ -516,6 +548,7 @@ def test_to_csv_string_with_crlf(self, engine): with open(path, "rb") as f: assert f.read() == expected_crlf + @xfail_pyarrow def test_to_csv_stdout_file(self, capsys, engine): # GH 21561 df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]) @@ -528,6 +561,7 @@ def test_to_csv_stdout_file(self, capsys, engine): assert captured.out == expected_ascii assert not sys.stdout.closed + @xfail_pyarrow @pytest.mark.xfail( compat.is_platform_windows(), reason=( @@ -553,6 +587,7 @@ def test_to_csv_write_to_open_file(self, engine): with open(path, encoding="utf-8") as f: assert f.read() == expected + @xfail_pyarrow def 
test_to_csv_write_to_open_file_with_newline_py3(self, engine): # see gh-21696 # see gh-20353 @@ -651,6 +686,7 @@ def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname, engin archived_file = zp.filelist[0].filename assert archived_file == expected_arcname + @xfail_pyarrow @pytest.mark.parametrize("df_new_type", ["Int64"]) def test_to_csv_na_rep_long_string(self, df_new_type, engine): # see gh-25099 @@ -665,6 +701,7 @@ def test_to_csv_na_rep_long_string(self, df_new_type, engine): assert expected == result + @xfail_pyarrow def test_to_csv_timedelta_precision(self, engine): # GH 6783 s = pd.Series([1, 1]).astype("timedelta64[ns]") @@ -679,6 +716,7 @@ def test_to_csv_timedelta_precision(self, engine): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected + @xfail_pyarrow def test_na_rep_truncated(self, engine): # https://github.com/pandas-dev/pandas/issues/31447 result = pd.Series(range(8, 12)).to_csv(na_rep="-", engine=engine) @@ -693,6 +731,7 @@ def test_na_rep_truncated(self, engine): expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) assert result == expected + @xfail_pyarrow @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) def test_to_csv_errors(self, errors, engine): # GH 22610 @@ -717,6 +756,7 @@ def test_to_csv_binary_handle(self, mode, engine): df.to_csv(handle, mode=mode, engine=engine) tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + @xfail_pyarrow @pytest.mark.parametrize("mode", ["wb", "w"]) def test_to_csv_encoding_binary_handle(self, mode, engine): """ From 1f7ffea6c7b71a911ddb2bf79c7b256cea0a44d5 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 3 Aug 2023 12:55:06 -0700 Subject: [PATCH 04/22] revert unintentional change --- pandas/tests/io/formats/test_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_csv.py 
b/pandas/tests/io/formats/test_to_csv.py index a8926c3cf1ccf..0472285968d68 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -581,7 +581,7 @@ def test_to_csv_write_to_open_file(self, engine): """ with tm.ensure_clean("test.txt") as path: # TODO: open in bytes mode for pyarrow - with open(path, encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: From faeed4c579472485607fc73f839d05a3c7e2bcc9 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:37:20 -0700 Subject: [PATCH 05/22] fix typing and tests --- pandas/io/formats/csvs.py | 22 ++++++++++++++++------ pandas/io/formats/format.py | 6 +++++- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 6970e36ebff54..4e69aa3c47685 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -12,15 +12,20 @@ import csv as csvlib import os from typing import ( + IO, TYPE_CHECKING, Any, + AnyStr, cast, ) import numpy as np from pandas._libs import writers as libwriters -from pandas.compat import pa_version_under11p0 +from pandas.compat import ( + pa_version_under8p0, + pa_version_under11p0, +) from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import cache_readonly @@ -258,9 +263,12 @@ def save(self) -> None: is_text=self.engine == "python", ) as handles: # Note: self.encoding is irrelevant here - self._save(handles.handle) - def _save_pyarrow(self, handle) -> None: + # This is a mypy bug? 
+ # error: Cannot infer type argument 1 of "_save" of "CSVFormatter" [misc] + self._save(handles.handle) # type: ignore[misc] + + def _save_pyarrow(self, handle: IO[AnyStr]) -> None: pa = import_optional_dependency("pyarrow") pa_csv = import_optional_dependency("pyarrow.csv") # Convert index to column and rename name to empty string @@ -289,19 +297,21 @@ def _save_pyarrow(self, handle) -> None: f"Quoting option {self.quoting} is not supported with engine='pyarrow'" ) - kwargs = { + kwargs: dict[str, Any] = { "include_header": self._need_to_save_header, "batch_size": self.chunksize, - "delimiter": self.sep, } + if not pa_version_under8p0: + kwargs["delimiter"] = self.sep + if not pa_version_under11p0: kwargs["quoting_style"] = pa_quoting write_options = pa_csv.WriteOptions(**kwargs) pa_csv.write_csv(table, handle, write_options) - def _save(self, handle) -> None: + def _save(self, handle: IO[AnyStr]) -> None: if self.engine == "pyarrow": self._save_pyarrow(handle) else: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 5ede0e080302f..8506d305d90e1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -31,6 +31,7 @@ Any, Callable, Final, + Union, cast, ) from unicodedata import east_asian_width @@ -1157,11 +1158,14 @@ def to_csv( csv_formatter.save() if created_buffer: + path_or_buf = cast(Union[BytesIO, StringIO], path_or_buf) content = path_or_buf.getvalue() - if isinstance(path_or_buf, BytesIO): + if isinstance(content, bytes): # Need to decode into string since the # pyarrow engine only writes binary data + # content = cast(bytes, content) content = content.decode("utf-8") + # content = cast(str, content) path_or_buf.close() return content From 47d48f143e62b7bd773460b8c6781f9befd9b2e7 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 3 Aug 2023 20:18:55 -0700 Subject: [PATCH 06/22] green everything? 
--- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/io/formats/test_to_csv.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 17894914b44d1..fca5d14400468 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -212,8 +212,8 @@ Other enhancements - Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`) - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`) +- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 0472285968d68..29eac1ea4d930 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -18,6 +18,8 @@ @pytest.fixture(params=["python", "pyarrow"]) def engine(request): # TODO: Skip if pyarrow not found + if request.param == "pyarrow": + pytest.importorskip("pyarrow") return request.param From ae9f87cbe7714092c668ce29339f2b58eb509835 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Aug 2023 18:57:28 +0000 Subject: [PATCH 07/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 4f522f48569da..b7414f2d278dd 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -217,8 +217,8 @@ Other enhancements - Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`) - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`) -- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - :meth:`DataFrame.to_parquet` and :func:`read_parquet` will now write and read ``attrs`` respectively (:issue:`54346`) +- Allow using pyarrow to serialize 
:class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) .. --------------------------------------------------------------------------- From c49309ca57bb75674ddbd9838e22691a2767af08 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 8 Aug 2023 02:16:58 -0700 Subject: [PATCH 08/22] move option to end --- pandas/core/generic.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 798be98a4d447..e26f526c16270 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3663,7 +3663,6 @@ def to_csv( path_or_buf: None = ..., sep: str = ..., na_rep: str = ..., - engine: str = "python", float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3682,6 +3681,7 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., + engine: str = "python", ) -> str: ... @@ -3691,7 +3691,6 @@ def to_csv( path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], sep: str = ..., na_rep: str = ..., - engine: str = "python", float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3710,6 +3709,7 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., + engine: str = "python", ) -> None: ... 
@@ -3723,7 +3723,6 @@ def to_csv( path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, sep: str = ",", na_rep: str = "", - engine: str = "python", float_format: str | Callable | None = None, columns: Sequence[Hashable] | None = None, header: bool_t | list[str] = True, @@ -3742,6 +3741,7 @@ def to_csv( decimal: str = ".", errors: OpenFileErrors = "strict", storage_options: StorageOptions | None = None, + engine: str = "python", ) -> str | None: r""" Write object to a comma-separated values (csv) file. @@ -3763,13 +3763,6 @@ def to_csv( String of length 1. Field delimiter for the output file. na_rep : str, default '' Missing data representation. - engine : str, default 'python' - The engine to use. Available options are "pyarrow" or "python". - The pyarrow engine requires the pyarrow library to be installed - and is generally faster than the python engine. - - However, the python engine may be more feature complete than the - pyarrow engine. float_format : str, Callable, default None Format string for floating point numbers. If a Callable is given, it takes precedence over other numeric formatting parameters, like decimal. @@ -3856,6 +3849,16 @@ def to_csv( .. versionadded:: 1.2.0 + engine : str, default 'python' + The engine to use. Available options are "pyarrow" or "python". + The pyarrow engine requires the pyarrow library to be installed + and is generally faster than the python engine. + + However, the python engine may be more feature complete than the + pyarrow engine. + + .. 
versionadded:: 2.1.0 + Returns ------- None or str From da130914c8f4df2834835e2e68eea6067eb15d7e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:33:17 -0500 Subject: [PATCH 09/22] Update csvs.py --- pandas/io/formats/csvs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index e654617447376..8aa427e1170f0 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -23,6 +23,10 @@ import numpy as np from pandas._libs import writers as libwriters +from pandas.compat import ( + pa_version_under8p0, + pa_version_under11p0, +) from pandas.compat._optional import import_optional_dependency from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly From 6345ab53aba3991e4123ded010d11ec3059ecb2b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:18:56 -0500 Subject: [PATCH 10/22] Update csvs.py --- pandas/io/formats/csvs.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 8aa427e1170f0..e50cb63c1bc77 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -23,10 +23,7 @@ import numpy as np from pandas._libs import writers as libwriters -from pandas.compat import ( - pa_version_under8p0, - pa_version_under11p0, -) +from pandas.compat import pa_version_under11p0 from pandas.compat._optional import import_optional_dependency from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly @@ -306,9 +303,7 @@ def _save_pyarrow(self, handle: IO[AnyStr]) -> None: "include_header": self._need_to_save_header, "batch_size": self.chunksize, } - - if not pa_version_under8p0: - kwargs["delimiter"] = self.sep + kwargs["delimiter"] = self.sep if not pa_version_under11p0: kwargs["quoting_style"] = pa_quoting From 
bde1a2b2ae84683dca907b7f6f7001be8b9f9351 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 22 Nov 2023 17:37:16 -0500 Subject: [PATCH 11/22] green and move whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + pandas/io/formats/csvs.py | 7 +++++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 9817b5a12f212..51b4c4f297b07 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -273,7 +273,6 @@ Other enhancements - :meth:`DataFrame.to_parquet` and :func:`read_parquet` will now write and read ``attrs`` respectively (:issue:`54346`) - :meth:`Index.all` and :meth:`Index.any` with floating dtypes and timedelta64 dtypes no longer raise ``TypeError``, matching the :meth:`Series.all` and :meth:`Series.any` behavior (:issue:`54566`) - :meth:`Series.cummax`, :meth:`Series.cummin` and :meth:`Series.cumprod` are now supported for pyarrow dtypes with pyarrow version 13.0 and above (:issue:`52085`) -- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - Added support for the DataFrame Consortium Standard (:issue:`54383`) - Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`51722`) - PyArrow-backed integer dtypes now support bitwise operations (:issue:`54495`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 280eb11abb781..3795784c64e75 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -102,6 +102,7 @@ Other enhancements - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the 
``duplicated`` method (:issue:`55255`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) +- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index e50cb63c1bc77..4a0484bbac972 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -23,9 +23,9 @@ import numpy as np from pandas._libs import writers as libwriters +from pandas._typing import SequenceNotStr from pandas.compat import pa_version_under11p0 from pandas.compat._optional import import_optional_dependency -from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly from pandas.core.dtypes.generic import ( @@ -316,7 +316,10 @@ def _save(self, handle: IO[AnyStr]) -> None: self._save_pyarrow(handle) else: self.writer = csvlib.writer( - handle, + # error: Argument of type "IO[AnyStr@_save]" cannot be assigned + # to parameter "csvfile" of type "SupportsWrite[str]" + # in function "writer" + handle, # pyright: ignore[reportGeneralTypeIssues] lineterminator=self.lineterminator, delimiter=self.sep, quoting=self.quoting, From cb5f6cd36145628acba87db0b665aa3b7c5ac0b0 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Nov 2023 13:54:53 -0500 Subject: [PATCH 12/22] updates --- pandas/core/generic.py | 3 ++- pandas/io/formats/csvs.py | 9 +++++++++ pandas/tests/io/formats/test_to_csv.py | 25 
++++++++++++++++++++----- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fe53bc6c08239..2f53ff68fc3e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3832,7 +3832,8 @@ def to_csv( - 'x', exclusive creation, failing if the file already exists. - 'a', append to the end of file if it exists. - NOTE: The pyarrow engine can only handle binary buffers. + .. note:: + The pyarrow engine can only handle binary buffers. encoding : str, optional A string representing the encoding to use in the output file, diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 4a0484bbac972..119a0a16809d0 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -11,6 +11,7 @@ Sequence, ) import csv as csvlib +import io import os from typing import ( IO, @@ -253,6 +254,14 @@ def save(self) -> None: """ Create the writer & save. """ + if self.engine == "pyarrow": + if "b" not in self.mode or isinstance( + self.filepath_or_buffer, io.TextIOBase + ): + raise ValueError( + "The pyarrow engine can only open file in binary mode." 
+ ) + # apply compression and byte/text conversion with get_handle( self.filepath_or_buffer, diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 79f51f1047492..c12cac6c82937 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,3 +1,4 @@ +import contextlib import io import os import sys @@ -17,7 +18,6 @@ @pytest.fixture(params=["python", "pyarrow"]) def engine(request): - # TODO: Skip if pyarrow not found if request.param == "pyarrow": pytest.importorskip("pyarrow") return request.param @@ -31,7 +31,7 @@ def pyarrow_xfail(request): engine = request.getfixturevalue("engine") if engine == "pyarrow": mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") - request.node.add_marker(mark) + request.applymarker(mark) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -582,10 +582,17 @@ def test_to_csv_write_to_open_file(self, engine): z """ with tm.ensure_clean("test.txt") as path: - # TODO: open in bytes mode for pyarrow with open(path, "w", encoding="utf-8") as f: f.write("manual header\n") - df.to_csv(f, header=None, index=None, engine=engine) + if engine == "pyarrow": + raise_if_pyarrow = pytest.raises( + ValueError, + match="The pyarrow engine can only open file in abinary mode.", + ) + else: + raise_if_pyarrow = contextlib.nullcontext() + with raise_if_pyarrow: + df.to_csv(f, header=None, index=None, engine=engine) with open(path, encoding="utf-8") as f: assert f.read() == expected @@ -600,7 +607,15 @@ def test_to_csv_write_to_open_file_with_newline_py3(self, engine): # TODO: Open in bytes mode for pyarrow with open(path, "w", newline="", encoding="utf-8") as f: f.write("manual header\n") - df.to_csv(f, header=None, index=None, engine=engine) + if engine == "pyarrow": + raise_if_pyarrow = pytest.raises( + ValueError, + match="The pyarrow engine can only open file in abinary mode.", + ) + else: + raise_if_pyarrow = contextlib.nullcontext() + with 
raise_if_pyarrow: + df.to_csv(f, header=None, index=None, engine=engine) with open(path, "rb") as f: assert f.read() == bytes(expected, "utf-8") From 3d95a92709f92db598a147ee9b6b46cbe9fe767c Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Nov 2023 15:10:22 -0500 Subject: [PATCH 13/22] address code review --- pandas/io/formats/csvs.py | 34 ++- pandas/io/formats/format.py | 2 - pandas/tests/io/formats/test_to_csv.py | 320 ++++++++++++++++--------- 3 files changed, 239 insertions(+), 117 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 119a0a16809d0..3606aaaf92983 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -254,13 +254,10 @@ def save(self) -> None: """ Create the writer & save. """ - if self.engine == "pyarrow": - if "b" not in self.mode or isinstance( - self.filepath_or_buffer, io.TextIOBase - ): - raise ValueError( - "The pyarrow engine can only open file in binary mode." 
- ) + if self.engine == "pyarrow" and ( + "b" not in self.mode or isinstance(self.filepath_or_buffer, io.TextIOBase) + ): + raise ValueError("The pyarrow engine can only open files in binary mode.") # apply compression and byte/text conversion with get_handle( self.filepath_or_buffer, @@ -282,6 +279,27 @@ def save(self) -> None: def _save_pyarrow(self, handle: IO[AnyStr]) -> None: pa = import_optional_dependency("pyarrow") pa_csv = import_optional_dependency("pyarrow.csv") + + if self.quotechar is not None and self.quotechar != '"': + raise ValueError('The pyarrow engine only supports " as a quotechar.') + + unsupported_options = [ + # each pair is (option value, default, option name) + (self.decimal, ".", "decimal"), + (self.float_format, None, "float_format"), + (self.na_rep, "", "na_rep"), + (self.date_format, None, "date_format"), + (self.lineterminator, os.linesep, "lineterminator"), + (self.encoding, None, "encoding"), + (self.errors, "strict", "errors"), + ] + + for opt_val, default, option in unsupported_options: + if opt_val != default: + raise ValueError( + f"The {option} option is not supported with the pyarrow engine." + ) + # Convert index to column and rename name to empty string # since we serialize the index as basically a column with no name # TODO: this won't work for multi-indexes (without names) @@ -297,6 +315,8 @@ def _save_pyarrow(self, handle: IO[AnyStr]) -> None: # Map quoting arg to pyarrow equivalents if self.quoting == csvlib.QUOTE_MINIMAL: pa_quoting = "needed" + elif self.quotechar is None: + raise TypeError("quotechar must be set if quoting enabled") elif self.quoting == csvlib.QUOTE_ALL: # TODO: Is this a 1-1 mapping?
# This doesn't quote nulls, check if Python does this diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 87bbd6e170da4..55061e3ff37e7 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1028,9 +1028,7 @@ def to_csv( if isinstance(content, bytes): # Need to decode into string since the # pyarrow engine only writes binary data - # content = cast(bytes, content) content = content.decode("utf-8") - # content = cast(str, content) path_or_buf.close() return content diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index c12cac6c82937..9e818201e62cf 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -34,6 +34,21 @@ def pyarrow_xfail(request): request.applymarker(mark) +def check_raises_if_pyarrow(option, engine): + """ + Returns a context manager that ensures that the pyarrow engine raises an + exception for unsupported options. + """ + if engine == "pyarrow": + raises_if_pyarrow = pytest.raises( + ValueError, + match=f"The {option} option is not supported with the pyarrow engine.", + ) + else: + raises_if_pyarrow = contextlib.nullcontext() + return raises_if_pyarrow + + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -76,7 +91,6 @@ def test_to_csv_default_encoding(self, engine): df.to_csv(path, engine=engine) tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) - @xfail_pyarrow def test_to_csv_quotechar(self, engine): df = DataFrame({"col": [1, 2]}) expected = """\ @@ -97,9 +111,18 @@ def test_to_csv_quotechar(self, engine): """ with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1, quotechar="$", engine=engine) - with open(path, encoding="utf-8") as f: - assert f.read() == expected + if engine == "pyarrow": + raises_if_pyarrow = pytest.raises( + ValueError, + match='The pyarrow engine only supports " as a quotechar.', + ) + else: + raises_if_pyarrow = contextlib.nullcontext() + with 
raises_if_pyarrow: + df.to_csv(path, quoting=1, quotechar="$", engine=engine) + if engine != "pyarrow": + with open(path, encoding="utf-8") as f: + assert f.read() == expected with tm.ensure_clean("test.csv") as path: with pytest.raises(TypeError, match="quotechar"): @@ -159,104 +182,137 @@ def test_csv_to_string(self, engine): @xfail_pyarrow def test_to_csv_decimal(self, engine): # see gh-781 + raises_if_pyarrow = check_raises_if_pyarrow("decimal", engine) df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) expected_rows = [",col1,col2,col3", "0,1,a,10.1"] expected_default = tm.convert_rows_list_to_csv_str(expected_rows) + # This assert fails for the pyarrow engine since it quotes strings + # and the Python engine doesn't assert df.to_csv(engine=engine) == expected_default expected_rows = [";col1;col2;col3", "0;1;a;10,1"] expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(engine=engine, decimal=",", sep=";") == expected_european_excel + with raises_if_pyarrow: + assert ( + df.to_csv(engine=engine, decimal=",", sep=";") + == expected_european_excel + ) expected_rows = [",col1,col2,col3", "0,1,a,10.10"] expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows) - assert ( - df.to_csv(engine=engine, float_format="%.2f") - == expected_float_format_default - ) + with raises_if_pyarrow: + assert ( + df.to_csv(engine=engine, float_format="%.2f") + == expected_float_format_default + ) expected_rows = [";col1;col2;col3", "0;1;a;10,10"] expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) - assert ( - df.to_csv(engine=engine, decimal=",", sep=";", float_format="%.2f") - == expected_float_format - ) + with raises_if_pyarrow: + assert ( + df.to_csv(engine=engine, decimal=",", sep=";", float_format="%.2f") + == expected_float_format + ) # see gh-11553: testing if decimal is taken into account for '0.0' df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", 
"0^0,2^2,1", "1^1,3^3,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.to_csv(engine=engine, index=False, decimal="^") == expected + with raises_if_pyarrow: + assert df.to_csv(engine=engine, index=False, decimal="^") == expected # same but for an index - assert df.set_index("a").to_csv(engine=engine, decimal="^") == expected + with raises_if_pyarrow: + assert df.set_index("a").to_csv(engine=engine, decimal="^") == expected # same for a multi-index - assert df.set_index(["a", "b"]).to_csv(engine=engine, decimal="^") == expected + with raises_if_pyarrow: + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, decimal="^") == expected + ) - @xfail_pyarrow def test_to_csv_float_format(self, engine): # testing if float_format is taken into account for the index # GH 11553 + raises_if_pyarrow = check_raises_if_pyarrow("float_format", engine) df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(engine=engine, float_format="%.2f") == expected + with raises_if_pyarrow: + assert ( + df.set_index("a").to_csv(engine=engine, float_format="%.2f") == expected + ) # same for a multi-index - assert ( - df.set_index(["a", "b"]).to_csv(engine=engine, float_format="%.2f") - == expected - ) + with raises_if_pyarrow: + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, float_format="%.2f") + == expected + ) - @xfail_pyarrow def test_to_csv_na_rep(self, engine): # see gh-11553 # # Testing if NaN values are correctly represented in the index. 
+ raises_if_pyarrow = check_raises_if_pyarrow("na_rep", engine) df = DataFrame({"a": [0, np.nan], "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected - assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + with raises_if_pyarrow: + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected + with raises_if_pyarrow: + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected + ) # now with an index containing only NaNs df = DataFrame({"a": np.nan, "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "_,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected - assert df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected + with raises_if_pyarrow: + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected + with raises_if_pyarrow: + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected + ) # check if na_rep parameter does not break anything when no NaN df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "0,0,2", "0,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected - assert df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected + with raises_if_pyarrow: + assert df.set_index("a").to_csv(engine=engine, na_rep="_") == expected + with raises_if_pyarrow: + assert ( + df.set_index(["a", "b"]).to_csv(engine=engine, na_rep="_") == expected + ) - csv = pd.Series(["a", pd.NA, "c"]).to_csv(engine=engine, na_rep="ZZZZZ") - expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) - assert expected == csv + with raises_if_pyarrow: + csv = pd.Series(["a", pd.NA, "c"]).to_csv(engine=engine, 
na_rep="ZZZZZ") + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + assert expected == csv - @xfail_pyarrow def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype, engine): # GH 29975 # Make sure full na_rep shows up when a dtype is provided + raises_if_pyarrow = check_raises_if_pyarrow("na_rep", engine) expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) - csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv( - engine=engine, na_rep="ZZZZZ" - ) - assert expected == csv + with raises_if_pyarrow: + csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv( + engine=engine, na_rep="ZZZZZ" + ) + assert expected == csv @xfail_pyarrow def test_to_csv_date_format(self, engine): # GH 10209 + raises_if_pyarrow = check_raises_if_pyarrow("date_format", engine) df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")}) @@ -280,10 +336,11 @@ def test_to_csv_date_format(self, engine): "4,2013-01-05 00:00:00", ] expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) - assert ( - df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S", engine=engine) - == expected_ymdhms_day - ) + with raises_if_pyarrow: + assert ( + df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S", engine=engine) + == expected_ymdhms_day + ) expected_rows = [ ",A", @@ -294,7 +351,10 @@ def test_to_csv_date_format(self, engine): "4,2013-01-01", ] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_sec.to_csv(date_format="%Y-%m-%d", engine=engine) == expected_ymd_sec + with raises_if_pyarrow: + assert ( + df_sec.to_csv(date_format="%Y-%m-%d", engine=engine) == expected_ymd_sec + ) expected_rows = [ ",A", @@ -305,10 +365,13 @@ def test_to_csv_date_format(self, engine): "4,2013-01-05", ] expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) - assert df_day.to_csv(engine=engine) == 
expected_default_day - assert ( - df_day.to_csv(date_format="%Y-%m-%d", engine=engine) == expected_default_day - ) + with raises_if_pyarrow: + assert df_day.to_csv(engine=engine) == expected_default_day + with raises_if_pyarrow: + assert ( + df_day.to_csv(date_format="%Y-%m-%d", engine=engine) + == expected_default_day + ) # see gh-7791 # @@ -321,10 +384,11 @@ def test_to_csv_date_format(self, engine): expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) - assert ( - df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d", engine=engine) - == expected_ymd_sec - ) + with raises_if_pyarrow: + assert ( + df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d", engine=engine) + == expected_ymd_sec + ) @xfail_pyarrow def test_to_csv_different_datetime_formats(self, engine): @@ -346,6 +410,7 @@ def test_to_csv_different_datetime_formats(self, engine): @xfail_pyarrow def test_to_csv_date_format_in_categorical(self, engine): # GH#40754 + raises_if_pyarrow = check_raises_if_pyarrow("date_format", engine) ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d")) ser = ser.astype("category") expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""']) @@ -357,20 +422,23 @@ def test_to_csv_date_format_in_categorical(self, engine): ).append(pd.DatetimeIndex([pd.NaT])) ) ser = ser.astype("category") - assert ( - ser.to_csv(index=False, engine=engine, date_format="%Y-%m-%d") == expected - ) + with raises_if_pyarrow: + assert ( + ser.to_csv(index=False, engine=engine, date_format="%Y-%m-%d") + == expected + ) - @xfail_pyarrow def test_to_csv_float_ea_float_format(self, engine): # GH#45991 + raises_if_pyarrow = check_raises_if_pyarrow("float_format", engine) df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"}) df["a"] = df["a"].astype("Float64") - result = df.to_csv(index=False, engine=engine, float_format="%.5f") - expected = tm.convert_rows_list_to_csv_str( - ["a,b", 
"1.10000,c", "2.02000,c", ",c", "6.00001,c"] - ) - assert result == expected + with raises_if_pyarrow: + result = df.to_csv(index=False, engine=engine, float_format="%.5f") + expected = tm.convert_rows_list_to_csv_str( + ["a,b", "1.10000,c", "2.02000,c", ",c", "6.00001,c"] + ) + assert result == expected @xfail_pyarrow def test_to_csv_float_ea_no_float_format(self, engine): @@ -440,14 +508,16 @@ def test_to_csv_single_level_multi_index( self, ind, expected, frame_or_series, engine ): # see gh-19589 + raises_if_pyarrow = check_raises_if_pyarrow("lineterminator", engine) obj = frame_or_series(pd.Series([1], ind, name="data")) - result = obj.to_csv(lineterminator="\n", header=True, engine=engine) - assert result == expected + with raises_if_pyarrow: + result = obj.to_csv(lineterminator="\n", header=True, engine=engine) + assert result == expected - @xfail_pyarrow def test_to_csv_string_array_ascii(self, engine): # GH 10813 + raises_if_pyarrow = check_raises_if_pyarrow("encoding", engine) str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] df = DataFrame(str_array) expected_ascii = """\ @@ -456,9 +526,10 @@ def test_to_csv_string_array_ascii(self, engine): 1,"['baz', 'qux']" """ with tm.ensure_clean("str_test.csv") as path: - df.to_csv(path, encoding="ascii", engine=engine) - with open(path, encoding="utf-8") as f: - assert f.read() == expected_ascii + with raises_if_pyarrow: + df.to_csv(path, encoding="ascii", engine=engine) + with open(path, encoding="utf-8") as f: + assert f.read() == expected_ascii @xfail_pyarrow def test_to_csv_string_array_utf8(self, engine): @@ -478,6 +549,7 @@ def test_to_csv_string_array_utf8(self, engine): @xfail_pyarrow def test_to_csv_string_with_lf(self, engine): # GH 20353 + raises_if_pyarrow = check_raises_if_pyarrow("lineterminator", engine) data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} df = DataFrame(data) with tm.ensure_clean("lf_test.csv") as path: @@ -499,20 +571,23 @@ def 
test_to_csv_string_with_lf(self, engine): with tm.ensure_clean("lf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' - df.to_csv(path, lineterminator="\n", index=False, engine=engine) - with open(path, "rb") as f: - assert f.read() == expected_lf + with raises_if_pyarrow: + df.to_csv(path, lineterminator="\n", index=False, engine=engine) + with open(path, "rb") as f: + assert f.read() == expected_lf with tm.ensure_clean("lf_test.csv") as path: # case 3: CRLF as line terminator # 'lineterminator' should not change inner element expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' - df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) - with open(path, "rb") as f: - assert f.read() == expected_crlf + with raises_if_pyarrow: + df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) + with open(path, "rb") as f: + assert f.read() == expected_crlf @xfail_pyarrow def test_to_csv_string_with_crlf(self, engine): # GH 20353 + raises_if_pyarrow = check_raises_if_pyarrow("lineterminator", engine) data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} df = DataFrame(data) with tm.ensure_clean("crlf_test.csv") as path: @@ -534,9 +609,10 @@ def test_to_csv_string_with_crlf(self, engine): with tm.ensure_clean("crlf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' - df.to_csv(path, lineterminator="\n", index=False, engine=engine) - with open(path, "rb") as f: - assert f.read() == expected_lf + with raises_if_pyarrow: + df.to_csv(path, lineterminator="\n", index=False, engine=engine) + with open(path, "rb") as f: + assert f.read() == expected_lf with tm.ensure_clean("crlf_test.csv") as path: # case 3: CRLF as line terminator # 'lineterminator' should not change inner element @@ -546,9 +622,10 @@ def test_to_csv_string_with_crlf(self, engine): b'2,"d\r\nef"\r\n' 
b'3,"g\r\nh\r\n\r\ni"\r\n' ) - df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) - with open(path, "rb") as f: - assert f.read() == expected_crlf + with raises_if_pyarrow: + df.to_csv(path, lineterminator="\r\n", index=False, engine=engine) + with open(path, "rb") as f: + assert f.read() == expected_crlf @xfail_pyarrow def test_to_csv_stdout_file(self, capsys, engine): @@ -587,7 +664,7 @@ def test_to_csv_write_to_open_file(self, engine): if engine == "pyarrow": raise_if_pyarrow = pytest.raises( ValueError, - match="The pyarrow engine can only open file in abinary mode.", + match="The pyarrow engine can only open files in binary mode.", ) else: raise_if_pyarrow = contextlib.nullcontext() @@ -703,20 +780,21 @@ def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname, engin archived_file = zp.filelist[0].filename assert archived_file == expected_arcname - @xfail_pyarrow @pytest.mark.parametrize("df_new_type", ["Int64"]) def test_to_csv_na_rep_long_string(self, df_new_type, engine): # see gh-25099 + raises_if_pyarrow = check_raises_if_pyarrow("na_rep", engine) df = DataFrame({"c": [float("nan")] * 3}) df = df.astype(df_new_type) expected_rows = ["c", "mynull", "mynull", "mynull"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - result = df.to_csv( - index=False, na_rep="mynull", encoding="ascii", engine=engine - ) + with raises_if_pyarrow: + result = df.to_csv( + index=False, na_rep="mynull", encoding="ascii", engine=engine ) - assert expected == result + assert expected == result @xfail_pyarrow def test_to_csv_timedelta_precision(self, engine): @@ -733,29 +811,35 @@ def test_to_csv_timedelta_precision(self, engine): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected - @xfail_pyarrow def test_na_rep_truncated(self, engine): # https://github.com/pandas-dev/pandas/issues/31447 - result = pd.Series(range(8, 12)).to_csv(na_rep="-", engine=engine) - expected =
tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"]) - assert result == expected + raises_if_pyarrow = check_raises_if_pyarrow("na_rep", engine) + with raises_if_pyarrow: + result = pd.Series(range(8, 12)).to_csv(na_rep="-", engine=engine) + expected = tm.convert_rows_list_to_csv_str( + [",0", "0,8", "1,9", "2,10", "3,11"] + ) + assert result == expected - result = pd.Series([True, False]).to_csv(na_rep="nan", engine=engine) - expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) - assert result == expected + with raises_if_pyarrow: + result = pd.Series([True, False]).to_csv(na_rep="nan", engine=engine) + expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) + assert result == expected - result = pd.Series([1.1, 2.2]).to_csv(na_rep=".", engine=engine) - expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) - assert result == expected + with raises_if_pyarrow: + result = pd.Series([1.1, 2.2]).to_csv(na_rep=".", engine=engine) + expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) + assert result == expected - @xfail_pyarrow @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) def test_to_csv_errors(self, errors, engine): # GH 22610 + raises_if_pyarrow = check_raises_if_pyarrow("errors", engine) data = ["\ud800foo"] ser = pd.Series(data, index=pd.Index(data)) - with tm.ensure_clean("test.csv") as path: - ser.to_csv(path, errors=errors, engine=engine) + with raises_if_pyarrow: + with tm.ensure_clean("test.csv") as path: + ser.to_csv(path, errors=errors, engine=engine) # No use in reading back the data as it is not the same anymore # due to the error handling @@ -770,36 +854,56 @@ def test_to_csv_binary_handle(self, mode, engine): df = tm.makeDataFrame() with tm.ensure_clean() as path: with open(path, mode="w+b") as handle: - df.to_csv(handle, mode=mode, engine=engine) - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + if engine == "pyarrow" and mode == 
"w": + raises_if_pyarrow = pytest.raises( + ValueError, + match="The pyarrow engine can only open files in binary mode.", + ) + else: + raises_if_pyarrow = contextlib.nullcontext() + with raises_if_pyarrow: + df.to_csv(handle, mode=mode, engine=engine) + if not engine == "pyarrow" and mode == "w": + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) - @xfail_pyarrow @pytest.mark.parametrize("mode", ["wb", "w"]) - def test_to_csv_encoding_binary_handle(self, mode, engine): + def test_to_csv_encoding_binary_handle(self, mode, engine, request): """ Binary file objects should honor a specified encoding. GH 23854 and GH 13068 with binary handles """ + + if mode == "w" and engine == "pyarrow": + mark = pytest.mark.xfail( + reason="pyarrow doesn't support non-binary handles." + ) + request.applymarker(mark) + + raises_if_pyarrow = check_raises_if_pyarrow("encoding", engine) # example from GH 23854 content = "a, b, 🐟".encode("utf-8-sig") buffer = io.BytesIO(content) df = pd.read_csv(buffer, encoding="utf-8-sig") buffer = io.BytesIO() - df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False, engine=engine) - buffer.seek(0) # tests whether file handle wasn't closed - assert buffer.getvalue().startswith(content) + with raises_if_pyarrow: + df.to_csv( + buffer, mode=mode, encoding="utf-8-sig", index=False, engine=engine + ) + buffer.seek(0) # tests whether file handle wasn't closed + assert buffer.getvalue().startswith(content) # example from GH 13068 with tm.ensure_clean() as path: with open(path, "w+b") as handle: - DataFrame().to_csv( - handle, mode=mode, encoding="utf-8-sig", engine=engine - ) + with raises_if_pyarrow: + DataFrame().to_csv( + handle, mode=mode, encoding="utf-8-sig", engine=engine + ) - handle.seek(0) - assert handle.read().startswith(b'\xef\xbb\xbf""') + handle.seek(0) + assert handle.read().startswith(b'\xef\xbb\xbf""') def test_to_csv_iterative_compression_name(compression, engine): From ba451e1a48d93b10a41479649c90b6a688d834e2 Mon Sep 17 
00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 7 Dec 2023 10:06:16 -0500 Subject: [PATCH 14/22] fix tests --- pandas/tests/io/formats/test_to_csv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index bced5ec137e14..7f99b50a40f8c 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -838,9 +838,9 @@ def test_to_csv_errors(self, errors, engine): raises_if_pyarrow = check_raises_if_pyarrow("errors", engine) data = ["\ud800foo"] with raises_if_pyarrow: - ser = pd.Series(data, index=Index(data)) - with tm.ensure_clean("test.csv") as path: - ser.to_csv(path, errors=errors) + ser = pd.Series(data, index=Index(data)) + with tm.ensure_clean("test.csv") as path: + ser.to_csv(path, errors=errors, engine=engine) # No use in reading back the data as it is not the same anymore # due to the error handling From 8ff04f7a542dfa5d085f7e47136952bef69c4ba8 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 15:55:58 -0400 Subject: [PATCH 15/22] Move whatsnew entry to v3.0.0 --- doc/source/whatsnew/v2.2.0.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 15bba47bdea64..e32417e367427 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -343,7 +343,6 @@ Other enhancements - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to 
openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) -- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - Implement :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for :class:`ArrowDtype` and masked dtypes (:issue:`56267`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Implemented :meth:`Series.dt` methods and attributes for :class:`ArrowDtype` with ``pyarrow.duration`` type (:issue:`52284`) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7ec50137c3039..a5f9f25e49032 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -225,6 +225,7 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) +- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - .. --------------------------------------------------------------------------- From 84b4e5930c935ee5974c1be88a4d9f25f004851a Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 16:04:43 -0400 Subject: [PATCH 16/22] Update versionadded to 3.0.0 --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 60f20772df2fc..f23fc346f0b51 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3876,7 +3876,7 @@ def to_csv( However, the python engine may be more feature complete than the pyarrow engine. - .. versionadded:: 2.1.0 + .. 
versionadded:: 3.0.0 Returns ------- From 5d6305ebdc6868a0ad1c668831fe7c8a992f8bd9 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 16:36:15 -0400 Subject: [PATCH 17/22] No need to support pyarrow < 11 anymore --- pandas/io/formats/csvs.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 531a2104bd6b9..42b678c8ca83d 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -25,7 +25,6 @@ from pandas._libs import writers as libwriters from pandas._typing import SequenceNotStr -from pandas.compat import pa_version_under11p0 from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import cache_readonly @@ -333,9 +332,7 @@ def _save_pyarrow(self, handle: IO[AnyStr]) -> None: "batch_size": self.chunksize, } kwargs["delimiter"] = self.sep - - if not pa_version_under11p0: - kwargs["quoting_style"] = pa_quoting + kwargs["quoting_style"] = pa_quoting write_options = pa_csv.WriteOptions(**kwargs) pa_csv.write_csv(table, handle, write_options) From 7da6613fb87c0dae3a9966a339e5781f94f07155 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 17:02:04 -0400 Subject: [PATCH 18/22] Fixup test --- pandas/tests/io/formats/test_to_csv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 2fa27d3769973..e4c21150e926c 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -835,9 +835,9 @@ def test_to_csv_errors(self, errors, engine): raises_if_pyarrow = check_raises_if_pyarrow("errors", engine) data = ["\ud800foo"] with raises_if_pyarrow: - ser = pd.Series(data, index=Index(data, dtype=object), dtype=object) - with tm.ensure_clean("test.csv") as path: - ser.to_csv(path, errors=errors) + ser = pd.Series(data, index=Index(data, dtype=object), dtype=object) + with 
tm.ensure_clean("test.csv") as path: + ser.to_csv(path, errors=errors, engine=engine) # No use in reading back the data as it is not the same anymore # due to the error handling From 737000843ae375ca37309b64c704a49057471793 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 17:26:28 -0400 Subject: [PATCH 19/22] Add escapechar to unsupported options --- pandas/io/formats/csvs.py | 1 + pandas/tests/io/formats/test_to_csv.py | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 42b678c8ca83d..842c143af8a4b 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -291,6 +291,7 @@ def _save_pyarrow(self, handle: IO[AnyStr]) -> None: (self.lineterminator, os.linesep, "lineterminator"), (self.encoding, None, "encoding"), (self.errors, "strict", "errors"), + (self.escapechar, None, "escapechar"), ] for opt_val, default, option in unsupported_options: diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index e4c21150e926c..6c88a01c77431 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -146,7 +146,8 @@ def test_to_csv_doublequote(self): with pytest.raises(Error, match="escapechar"): df.to_csv(path, doublequote=False, engine=engine) # no escapechar set - def test_to_csv_escapechar(self, engine=engine): + def test_to_csv_escapechar(self, engine): + raises_if_pyarrow = check_raises_if_pyarrow("escapechar", engine) df = DataFrame({"col": ['a"a', '"bb"']}) expected = """\ "","col" @@ -154,12 +155,13 @@ def test_to_csv_escapechar(self, engine=engine): "1","\\"bb\\"" """ - with tm.ensure_clean("test.csv") as path: # QUOTE_ALL - df.to_csv( - path, quoting=1, doublequote=False, escapechar="\\", engine=engine - ) - with open(path, encoding="utf-8") as f: - assert f.read() == expected + with raises_if_pyarrow: + with tm.ensure_clean("test.csv") as path: # 
QUOTE_ALL + df.to_csv( + path, quoting=1, doublequote=False, escapechar="\\", engine=engine + ) + with open(path, encoding="utf-8") as f: + assert f.read() == expected df = DataFrame({"col": ["a,a", ",bb,"]}) expected = """\ @@ -168,10 +170,11 @@ def test_to_csv_escapechar(self, engine=engine): 1,\\,bb\\, """ - with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=3, escapechar="\\", engine=engine) # QUOTE_NONE - with open(path, encoding="utf-8") as f: - assert f.read() == expected + with raises_if_pyarrow: + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=3, escapechar="\\", engine=engine) # QUOTE_NONE + with open(path, encoding="utf-8") as f: + assert f.read() == expected @xfail_pyarrow def test_csv_to_string(self, engine): From a157861f136680ce008c1b677a67abe98f4e6566 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 5 Sep 2025 17:33:25 -0400 Subject: [PATCH 20/22] Sort whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a5f9f25e49032..983284e0bcd92 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -216,6 +216,7 @@ Other enhancements - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). 
- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) +- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`62038`) @@ -225,7 +226,6 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) -- Allow using pyarrow to serialize :class:`DataFrame` and :class:`Series` to CSV with ``engine="pyarrow"`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` (:issue:`53618`) - .. 
--------------------------------------------------------------------------- From b19c5a3d1af51066a2c5760d68124ea13d76220a Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Sat, 6 Sep 2025 09:35:39 -0400 Subject: [PATCH 21/22] Fix type ignore --- pandas/io/formats/csvs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 842c143af8a4b..51a55da9bb2cd 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -348,7 +348,7 @@ def _save(self, handle: IO[AnyStr]) -> None: # in function "writer" # error: Argument "quoting" to "writer" has incompatible type "int"; # expected "Literal[0, 1, 2, 3]" - handle, # pyright: ignore[reportGeneralTypeIssues] + handle, # type: ignore[arg-type] lineterminator=self.lineterminator, delimiter=self.sep, quoting=self.quoting, # type: ignore[arg-type] From 8a13c4b94b587054a30ff1405f5a112caa4b31b4 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Sat, 6 Sep 2025 15:43:07 -0400 Subject: [PATCH 22/22] Hopefully fix test_to_csv_single_level_multi_index on Windows This test was passing even though xfailed, so remove the raises_if_pyarrow so it will fail due to that (hopefully). 
--- pandas/tests/io/formats/test_to_csv.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 6c88a01c77431..be31852ad14b9 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -512,12 +512,9 @@ def test_to_csv_single_level_multi_index( self, ind, expected, frame_or_series, engine ): # see gh-19589 - raises_if_pyarrow = check_raises_if_pyarrow("lineterminator", engine) obj = frame_or_series(pd.Series([1], ind, name="data")) - - with raises_if_pyarrow: - result = obj.to_csv(lineterminator="\n", header=True, engine=engine) - assert result == expected + result = obj.to_csv(lineterminator="\n", header=True, engine=engine) + assert result == expected def test_to_csv_string_array_ascii(self, engine): # GH 10813