pandas-dev
diff --git a/‎.pre-commit-config.yaml
Lines changed: 11 additions & 8 deletions b/‎.pre-commit-config.yaml
Lines changed: 11 additions & 8 deletions
diff --git a/‎asv_bench/benchmarks/strings.py
Lines changed: 4 additions & 3 deletions b/‎asv_bench/benchmarks/strings.py
Lines changed: 4 additions & 3 deletions
diff --git a/‎doc/source/whatsnew/index.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/whatsnew/v2.3.3.rst
Lines changed: 32 additions & 0 deletions b/‎doc/source/whatsnew/v2.3.3.rst
Lines changed: 32 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 6 additions & 0 deletions b/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 6 additions & 0 deletions
diff --git a/‎environment.yml
Lines changed: 1 addition & 1 deletion b/‎environment.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_config/config.py
Lines changed: 11 additions & 3 deletions b/‎pandas/_config/config.py
Lines changed: 11 additions & 3 deletions
diff --git a/‎pandas/_libs/src/vendored/ujson/python/objToJSON.c
Lines changed: 38 additions & 10 deletions b/‎pandas/_libs/src/vendored/ujson/python/objToJSON.c
Lines changed: 38 additions & 10 deletions
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.7
+    rev: v0.12.11
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -51,7 +51,7 @@ repos:
     -   id: cython-lint
     -   id: double-quote-cython-strings
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
     hooks:
     -   id: check-case-conflict
     -   id: check-toml
@@ -64,8 +64,6 @@ repos:
         args: [--fix=auto]
         exclude: ^pandas/tests/io/parser/data/utf16_ex.txt$
     -   id: fix-byte-order-marker
-    -   id: fix-encoding-pragma
-        args: [--remove]
     -   id: trailing-whitespace
         args: [--markdown-linebreak-ext=md]
 -   repo: https://github.com/PyCQA/isort
@@ -94,19 +92,19 @@ repos:
     - id: sphinx-lint
       args: ["--enable", "all", "--disable", "line-too-long"]
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v20.1.8
+    rev: v21.1.0
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include
       args: [-i]
       types_or: [c, c++]
 -   repo: https://github.com/trim21/pre-commit-mirror-meson
-    rev: v1.8.3
+    rev: v1.9.0
     hooks:
     - id: meson-fmt
       args: ['--inplace']
 -   repo: https://github.com/shellcheck-py/shellcheck-py
-    rev: v0.10.0.1
+    rev: v0.11.0.1
     hooks:
     -   id: shellcheck
         args: ["--severity=warning"]
@@ -121,7 +119,7 @@ repos:
         types: [python]
         stages: [manual]
         additional_dependencies: &pyright_dependencies
-        - pyright@1.1.383
+        - pyright@1.1.404
     -   id: pyright
         # note: assumes python env is setup and activated
         name: pyright reportGeneralTypeIssues
@@ -266,6 +264,11 @@ repos:
         language: python
         entry: python scripts/validate_unwanted_patterns.py --validation-type="nodefault_used_not_only_for_typing"
         types: [python]
+    -   id: unwanted-patterns-doesnt-use-pandas-warnings
+        name: Check that warning classes for deprecations use pandas' warning classes
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="doesnt_use_pandas_warnings"
+        types: [ python ]
     -   id: no-return-exception
         name: Use raise instead of return for exceptions
         language: pygrep
 
@@ -8,6 +8,7 @@
     DataFrame,
     Index,
     Series,
+    StringDtype,
 )
 from pandas.arrays import StringArray
 
@@ -290,10 +291,10 @@ def setup(self):
         self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
 
     def time_string_array_construction(self):
-        StringArray(self.series_arr)
+        StringArray(self.series_arr, dtype=StringDtype())
 
     def time_string_array_with_nan_construction(self):
-        StringArray(self.series_arr_nan)
+        StringArray(self.series_arr_nan, dtype=StringDtype())
 
     def peakmem_stringarray_construction(self):
-        StringArray(self.series_arr)
+        StringArray(self.series_arr, dtype=StringDtype())
@@ -24,6 +24,7 @@ Version 2.3
 .. toctree::
    :maxdepth: 2
 
+   v2.3.3
    v2.3.2
    v2.3.1
    v2.3.0
 
@@ -0,0 +1,32 @@
+.. _whatsnew_233:
+
+What's new in 2.3.3 (September XX, 2025)
+----------------------------------------
+
+These are the changes in pandas 2.3.3. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_233.string_fixes:
+
+Improvements and fixes for the StringDtype
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most changes in this release are related to :class:`StringDtype` which will
+become the default string dtype in pandas 3.0. See
+:ref:`whatsnew_230.upcoming_changes` for more details.
+
+.. _whatsnew_233.string_fixes.bugs:
+
+Bug fixes
+^^^^^^^^^
+- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
+  with a compiled regex and custom flags (:issue:`62240`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_233.contributors:
+
+Contributors
+~~~~~~~~~~~~
@@ -895,6 +895,7 @@ Timedelta
 - Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
 - Bug in :class:`Timedelta` constructor failing to raise when passed an invalid keyword (:issue:`53801`)
 - Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
+- Bug in multiplication operations with ``timedelta64`` dtype failing to raise ``TypeError`` when multiplying by ``bool`` objects or dtypes (:issue:`58054`)
 
 Timezones
 ^^^^^^^^^
@@ -920,6 +921,7 @@ Conversion
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
 - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`)
 - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
+- Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`)
 
 Strings
 ^^^^^^^
@@ -945,11 +947,14 @@ Indexing
 - Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)
 - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
 - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
+- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
+- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
 
 Missing
 ^^^^^^^
 - Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`)
 - Bug in :meth:`NA.__and__`, :meth:`NA.__or__` and :meth:`NA.__xor__` when operating with ``np.bool_`` objects (:issue:`58427`)
+- Bug in ``divmod`` between :class:`NA` and ``Int64`` dtype objects (:issue:`62196`)
 -
 
 MultiIndex
@@ -1118,6 +1123,7 @@ Other
 - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 - Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
 - Bug in ``Series.replace`` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`)
+- Bug in ``divmod`` and ``rdivmod`` with :class:`DataFrame`, :class:`Series`, and :class:`Index` with ``bool`` dtypes failing to raise, which was inconsistent with ``__floordiv__`` behavior (:issue:`46043`)
 - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
 - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
 - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
 
@@ -77,7 +77,7 @@ dependencies:
 
   # code checks
   - flake8=7.1.0  # run in subprocess over docstring examples
-  - mypy=1.13.0  # pre-commit uses locally installed mypy
+  - mypy=1.17.1  # pre-commit uses locally installed mypy
   - tokenize-rt  # scripts/check_for_inconsistent_pandas_namespace.py
   - pre-commit>=4.2.0
 
 
@@ -73,6 +73,7 @@
 
 class DeprecatedOption(NamedTuple):
     key: str
+    category: type[Warning]
     msg: str | None
     rkey: str | None
     removal_ver: str | None
@@ -589,6 +590,7 @@ def register_option(
 
 def deprecate_option(
     key: str,
+    category: type[Warning],
     msg: str | None = None,
     rkey: str | None = None,
     removal_ver: str | None = None,
@@ -608,6 +610,8 @@ def deprecate_option(
     key : str
         Name of the option to be deprecated.
         must be a fully-qualified option name (e.g "x.y.z.rkey").
+    category : type[Warning]
+        Warning class (not an instance) used as the category of the deprecation warning.
     msg : str, optional
         Warning message to output when the key is referenced.
         if no message is given a default message will be emitted.
@@ -631,7 +635,7 @@ def deprecate_option(
     if key in _deprecated_options:
         raise OptionError(f"Option '{key}' has already been defined as deprecated.")
 
-    _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
+    _deprecated_options[key] = DeprecatedOption(key, category, msg, rkey, removal_ver)
 
 
 #
@@ -716,7 +720,7 @@ def _warn_if_deprecated(key: str) -> bool:
         if d.msg:
             warnings.warn(
                 d.msg,
-                FutureWarning,
+                d.category,
                 stacklevel=find_stack_level(),
             )
         else:
@@ -728,7 +732,11 @@ def _warn_if_deprecated(key: str) -> bool:
             else:
                 msg += ", please refrain from using it."
 
-            warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
+            warnings.warn(
+                msg,
+                d.category,
+                stacklevel=find_stack_level(),
+            )
         return True
     return False
 
 
@@ -51,6 +51,8 @@ Numeric decoder derived from TCL library
 #include <numpy/ndarraytypes.h>
 #include <numpy/npy_math.h>
 
+static const int CSTR_SIZE = 20;
+
 npy_int64 get_nat(void) { return NPY_MIN_INT64; }
 
 typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
@@ -106,7 +108,7 @@ typedef struct __TypeContext {
   double doubleValue;
   JSINT64 longValue;
 
-  const char *cStr;
+  char *cStr;
   NpyArrContext *npyarr;
   PdBlockContext *pdblock;
   int transpose;
@@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
   }
 
   NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-  return PyDateTimeToIso(obj, base, len);
+  GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
+  return GET_TC(tc)->cStr;
 }
 
 static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
@@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
 //=============================================================================
 static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   GET_TC(tc)->index = 0;
+  GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
+  if (!GET_TC(tc)->cStr) {
+    PyErr_NoMemory();
+  }
 }
 
 static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
+  if (!GET_TC(tc)->cStr) {
+    return 0;
+  }
+
   if (index == 0) {
-    GET_TC(tc)->cStr = "name";
+    strcpy(GET_TC(tc)->cStr, "name");
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
   } else if (index == 1) {
-    GET_TC(tc)->cStr = "data";
+    strcpy(GET_TC(tc)->cStr, "data");
     GET_TC(tc)->itemValue = get_values(obj);
     if (!GET_TC(tc)->itemValue) {
       return 0;
@@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
   GET_TC(tc)->index = 0;
   enc->outputFormat = VALUES; // for contained series
+  GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
+  if (!GET_TC(tc)->cStr) {
+    PyErr_NoMemory();
+  }
 }
 
 static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
+  if (!GET_TC(tc)->cStr) {
+    return 0;
+  }
+
   if (index == 0) {
-    GET_TC(tc)->cStr = "name";
+    strcpy(GET_TC(tc)->cStr, "name");
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
   } else if (index == 1) {
-    GET_TC(tc)->cStr = "index";
+    strcpy(GET_TC(tc)->cStr, "index");
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
   } else if (index == 2) {
-    GET_TC(tc)->cStr = "data";
+    strcpy(GET_TC(tc)->cStr, "data");
     GET_TC(tc)->itemValue = get_values(obj);
     if (!GET_TC(tc)->itemValue) {
       return 0;
@@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
   GET_TC(tc)->index = 0;
   enc->outputFormat = VALUES; // for contained series & index
+  GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
+  if (!GET_TC(tc)->cStr) {
+    PyErr_NoMemory();
+  }
 }
 
 static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
+  if (!GET_TC(tc)->cStr) {
+    return 0;
+  }
+
   if (index == 0) {
-    GET_TC(tc)->cStr = "columns";
+    strcpy(GET_TC(tc)->cStr, "columns");
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
   } else if (index == 1) {
-    GET_TC(tc)->cStr = "index";
+    strcpy(GET_TC(tc)->cStr, "index");
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
   } else if (index == 2) {
-    GET_TC(tc)->cStr = "data";
+    strcpy(GET_TC(tc)->cStr, "data");
     Py_INCREF(obj);
     GET_TC(tc)->itemValue = obj;
   } else {
@@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
     GET_TC(tc)->rowLabels = NULL;
     NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
     GET_TC(tc)->columnLabels = NULL;
+    PyObject_Free(GET_TC(tc)->cStr);
     GET_TC(tc)->cStr = NULL;
     PyObject_Free(tc->prv);
     tc->prv = NULL;