Skip to content

Commit c8286fb

Browse files
authored
Merge branch 'main' into bugfix-arrowdtype-itemsize
2 parents 873a1ab + 7f670c1 commit c8286fb

File tree

92 files changed

+1078
-471
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1078
-471
lines changed

.pre-commit-config.yaml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.12.7
22+
rev: v0.12.11
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -51,7 +51,7 @@ repos:
5151
- id: cython-lint
5252
- id: double-quote-cython-strings
5353
- repo: https://github.com/pre-commit/pre-commit-hooks
54-
rev: v5.0.0
54+
rev: v6.0.0
5555
hooks:
5656
- id: check-case-conflict
5757
- id: check-toml
@@ -64,8 +64,6 @@ repos:
6464
args: [--fix=auto]
6565
exclude: ^pandas/tests/io/parser/data/utf16_ex.txt$
6666
- id: fix-byte-order-marker
67-
- id: fix-encoding-pragma
68-
args: [--remove]
6967
- id: trailing-whitespace
7068
args: [--markdown-linebreak-ext=md]
7169
- repo: https://github.com/PyCQA/isort
@@ -94,19 +92,19 @@ repos:
9492
- id: sphinx-lint
9593
args: ["--enable", "all", "--disable", "line-too-long"]
9694
- repo: https://github.com/pre-commit/mirrors-clang-format
97-
rev: v20.1.8
95+
rev: v21.1.0
9896
hooks:
9997
- id: clang-format
10098
files: ^pandas/_libs/src|^pandas/_libs/include
10199
args: [-i]
102100
types_or: [c, c++]
103101
- repo: https://github.com/trim21/pre-commit-mirror-meson
104-
rev: v1.8.3
102+
rev: v1.9.0
105103
hooks:
106104
- id: meson-fmt
107105
args: ['--inplace']
108106
- repo: https://github.com/shellcheck-py/shellcheck-py
109-
rev: v0.10.0.1
107+
rev: v0.11.0.1
110108
hooks:
111109
- id: shellcheck
112110
args: ["--severity=warning"]
@@ -121,7 +119,7 @@ repos:
121119
types: [python]
122120
stages: [manual]
123121
additional_dependencies: &pyright_dependencies
124-
122+
125123
- id: pyright
126124
# note: assumes python env is setup and activated
127125
name: pyright reportGeneralTypeIssues
@@ -266,6 +264,11 @@ repos:
266264
language: python
267265
entry: python scripts/validate_unwanted_patterns.py --validation-type="nodefault_used_not_only_for_typing"
268266
types: [python]
267+
- id: unwanted-patterns-doesnt-use-pandas-warnings
268+
name: Check that warning classes for deprecations use pandas' warning classes
269+
language: python
270+
entry: python scripts/validate_unwanted_patterns.py --validation-type="doesnt_use_pandas_warnings"
271+
types: [ python ]
269272
- id: no-return-exception
270273
name: Use raise instead of return for exceptions
271274
language: pygrep

asv_bench/benchmarks/strings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
Index,
1010
Series,
11+
StringDtype,
1112
)
1213
from pandas.arrays import StringArray
1314

@@ -290,10 +291,10 @@ def setup(self):
290291
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
291292

292293
def time_string_array_construction(self):
293-
StringArray(self.series_arr)
294+
StringArray(self.series_arr, dtype=StringDtype())
294295

295296
def time_string_array_with_nan_construction(self):
296-
StringArray(self.series_arr_nan)
297+
StringArray(self.series_arr_nan, dtype=StringDtype())
297298

298299
def peakmem_stringarray_construction(self):
299-
StringArray(self.series_arr)
300+
StringArray(self.series_arr, dtype=StringDtype())

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 2.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v2.3.3
2728
v2.3.2
2829
v2.3.1
2930
v2.3.0

doc/source/whatsnew/v2.3.3.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
.. _whatsnew_233:
2+
3+
What's new in 2.3.3 (September XX, 2025)
4+
----------------------------------------
5+
6+
These are the changes in pandas 2.3.3. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_233.string_fixes:
13+
14+
Improvements and fixes for the StringDtype
15+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16+
17+
Most changes in this release are related to :class:`StringDtype` which will
18+
become the default string dtype in pandas 3.0. See
19+
:ref:`whatsnew_230.upcoming_changes` for more details.
20+
21+
.. _whatsnew_233.string_fixes.bugs:
22+
23+
Bug fixes
24+
^^^^^^^^^
25+
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
26+
with a compiled regex and custom flags (:issue:`62240`)
27+
28+
.. ---------------------------------------------------------------------------
29+
.. _whatsnew_233.contributors:
30+
31+
Contributors
32+
~~~~~~~~~~~~

doc/source/whatsnew/v3.0.0.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,7 @@ Timedelta
895895
- Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
896896
- Bug in :class:`Timedelta` constructor failing to raise when passed an invalid keyword (:issue:`53801`)
897897
- Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
898+
- Bug in multiplication operations with ``timedelta64`` dtype failing to raise ``TypeError`` when multiplying by ``bool`` objects or dtypes (:issue:`58054`)
898899

899900
Timezones
900901
^^^^^^^^^
@@ -920,6 +921,7 @@ Conversion
920921
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
921922
- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`)
922923
- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
924+
- Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`)
923925

924926
Strings
925927
^^^^^^^
@@ -945,11 +947,14 @@ Indexing
945947
- Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)
946948
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
947949
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
950+
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
951+
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
948952

949953
Missing
950954
^^^^^^^
951955
- Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`)
952956
- Bug in :meth:`NA.__and__`, :meth:`NA.__or__` and :meth:`NA.__xor__` when operating with ``np.bool_`` objects (:issue:`58427`)
957+
- Bug in ``divmod`` between :class:`NA` and ``Int64`` dtype objects (:issue:`62196`)
953958
-
954959

955960
MultiIndex
@@ -1118,6 +1123,7 @@ Other
11181123
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
11191124
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
11201125
- Bug in ``Series.replace`` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`)
1126+
- Bug in ``divmod`` and ``rdivmod`` with :class:`DataFrame`, :class:`Series`, and :class:`Index` with ``bool`` dtypes failing to raise, which was inconsistent with ``__floordiv__`` behavior (:issue:`46043`)
11211127
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
11221128
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
11231129
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ dependencies:
7777

7878
# code checks
7979
- flake8=7.1.0 # run in subprocess over docstring examples
80-
- mypy=1.13.0 # pre-commit uses locally installed mypy
80+
- mypy=1.17.1 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
8282
- pre-commit>=4.2.0
8383

pandas/_config/config.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373

7474
class DeprecatedOption(NamedTuple):
7575
key: str
76+
category: type[Warning]
7677
msg: str | None
7778
rkey: str | None
7879
removal_ver: str | None
@@ -589,6 +590,7 @@ def register_option(
589590

590591
def deprecate_option(
591592
key: str,
593+
category: type[Warning],
592594
msg: str | None = None,
593595
rkey: str | None = None,
594596
removal_ver: str | None = None,
@@ -608,6 +610,8 @@ def deprecate_option(
608610
key : str
609611
Name of the option to be deprecated.
610612
Must be a fully-qualified option name (e.g. "x.y.z.rkey").
613+
category : type[Warning]
614+
Warning class for the deprecation.
611615
msg : str, optional
612616
Warning message to output when the key is referenced.
613617
if no message is given a default message will be emitted.
@@ -631,7 +635,7 @@ def deprecate_option(
631635
if key in _deprecated_options:
632636
raise OptionError(f"Option '{key}' has already been defined as deprecated.")
633637

634-
_deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
638+
_deprecated_options[key] = DeprecatedOption(key, category, msg, rkey, removal_ver)
635639

636640

637641
#
@@ -716,7 +720,7 @@ def _warn_if_deprecated(key: str) -> bool:
716720
if d.msg:
717721
warnings.warn(
718722
d.msg,
719-
FutureWarning,
723+
d.category,
720724
stacklevel=find_stack_level(),
721725
)
722726
else:
@@ -728,7 +732,11 @@ def _warn_if_deprecated(key: str) -> bool:
728732
else:
729733
msg += ", please refrain from using it."
730734

731-
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
735+
warnings.warn(
736+
msg,
737+
d.category,
738+
stacklevel=find_stack_level(),
739+
)
732740
return True
733741
return False
734742

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Numeric decoder derived from TCL library
5151
#include <numpy/ndarraytypes.h>
5252
#include <numpy/npy_math.h>
5353

54+
static const int CSTR_SIZE = 20;
55+
5456
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
5557

5658
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
@@ -106,7 +108,7 @@ typedef struct __TypeContext {
106108
double doubleValue;
107109
JSINT64 longValue;
108110

109-
const char *cStr;
111+
char *cStr;
110112
NpyArrContext *npyarr;
111113
PdBlockContext *pdblock;
112114
int transpose;
@@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
347349
}
348350

349351
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
350-
return PyDateTimeToIso(obj, base, len);
352+
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
353+
return GET_TC(tc)->cStr;
351354
}
352355

353356
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
@@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
10071010
//=============================================================================
10081011
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10091012
GET_TC(tc)->index = 0;
1013+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1014+
if (!GET_TC(tc)->cStr) {
1015+
PyErr_NoMemory();
1016+
}
10101017
}
10111018

10121019
static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10131020
const Py_ssize_t index = GET_TC(tc)->index;
10141021
Py_XDECREF(GET_TC(tc)->itemValue);
1022+
if (!GET_TC(tc)->cStr) {
1023+
return 0;
1024+
}
1025+
10151026
if (index == 0) {
1016-
GET_TC(tc)->cStr = "name";
1027+
strcpy(GET_TC(tc)->cStr, "name");
10171028
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10181029
} else if (index == 1) {
1019-
GET_TC(tc)->cStr = "data";
1030+
strcpy(GET_TC(tc)->cStr, "data");
10201031
GET_TC(tc)->itemValue = get_values(obj);
10211032
if (!GET_TC(tc)->itemValue) {
10221033
return 0;
@@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10491060
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10501061
GET_TC(tc)->index = 0;
10511062
enc->outputFormat = VALUES; // for contained series
1063+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1064+
if (!GET_TC(tc)->cStr) {
1065+
PyErr_NoMemory();
1066+
}
10521067
}
10531068

10541069
static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10551070
const Py_ssize_t index = GET_TC(tc)->index;
10561071
Py_XDECREF(GET_TC(tc)->itemValue);
1072+
if (!GET_TC(tc)->cStr) {
1073+
return 0;
1074+
}
1075+
10571076
if (index == 0) {
1058-
GET_TC(tc)->cStr = "name";
1077+
strcpy(GET_TC(tc)->cStr, "name");
10591078
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10601079
} else if (index == 1) {
1061-
GET_TC(tc)->cStr = "index";
1080+
strcpy(GET_TC(tc)->cStr, "index");
10621081
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
10631082
} else if (index == 2) {
1064-
GET_TC(tc)->cStr = "data";
1083+
strcpy(GET_TC(tc)->cStr, "data");
10651084
GET_TC(tc)->itemValue = get_values(obj);
10661085
if (!GET_TC(tc)->itemValue) {
10671086
return 0;
@@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10961115
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10971116
GET_TC(tc)->index = 0;
10981117
enc->outputFormat = VALUES; // for contained series & index
1118+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1119+
if (!GET_TC(tc)->cStr) {
1120+
PyErr_NoMemory();
1121+
}
10991122
}
11001123

11011124
static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
11021125
const Py_ssize_t index = GET_TC(tc)->index;
11031126
Py_XDECREF(GET_TC(tc)->itemValue);
1127+
if (!GET_TC(tc)->cStr) {
1128+
return 0;
1129+
}
1130+
11041131
if (index == 0) {
1105-
GET_TC(tc)->cStr = "columns";
1132+
strcpy(GET_TC(tc)->cStr, "columns");
11061133
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
11071134
} else if (index == 1) {
1108-
GET_TC(tc)->cStr = "index";
1135+
strcpy(GET_TC(tc)->cStr, "index");
11091136
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
11101137
} else if (index == 2) {
1111-
GET_TC(tc)->cStr = "data";
1138+
strcpy(GET_TC(tc)->cStr, "data");
11121139
Py_INCREF(obj);
11131140
GET_TC(tc)->itemValue = obj;
11141141
} else {
@@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
18801907
GET_TC(tc)->rowLabels = NULL;
18811908
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
18821909
GET_TC(tc)->columnLabels = NULL;
1910+
PyObject_Free(GET_TC(tc)->cStr);
18831911
GET_TC(tc)->cStr = NULL;
18841912
PyObject_Free(tc->prv);
18851913
tc->prv = NULL;

0 commit comments

Comments
 (0)