Skip to content

Commit f3829fd

Browse files
committed
BUG: Fix Series.str.contains with compiled regex on Arrow strings
1 parent e72c8a1 commit f3829fd

File tree

1 file changed

+33
-7
lines changed

1 file changed

+33
-7
lines changed

pandas/core/strings/accessor.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,13 +1350,39 @@ def contains(
13501350
4 False
13511351
dtype: bool
13521352
"""
1353-
if regex and re.compile(pat).groups:
1354-
warnings.warn(
1355-
"This pattern is interpreted as a regular expression, and has "
1356-
"match groups. To actually get the groups, use str.extract.",
1357-
UserWarning,
1358-
stacklevel=find_stack_level(),
1359-
)
1353+
from pandas.core.dtypes.dtypes import ArrowDtype
1354+
import re
1355+
1356+
# --- Handle Arrow-backed string arrays with compiled regex patterns ---
1357+
# Arrow backend does not support compiled regex objects or Python regex flags.
1358+
# If a compiled regex is passed, only allow it when the `flags` argument is 0; flags embedded in the compiled pattern itself are not checked here.
1359+
1360+
if isinstance(self._data.dtype, ArrowDtype) and isinstance(pat, re.Pattern):
1361+
if flags != 0:
1362+
raise NotImplementedError(
1363+
"Series.str.contains() with a compiled regex pattern and flag is "
1364+
"not supported for Arrow-backed string arrays."
1365+
)
1366+
pat = pat.pattern
1367+
regex = True
1368+
1369+
if regex:
1370+
try:
1371+
_compiled = pat if isinstance(pat, re.Pattern) else re.compile(
1372+
pat, flags=flags
1373+
)
1374+
if _compiled.groups:
1375+
warnings.warn(
1376+
"This pattern is interpreted as a regular expression, and has "
1377+
"match groups. To actually get the groups, use str.extract.",
1378+
UserWarning,
1379+
stacklevel=find_stack_level(),
1380+
)
1381+
except re.error as e:
1382+
raise ValueError(
1383+
f"Invalid regex pattern passed to str.contains(): {e}"
1384+
) from e
1385+
13601386

13611387
result = self._data.array._str_contains(pat, case, flags, na, regex)
13621388
return self._wrap_result(result, fill_value=na, returns_string=False)

0 commit comments

Comments
 (0)