@@ -283,16 +283,39 @@ def test_contains_nan(any_string_dtype):
283
283
284
284
def test_contains_compiled_regex(any_string_dtype):
    """Check ``Series.str.contains`` when ``pat`` is a compiled regex.

    Covers a plain compiled pattern, a compiled pattern combined with
    ``case=False``, a pattern compiled with ``re.IGNORECASE``, and a compiled
    pattern combined with an explicit ``flags`` argument.
    """
    # GH#61942
    expected_dtype = (
        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )
    msg = "cannot process flags argument with a compiled pattern"

    ser = Series(["foo", "bar", "Baz"], dtype=any_string_dtype)

    # A compiled pattern without flags matches case-sensitively.
    pat = re.compile("ba.")
    result = ser.str.contains(pat)
    expected = Series([False, True, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
        result = ser.str.contains(pat, case=False)
        expected = Series([False, True, True], dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
    else:
        with pytest.raises(ValueError, match=msg):
            ser.str.contains(pat, case=False)

    # Case-insensitivity baked into the compiled pattern itself is honoured.
    pat = re.compile("ba.", flags=re.IGNORECASE)
    result = ser.str.contains(pat)
    expected = Series([False, True, True], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # TODO should this be supported?
    with pytest.raises(ValueError, match=msg):
        ser.str.contains(pat, flags=re.IGNORECASE)
318
+
296
319
297
320
# --------------------------------------------------------------------------------------
298
321
# str.startswith
@@ -833,14 +856,36 @@ def test_match_case_kwarg(any_string_dtype):
833
856
834
857
def test_match_compiled_regex(any_string_dtype):
    """Check ``Series.str.match`` when ``pat`` is a compiled regex.

    Covers a plain compiled pattern, a compiled pattern combined with
    ``case=False``, a pattern compiled with ``re.IGNORECASE``, and a compiled
    pattern combined with an explicit ``flags`` argument.
    """
    # GH#61952
    expected_dtype = (
        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )
    msg = "cannot process flags argument with a compiled pattern"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # A compiled pattern without flags matches case-sensitively.
    result = values.str.match(re.compile("ab"))
    expected = Series([True, False, True, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
        result = values.str.match(re.compile("ab"), case=False)
        expected = Series([True, True, True, True], dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
    else:
        with pytest.raises(ValueError, match=msg):
            values.str.match(re.compile("ab"), case=False)

    # Case-insensitivity baked into the compiled pattern itself is honoured.
    result = values.str.match(re.compile("ab", flags=re.IGNORECASE))
    expected = Series([True, True, True, True], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # An explicit ``flags`` argument next to a compiled pattern is rejected.
    with pytest.raises(ValueError, match=msg):
        values.str.match(re.compile("ab"), flags=re.IGNORECASE)
888
+
844
889
845
890
# --------------------------------------------------------------------------------------
846
891
# str.fullmatch
@@ -913,14 +958,36 @@ def test_fullmatch_case_kwarg(any_string_dtype):
913
958
914
959
def test_fullmatch_compiled_regex(any_string_dtype):
    """Check ``Series.str.fullmatch`` when ``pat`` is a compiled regex.

    Covers a plain compiled pattern, a compiled pattern combined with
    ``case=False``, a pattern compiled with ``re.IGNORECASE``, and a compiled
    pattern combined with an explicit ``flags`` argument.
    """
    # GH#61952
    expected_dtype = (
        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )
    msg = "cannot process flags argument with a compiled pattern"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # A compiled pattern without flags matches case-sensitively.
    result = values.str.fullmatch(re.compile("ab"))
    expected = Series([True, False, False, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
        result = values.str.fullmatch(re.compile("ab"), case=False)
        expected = Series([True, True, False, False], dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
    else:
        with pytest.raises(ValueError, match=msg):
            values.str.fullmatch(re.compile("ab"), case=False)

    # Case-insensitivity baked into the compiled pattern itself is honoured.
    result = values.str.fullmatch(re.compile("ab", flags=re.IGNORECASE))
    expected = Series([True, True, False, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # An explicit ``flags`` argument next to a compiled pattern is rejected.
    with pytest.raises(ValueError, match=msg):
        values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
990
+
924
991
925
992
# --------------------------------------------------------------------------------------
926
993
# str.findall
0 commit comments