Skip to content

Commit 812ef66

Browse files
gh-145202: Fix crash in unicodedata's GraphemeBreakIterator and Segment (GH-145216)
Remove the tp_clear slots and make Segment members read-only. Also add tests for reference loops involving GraphemeBreakIterator and Segment.
1 parent: 43fdb70 · commit: 812ef66

File tree

2 files changed

+26
-18
lines changed

2 files changed

+26
-18
lines changed

Lib/test/test_unicodedata.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,9 @@
1212
import sys
1313
import unicodedata
1414
import unittest
15+
import weakref
1516
from test.support import (
17+
gc_collect,
1618
open_urlresource,
1719
requires_resource,
1820
script_helper,
@@ -1338,6 +1340,28 @@ def run_grapheme_break_tests(self, testdata):
13381340
self.assertEqual([x.start for x in result], breaks[i:-1], comment)
13391341
self.assertEqual([x.end for x in result], breaks[i+1:], comment)
13401342

1343+
def test_reference_loops(self):
1344+
# Test that reference loops involving GraphemeBreakIterator or
1345+
# Segment can be broken by the garbage collector.
1346+
class S(str):
1347+
pass
1348+
1349+
s = S('abc')
1350+
s.ref = unicodedata.iter_graphemes(s)
1351+
wr = weakref.ref(s)
1352+
del s
1353+
self.assertIsNotNone(wr())
1354+
gc_collect()
1355+
self.assertIsNone(wr())
1356+
1357+
s = S('abc')
1358+
s.ref = next(unicodedata.iter_graphemes(s))
1359+
wr = weakref.ref(s)
1360+
del s
1361+
self.assertIsNotNone(wr())
1362+
gc_collect()
1363+
self.assertIsNone(wr())
1364+
13411365

13421366
if __name__ == "__main__":
13431367
unittest.main()

Modules/unicodedata.c

Lines changed: 2 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1925,13 +1925,6 @@ Segment_traverse(PyObject *self, visitproc visit, void *arg)
19251925
return 0;
19261926
}
19271927

1928-
static int
1929-
Segment_clear(PyObject *self)
1930-
{
1931-
Py_CLEAR(((SegmentObject *)self)->string);
1932-
return 0;
1933-
}
1934-
19351928
static PyObject *
19361929
Segment_str(PyObject *self)
19371930
{
@@ -1947,17 +1940,16 @@ Segment_repr(PyObject *self)
19471940
}
19481941

19491942
static PyMemberDef Segment_members[] = {
1950-
{"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), 0,
1943+
{"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), Py_READONLY,
19511944
PyDoc_STR("grapheme start")},
1952-
{"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), 0,
1945+
{"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), Py_READONLY,
19531946
PyDoc_STR("grapheme end")},
19541947
{NULL} /* Sentinel */
19551948
};
19561949

19571950
static PyType_Slot Segment_slots[] = {
19581951
{Py_tp_dealloc, Segment_dealloc},
19591952
{Py_tp_traverse, Segment_traverse},
1960-
{Py_tp_clear, Segment_clear},
19611953
{Py_tp_str, Segment_str},
19621954
{Py_tp_repr, Segment_repr},
19631955
{Py_tp_members, Segment_members},
@@ -2001,13 +1993,6 @@ GBI_traverse(PyObject *self, visitproc visit, void *arg)
20011993
return 0;
20021994
}
20031995

2004-
static int
2005-
GBI_clear(PyObject *self)
2006-
{
2007-
Py_CLEAR(((GraphemeBreakIterator *)self)->iter.str);
2008-
return 0;
2009-
}
2010-
20111996
static PyObject *
20121997
GBI_iternext(PyObject *self)
20131998
{
@@ -2038,7 +2023,6 @@ static PyType_Slot GraphemeBreakIterator_slots[] = {
20382023
{Py_tp_iter, PyObject_SelfIter},
20392024
{Py_tp_iternext, GBI_iternext},
20402025
{Py_tp_traverse, GBI_traverse},
2041-
{Py_tp_clear, GBI_clear},
20422026
{0, 0},
20432027
};
20442028

0 commit comments

Comments
 (0)