Skip to content

Commit 7636a66

Browse files
gh-135661: Fix parsing unterminated bogus comments in HTMLParser (GH-137873)
A bogus comment that starts with "<![CDATA[" should not include the starting "!" in its value.
1 parent eac37b4 commit 7636a66

File tree

2 files changed

+9
-15
lines changed

2 files changed

+9
-15
lines changed

Lib/html/parser.py

Lines changed: 8 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -271,11 +271,8 @@ def goahead(self, end):
271271
j -= len(suffix)
272272
break
273273
self.handle_comment(rawdata[i+4:j])
274-
elif startswith("<![CDATA[", i):
275-
if self._support_cdata:
276-
self.unknown_decl(rawdata[i+3:])
277-
else:
278-
self.handle_comment(rawdata[i+1:])
274+
elif startswith("<![CDATA[", i) and self._support_cdata:
275+
self.unknown_decl(rawdata[i+3:])
279276
elif rawdata[i:i+9].lower() == '<!doctype':
280277
self.handle_decl(rawdata[i+2:])
281278
elif startswith("<!", i):
@@ -350,15 +347,12 @@ def parse_html_declaration(self, i):
350347
if rawdata[i:i+4] == '<!--':
351348
# this case is actually already handled in goahead()
352349
return self.parse_comment(i)
353-
elif rawdata[i:i+9] == '<![CDATA[':
354-
if self._support_cdata:
355-
j = rawdata.find(']]>', i+9)
356-
if j < 0:
357-
return -1
358-
self.unknown_decl(rawdata[i+3: j])
359-
return j + 3
360-
else:
361-
return self.parse_bogus_comment(i)
350+
elif rawdata[i:i+9] == '<![CDATA[' and self._support_cdata:
351+
j = rawdata.find(']]>', i+9)
352+
if j < 0:
353+
return -1
354+
self.unknown_decl(rawdata[i+3: j])
355+
return j + 3
362356
elif rawdata[i:i+9].lower() == '<!doctype':
363357
# find the closing >
364358
gtpos = rawdata.find('>', i+9)

Lib/test/test_htmlparser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -791,7 +791,7 @@ def test_eof_in_cdata(self, content):
791791
self._run_check('<![CDATA[' + content,
792792
[('unknown decl', 'CDATA[' + content)])
793793
self._run_check('<![CDATA[' + content,
794-
[('comment', '![CDATA[' + content)],
794+
[('comment', '[CDATA[' + content)],
795795
collector=EventCollector(autocdata=True))
796796
self._run_check('<svg><text y="100"><![CDATA[' + content,
797797
[('starttag', 'svg', []),

0 commit comments

Comments
 (0)