1010
1111class EventCollector (html .parser .HTMLParser ):
1212
def __init__(self, *args, autocdata=False, **kw):
    """Create the event log and initialise the underlying HTMLParser.

    ``autocdata`` is stored on the instance.  When it is true, CDATA
    support is switched off once the base parser has been initialised;
    the tag handlers consult the stored flag later.  All other
    positional and keyword arguments are forwarded unchanged to
    ``HTMLParser.__init__``.
    """
    log = []
    self.autocdata = autocdata
    self.events = log
    # Shortcut used by every handler to record an event tuple.
    self.append = log.append
    html.parser.HTMLParser.__init__(self, *args, **kw)
    if not autocdata:
        return
    self._set_support_cdata(False)
1720
1821 def get_events (self ):
1922 # Normalize the list of events so that buffer artefacts don't
@@ -34,12 +37,16 @@ def get_events(self):
3437
def handle_starttag(self, tag, attrs):
    """Record a ``("starttag", tag, attrs)`` event.

    In autocdata mode, a start tag named ``svg`` additionally switches
    CDATA support on.
    """
    event = ("starttag", tag, attrs)
    self.append(event)
    if not self.autocdata:
        return
    if tag == 'svg':
        self._set_support_cdata(True)
3742
def handle_startendtag(self, tag, attrs):
    """Record a ``("startendtag", tag, attrs)`` event."""
    event = ("startendtag", tag, attrs)
    self.append(event)
4045
def handle_endtag(self, tag):
    """Record an ``("endtag", tag)`` event.

    In autocdata mode, an end tag named ``svg`` additionally switches
    CDATA support off.
    """
    event = ("endtag", tag)
    self.append(event)
    if not self.autocdata:
        return
    if tag == 'svg':
        self._set_support_cdata(False)
4350
4451 # all other markup
4552
@@ -740,10 +747,6 @@ def test_eof_in_declarations(self):
740747 ('<!' , [('comment' , '' )]),
741748 ('<!-' , [('comment' , '-' )]),
742749 ('<![' , [('comment' , '[' )]),
743- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
744- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
745- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
746- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
747750 ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
748751 ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
749752 ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -756,6 +759,18 @@ def test_eof_in_declarations(self):
756759 for html , expected in data :
757760 self ._run_check (html , expected )
758761
@support.subTests('content', ['', 'x', 'x]', 'x]]'])
def test_eof_in_cdata(self, content):
    """Check the events produced when input ends inside ``<![CDATA[``.

    ``content`` is the (unterminated) text that follows ``<![CDATA[``.
    """
    # Default collector: the section is reported as an unknown
    # declaration.
    self._run_check('<![CDATA[' + content,
                    [('unknown decl', 'CDATA[' + content)])
    # autocdata collector outside <svg>: the markup is reported as a
    # comment.  The comment text is what follows '<!', so it starts
    # with '[CDATA[' -- the '!' is not part of it (compare '<![' ->
    # ('comment', '[') in test_eof_in_declarations).
    self._run_check('<![CDATA[' + content,
                    [('comment', '[CDATA[' + content)],
                    collector=EventCollector(autocdata=True))
    # Inside <svg> with the default collector: an unknown declaration.
    self._run_check('<svg><text y="100"><![CDATA[' + content,
                    [('starttag', 'svg', []),
                     ('starttag', 'text', [('y', '100')]),
                     ('unknown decl', 'CDATA[' + content)])
773+
759774 def test_bogus_comments (self ):
760775 html = ('<!ELEMENT br EMPTY>'
761776 '<! not really a comment >'
@@ -805,8 +820,57 @@ def test_broken_condcoms(self):
805820 ('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
806821 ('unknown decl' , 'endif' )
807822 ]
823+
808824 self ._run_check (html , expected )
809825
@support.subTests('content', [
    'just some plain text',
    '<!-- not a comment -->',
    # Must keep the leading '&': the point of this case is text that
    # looks like an entity reference.
    '&not-an-entity-ref;',
    "<not a='start tag'>",
    '',
    '[[I have many brackets]]',
    'I have a > in the middle',
    'I have a ]] in the middle',
    '] ]>',
    ']] >',
    ('\n '
     ' if (a < b && a > b) {\n '
     ' printf("[<marquee>How?</marquee>]");\n '
     ' }\n '),
])
def test_cdata_section_content(self, content):
    """Check that CDATA section content inside <svg> is passed through verbatim.

    ``content`` is placed between ``<![CDATA[`` and ``]]>`` and must come
    back unchanged in a single 'unknown decl' event, both with the default
    collector and with an autocdata collector.
    """
    # See "13.2.5.42 Markup declaration open state",
    # "13.2.5.69 CDATA section state", and issue bpo-32876.
    # No whitespace may sit between the payload and ']]>': anything
    # there would become part of the reported declaration.
    html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
    expected = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[' + content),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    self._run_check(html, expected)
    self._run_check(html, expected, collector=EventCollector(autocdata=True))
855+
def test_cdata_section(self):
    """Check autocdata handling of the same CDATA markup in and out of <svg>.

    The identical ``<![CDATA[...]]>`` text is placed before, inside and
    after an ``<svg>`` element.  Inside it is expected as one
    'unknown decl' event; outside it is expected as a comment that
    stops at the first '>' followed by plain data.
    """
    # See "13.2.5.42 Markup declaration open state".
    cdata = '<![CDATA[foo<br>bar]]>'
    markup = cdata + '<svg><text y="100">' + cdata + '</text></svg>' + cdata
    # Events expected for the section wherever it sits outside <svg>.
    outside_svg = [
        ('comment', '[CDATA[foo<br'),
        ('data', 'bar]]>'),
    ]
    inside_svg = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[foo<br>bar'),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    expected = [*outside_svg, *inside_svg, *outside_svg]
    self._run_check(markup, expected, collector=EventCollector(autocdata=True))
873+
810874 def test_convert_charrefs_dropped_text (self ):
811875 # #23144: make sure that all the events are triggered when
812876 # convert_charrefs is True, even if we don't call .close()
0 commit comments