10
10
11
11
class EventCollector (html .parser .HTMLParser ):
12
12
13
- def __init__ (self , * args , ** kw ):
13
+ def __init__ (self , * args , autocdata = False , ** kw ):
14
+ self .autocdata = autocdata
14
15
self .events = []
15
16
self .append = self .events .append
16
17
html .parser .HTMLParser .__init__ (self , * args , ** kw )
18
+ if autocdata :
19
+ self ._set_support_cdata (False )
17
20
18
21
def get_events (self ):
19
22
# Normalize the list of events so that buffer artefacts don't
@@ -34,12 +37,16 @@ def get_events(self):
34
37
35
38
def handle_starttag(self, tag, attrs):
    """Record a ("starttag", tag, attrs) event.

    In autocdata mode an opening 'svg' tag additionally turns CDATA
    support on.
    """
    event = ("starttag", tag, attrs)
    self.append(event)
    if not self.autocdata:
        return
    if tag == 'svg':
        self._set_support_cdata(True)
37
42
38
43
def handle_startendtag(self, tag, attrs):
    """Record a ("startendtag", tag, attrs) event."""
    event = ("startendtag", tag, attrs)
    self.append(event)
40
45
41
46
def handle_endtag(self, tag):
    """Record an ("endtag", tag) event.

    In autocdata mode a closing 'svg' tag additionally turns CDATA
    support back off.
    """
    event = ("endtag", tag)
    self.append(event)
    if not self.autocdata:
        return
    if tag == 'svg':
        self._set_support_cdata(False)
43
50
44
51
# all other markup
45
52
@@ -740,10 +747,6 @@ def test_eof_in_declarations(self):
740
747
('<!' , [('comment' , '' )]),
741
748
('<!-' , [('comment' , '-' )]),
742
749
('<![' , [('comment' , '[' )]),
743
- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
744
- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
745
- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
746
- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
747
750
('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
748
751
('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
749
752
('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -756,6 +759,18 @@ def test_eof_in_declarations(self):
756
759
for html , expected in data :
757
760
self ._run_check (html , expected )
758
761
762
@support.subTests('content', ['', 'x', 'x]', 'x]]'])
def test_eof_in_cdata(self, content):
    """Check the events produced by an unterminated '<![CDATA[' at EOF."""
    unterminated = '<![CDATA[' + content
    decl_event = ('unknown decl', 'CDATA[' + content)

    # Default collector: reported as an unknown declaration.
    self._run_check(unterminated, [decl_event])

    # autocdata collector with no enclosing <svg>: reported as a comment.
    self._run_check(unterminated,
                    [('comment', '[CDATA[' + content)],
                    collector=EventCollector(autocdata=True))

    # Default collector, CDATA opened inside <svg><text>.
    self._run_check('<svg><text y="100"><![CDATA[' + content,
                    [('starttag', 'svg', []),
                     ('starttag', 'text', [('y', '100')]),
                     decl_event])
773
+
759
774
def test_bogus_comments (self ):
760
775
html = ('<!ELEMENT br EMPTY>'
761
776
'<! not really a comment >'
@@ -805,8 +820,57 @@ def test_broken_condcoms(self):
805
820
('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
806
821
('unknown decl' , 'endif' )
807
822
]
823
+
808
824
self ._run_check (html , expected )
809
825
826
@support.subTests('content', [
    'just some plain text',
    '<!-- not a comment -->',
    # NOTE(review): restored the leading '&not' (it had been rendered as
    # the single character U+00AC) so the sample looks like an entity
    # reference, as its name implies -- confirm against upstream.
    '&not-an-entity-ref;',
    "<not a='start tag'>",
    '',
    '[[I have many brackets]]',
    'I have a > in the middle',
    'I have a ]] in the middle',
    '] ]>',
    ']] >',
    ('\n '
     ' if (a < b && a > b) {\n '
     ' printf("[<marquee>How?</marquee>]");\n '
     ' }\n '),
])
def test_cdata_section_content(self, content):
    """Check that CDATA content inside <svg> is passed through verbatim.

    Each sample is wrapped in '<![CDATA[' ... ']]>' inside
    <svg><text>, and must come back untouched as the payload of a
    single 'unknown decl' event, both with the default collector and
    with an autocdata collector.
    """
    # See "13.2.5.42 Markup declaration open state",
    # "13.2.5.69 CDATA section state", and issue bpo-32876.
    # No whitespace may separate the payload from the closing ']]>':
    # anything before the terminator becomes part of the reported data.
    html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
    expected = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[' + content),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    self._run_check(html, expected)
    self._run_check(html, expected,
                    collector=EventCollector(autocdata=True))
855
+
856
def test_cdata_section(self):
    """Check CDATA handling inside and outside <svg> in autocdata mode.

    With an autocdata collector, '<![CDATA[...]]>' is expected to be
    reported as an 'unknown decl' inside <svg>, and as a comment
    followed by data everywhere else.
    """
    # See "13.2.5.42 Markup declaration open state".
    html = ''.join((
        '<![CDATA[foo<br>bar]]>',
        '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>',
        '<![CDATA[foo<br>bar]]>',
    ))
    # Outside <svg> the comment ends at the first '>'.
    outside_svg = [
        ('comment', '[CDATA[foo<br'),
        ('data', 'bar]]>'),
    ]
    inside_svg = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[foo<br>bar'),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    expected = outside_svg + inside_svg + outside_svg
    autocdata_collector = EventCollector(autocdata=True)
    self._run_check(html, expected, collector=autocdata_collector)
873
+
810
874
def test_convert_charrefs_dropped_text (self ):
811
875
# #23144: make sure that all the events are triggered when
812
876
# convert_charrefs is True, even if we don't call .close()
0 commit comments