10
10
11
11
class EventCollector (html .parser .HTMLParser ):
12
12
13
- def __init__ (self , * args , ** kw ):
13
+ def __init__ (self , * args , autocdata = False , ** kw ):
14
+ self .autocdata = autocdata
14
15
self .events = []
15
16
self .append = self .events .append
16
17
html .parser .HTMLParser .__init__ (self , * args , ** kw )
18
+ if autocdata :
19
+ self ._set_support_cdata (False )
17
20
18
21
def get_events (self ):
19
22
# Normalize the list of events so that buffer artefacts don't
@@ -34,12 +37,16 @@ def get_events(self):
34
37
35
38
def handle_starttag (self , tag , attrs ):
36
39
self .append (("starttag" , tag , attrs ))
40
+ if self .autocdata and tag == 'svg' :
41
+ self ._set_support_cdata (True )
37
42
38
43
def handle_startendtag (self , tag , attrs ):
39
44
self .append (("startendtag" , tag , attrs ))
40
45
41
46
def handle_endtag (self , tag ):
42
47
self .append (("endtag" , tag ))
48
+ if self .autocdata and tag == 'svg' :
49
+ self ._set_support_cdata (False )
43
50
44
51
# all other markup
45
52
@@ -767,10 +774,6 @@ def test_eof_in_declarations(self):
767
774
('<!' , [('comment' , '' )]),
768
775
('<!-' , [('comment' , '-' )]),
769
776
('<![' , [('comment' , '[' )]),
770
- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
771
- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
772
- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
773
- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
774
777
('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
775
778
('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
776
779
('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -783,6 +786,18 @@ def test_eof_in_declarations(self):
783
786
for html , expected in data :
784
787
self ._run_check (html , expected )
785
788
789
+ @support .subTests ('content' , ['' , 'x' , 'x]' , 'x]]' ])
790
+ def test_eof_in_cdata (self , content ):
791
+ self ._run_check ('<![CDATA[' + content ,
792
+ [('unknown decl' , 'CDATA[' + content )])
793
+ self ._run_check ('<![CDATA[' + content ,
794
+ [('comment' , '[CDATA[' + content )],
795
+ collector = EventCollector (autocdata = True ))
796
+ self ._run_check ('<svg><text y="100"><![CDATA[' + content ,
797
+ [('starttag' , 'svg' , []),
798
+ ('starttag' , 'text' , [('y' , '100' )]),
799
+ ('unknown decl' , 'CDATA[' + content )])
800
+
786
801
def test_bogus_comments (self ):
787
802
html = ('<!ELEMENT br EMPTY>'
788
803
'<! not really a comment >'
@@ -845,28 +860,53 @@ def test_broken_condcoms(self):
845
860
]
846
861
self ._run_check (html , expected )
847
862
848
- def test_cdata_declarations (self ):
849
- # More tests should be added. See also "8.2.4.42. Markup
850
- # declaration open state", "8.2.4.69. CDATA section state",
851
- # and issue 32876
852
- html = ('<![CDATA[just some plain text]]>' )
853
- expected = [('unknown decl' , 'CDATA[just some plain text' )]
863
+ @support .subTests ('content' , [
864
+ 'just some plain text' ,
865
+ '<!-- not a comment -->' ,
866
+ '&not-an-entity-ref;' ,
867
+ "<not a='start tag'>" ,
868
+ '' ,
869
+ '[[I have many brackets]]' ,
870
+ 'I have a > in the middle' ,
871
+ 'I have a ]] in the middle' ,
872
+ '] ]>' ,
873
+ ']] >' ,
874
+ ('\n '
875
+ ' if (a < b && a > b) {\n '
876
+ ' printf("[<marquee>How?</marquee>]");\n '
877
+ ' }\n ' ),
878
+ ])
879
+ def test_cdata_section_content (self , content ):
880
+ # See "13.2.5.42 Markup declaration open state",
881
+ # "13.2.5.69 CDATA section state", and issue bpo-32876.
882
+ html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
883
+ expected = [
884
+ ('starttag' , 'svg' , []),
885
+ ('starttag' , 'text' , [('y' , '100' )]),
886
+ ('unknown decl' , 'CDATA[' + content ),
887
+ ('endtag' , 'text' ),
888
+ ('endtag' , 'svg' ),
889
+ ]
854
890
self ._run_check (html , expected )
891
+ self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
855
892
856
- def test_cdata_declarations_multiline (self ):
857
- html = ('<code><![CDATA['
858
- ' if (a < b && a > b) {'
859
- ' printf("[<marquee>How?</marquee>]");'
860
- ' }'
861
- ']]></code>' )
893
+ def test_cdata_section (self ):
894
+ # See "13.2.5.42 Markup declaration open state".
895
+ html = ('<![CDATA[foo<br>bar]]>'
896
+ '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
897
+ '<![CDATA[foo<br>bar]]>' )
862
898
expected = [
863
- ('starttag' , 'code' , []),
864
- ('unknown decl' ,
865
- 'CDATA[ if (a < b && a > b) { '
866
- 'printf("[<marquee>How?</marquee>]"); }' ),
867
- ('endtag' , 'code' )
899
+ ('comment' , '[CDATA[foo<br' ),
900
+ ('data' , 'bar]]>' ),
901
+ ('starttag' , 'svg' , []),
902
+ ('starttag' , 'text' , [('y' , '100' )]),
903
+ ('unknown decl' , 'CDATA[foo<br>bar' ),
904
+ ('endtag' , 'text' ),
905
+ ('endtag' , 'svg' ),
906
+ ('comment' , '[CDATA[foo<br' ),
907
+ ('data' , 'bar]]>' ),
868
908
]
869
- self ._run_check (html , expected )
909
+ self ._run_check (html , expected , collector = EventCollector ( autocdata = True ) )
870
910
871
911
def test_convert_charrefs_dropped_text (self ):
872
912
# #23144: make sure that all the events are triggered when
0 commit comments