Skip to content

Commit cec02c3

Browse files
committed
Add duplicate attribute tracking for CSP nonce validation.
Implements detection and propagation of duplicate attributes through the tokenizer, tree builder, and TreeSink interface to support CSP (Content Security Policy) nonce validation. This enables html5ever consumers (e.g., Servo) to properly implement step 3 of the CSP "is element nonceable" algorithm by checking the `ElementFlags.had_duplicate_attrs` field during nonce validation. Reference: - https://www.w3.org/TR/CSP/#is-element-nonceable - servo/servo@4821bc0 Signed-off-by: Dyego Aurélio <[email protected]>
1 parent fa69d1a commit cec02c3

File tree

8 files changed

+415
-15
lines changed

8 files changed

+415
-15
lines changed

html5ever/src/tokenizer/interface.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ pub struct Tag {
4040
pub name: LocalName,
4141
pub self_closing: bool,
4242
pub attrs: Vec<Attribute>,
43+
/// Whether duplicate attributes were encountered during tokenization.
44+
/// This is used for CSP nonce validation - elements with duplicate
45+
/// attributes are not nonceable per the CSP spec.
46+
pub had_duplicate_attrs: bool,
4347
}
4448

4549
impl Tag {

html5ever/src/tokenizer/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ pub struct Tokenizer<Sink> {
133133
/// Current tag is self-closing?
134134
current_tag_self_closing: Cell<bool>,
135135

136+
/// Current tag had duplicate attributes?
137+
current_tag_had_duplicate_attrs: Cell<bool>,
138+
136139
/// Current tag attributes.
137140
current_tag_attrs: RefCell<Vec<Attribute>>,
138141

@@ -186,6 +189,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
186189
current_tag_kind: Cell::new(StartTag),
187190
current_tag_name: RefCell::new(StrTendril::new()),
188191
current_tag_self_closing: Cell::new(false),
192+
current_tag_had_duplicate_attrs: Cell::new(false),
189193
current_tag_attrs: RefCell::new(vec![]),
190194
current_attr_name: RefCell::new(StrTendril::new()),
191195
current_attr_value: RefCell::new(StrTendril::new()),
@@ -440,6 +444,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
440444
name,
441445
self_closing: self.current_tag_self_closing.get(),
442446
attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()),
447+
had_duplicate_attrs: self.current_tag_had_duplicate_attrs.get(),
443448
});
444449

445450
match self.process_token(token) {
@@ -481,6 +486,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
481486
fn discard_tag(&self) {
482487
self.current_tag_name.borrow_mut().clear();
483488
self.current_tag_self_closing.set(false);
489+
self.current_tag_had_duplicate_attrs.set(false);
484490
*self.current_tag_attrs.borrow_mut() = vec![];
485491
}
486492

@@ -523,6 +529,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
523529

524530
if dup {
525531
self.emit_error(Borrowed("Duplicate attribute"));
532+
self.current_tag_had_duplicate_attrs.set(true);
526533
self.current_attr_name.borrow_mut().clear();
527534
self.current_attr_value.borrow_mut().clear();
528535
} else {
@@ -2217,6 +2224,7 @@ mod test {
22172224
name,
22182225
self_closing: false,
22192226
attrs: vec![],
2227+
had_duplicate_attrs: false,
22202228
})
22212229
}
22222230

html5ever/src/tree_builder/mod.rs

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
pub use crate::interface::{create_element, ElemName, ElementFlags, Tracer, TreeSink};
1313
pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText};
1414
pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
15+
pub use markup5ever::interface::tree_builder::create_element_with_flags;
1516

1617
use self::types::*;
1718

@@ -733,6 +734,7 @@ where
733734
name: subject,
734735
self_closing: false,
735736
attrs: vec![],
737+
had_duplicate_attrs: false,
736738
});
737739
};
738740

@@ -828,10 +830,11 @@ where
828830
};
829831
// FIXME: Is there a way to avoid cloning the attributes twice here (once on their
830832
// own, once as part of t.clone() above)?
831-
let new_element = create_element(
833+
let new_element = create_element_with_flags(
832834
&self.sink,
833835
QualName::new(None, ns!(html), tag.name.clone()),
834836
tag.attrs.clone(),
837+
tag.had_duplicate_attrs,
835838
);
836839
self.open_elems.borrow_mut()[node_index] = new_element.clone();
837840
self.active_formatting.borrow_mut()[node_formatting_index] =
@@ -860,10 +863,11 @@ where
860863
// 15.
861864
// FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
862865
// once as part of t.clone() above)?
863-
let new_element = create_element(
866+
let new_element = create_element_with_flags(
864867
&self.sink,
865868
QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
866869
fmt_elem_tag.attrs.clone(),
870+
fmt_elem_tag.had_duplicate_attrs,
867871
);
868872
let new_entry = FormatEntry::Element(new_element.clone(), fmt_elem_tag);
869873

@@ -1010,6 +1014,7 @@ where
10101014
ns!(html),
10111015
tag.name.clone(),
10121016
tag.attrs.clone(),
1017+
tag.had_duplicate_attrs,
10131018
);
10141019

10151020
// Step 9. Replace the entry for entry in the list with an entry for new element.
@@ -1363,6 +1368,7 @@ where
13631368
ns: Namespace,
13641369
name: LocalName,
13651370
attrs: Vec<Attribute>,
1371+
had_duplicate_attrs: bool,
13661372
) -> Handle {
13671373
declare_tag_set!(form_associatable =
13681374
"button" "fieldset" "input" "object"
@@ -1372,7 +1378,12 @@ where
13721378

13731379
// Step 7.
13741380
let qname = QualName::new(None, ns, name);
1375-
let elem = create_element(&self.sink, qname.clone(), attrs.clone());
1381+
let elem = create_element_with_flags(
1382+
&self.sink,
1383+
qname.clone(),
1384+
attrs.clone(),
1385+
had_duplicate_attrs,
1386+
);
13761387

13771388
let insertion_point = self.appropriate_place_for_insertion(None);
13781389
let (node1, node2) = match insertion_point {
@@ -1410,15 +1421,27 @@ where
14101421
}
14111422

14121423
fn insert_element_for(&self, tag: Tag) -> Handle {
1413-
self.insert_element(PushFlag::Push, ns!(html), tag.name, tag.attrs)
1424+
self.insert_element(
1425+
PushFlag::Push,
1426+
ns!(html),
1427+
tag.name,
1428+
tag.attrs,
1429+
tag.had_duplicate_attrs,
1430+
)
14141431
}
14151432

14161433
fn insert_and_pop_element_for(&self, tag: Tag) -> Handle {
1417-
self.insert_element(PushFlag::NoPush, ns!(html), tag.name, tag.attrs)
1434+
self.insert_element(
1435+
PushFlag::NoPush,
1436+
ns!(html),
1437+
tag.name,
1438+
tag.attrs,
1439+
tag.had_duplicate_attrs,
1440+
)
14181441
}
14191442

14201443
fn insert_phantom(&self, name: LocalName) -> Handle {
1421-
self.insert_element(PushFlag::Push, ns!(html), name, vec![])
1444+
self.insert_element(PushFlag::Push, ns!(html), name, vec![], false)
14221445
}
14231446

14241447
/// <https://html.spec.whatwg.org/multipage/parsing.html#insert-an-element-at-the-adjusted-insertion-location>
@@ -1429,8 +1452,13 @@ where
14291452
only_add_to_element_stack: bool,
14301453
) -> Handle {
14311454
let adjusted_insertion_location = self.appropriate_place_for_insertion(None);
1432-
let qname = QualName::new(None, ns, tag.name);
1433-
let elem = create_element(&self.sink, qname.clone(), tag.attrs.clone());
1455+
let qname = QualName::new(None, ns, tag.name.clone());
1456+
let elem = create_element_with_flags(
1457+
&self.sink,
1458+
qname.clone(),
1459+
tag.attrs.clone(),
1460+
tag.had_duplicate_attrs,
1461+
);
14341462

14351463
if !only_add_to_element_stack {
14361464
self.insert_at(adjusted_insertion_location, AppendNode(elem.clone()));
@@ -1520,6 +1548,7 @@ where
15201548
ns!(html),
15211549
tag.name.clone(),
15221550
tag.attrs.clone(),
1551+
tag.had_duplicate_attrs,
15231552
);
15241553
self.active_formatting
15251554
.borrow_mut()
@@ -1655,10 +1684,22 @@ where
16551684
self.adjust_foreign_attributes(&mut tag);
16561685

16571686
if tag.self_closing {
1658-
self.insert_element(PushFlag::NoPush, ns, tag.name, tag.attrs);
1687+
self.insert_element(
1688+
PushFlag::NoPush,
1689+
ns,
1690+
tag.name,
1691+
tag.attrs,
1692+
tag.had_duplicate_attrs,
1693+
);
16591694
ProcessResult::DoneAckSelfClosing
16601695
} else {
1661-
self.insert_element(PushFlag::Push, ns, tag.name, tag.attrs);
1696+
self.insert_element(
1697+
PushFlag::Push,
1698+
ns,
1699+
tag.name,
1700+
tag.attrs,
1701+
tag.had_duplicate_attrs,
1702+
);
16621703
ProcessResult::Done
16631704
}
16641705
}
@@ -1823,10 +1864,22 @@ where
18231864
self.adjust_foreign_attributes(&mut tag);
18241865
if tag.self_closing {
18251866
// FIXME(#118): <script /> in SVG
1826-
self.insert_element(PushFlag::NoPush, current_ns, tag.name, tag.attrs);
1867+
self.insert_element(
1868+
PushFlag::NoPush,
1869+
current_ns,
1870+
tag.name,
1871+
tag.attrs,
1872+
tag.had_duplicate_attrs,
1873+
);
18271874
ProcessResult::DoneAckSelfClosing
18281875
} else {
1829-
self.insert_element(PushFlag::Push, current_ns, tag.name, tag.attrs);
1876+
self.insert_element(
1877+
PushFlag::Push,
1878+
current_ns,
1879+
tag.name,
1880+
tag.attrs,
1881+
tag.had_duplicate_attrs,
1882+
);
18301883
ProcessResult::Done
18311884
}
18321885
}

html5ever/src/tree_builder/rules.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ use crate::tokenizer::TagKind::{EndTag, StartTag};
1515
use crate::tree_builder::tag_sets::*;
1616
use crate::tree_builder::types::*;
1717
use crate::tree_builder::{
18-
create_element, html_elem, ElemName, NodeOrText::AppendNode, StrTendril, Tag, TreeBuilder,
19-
TreeSink,
18+
html_elem, ElemName, NodeOrText::AppendNode, StrTendril, Tag, TreeBuilder, TreeSink,
2019
};
2120
use crate::QualName;
21+
use markup5ever::interface::tree_builder::create_element_with_flags;
2222
use markup5ever::{expanded_name, local_name, ns};
2323
use std::borrow::Cow::Borrowed;
2424

@@ -207,10 +207,11 @@ where
207207
},
208208

209209
Token::Tag(tag @ tag!(<script>)) => {
210-
let elem = create_element(
210+
let elem = create_element_with_flags(
211211
&self.sink,
212212
QualName::new(None, ns!(html), local_name!("script")),
213213
tag.attrs,
214+
tag.had_duplicate_attrs,
214215
);
215216
if self.is_fragment() {
216217
self.sink.mark_script_already_started(&elem);

markup5ever/interface/tree_builder.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ pub struct ElementFlags {
6262
///
6363
/// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point
6464
pub mathml_annotation_xml_integration_point: bool,
65+
66+
/// Whether duplicate attributes were encountered during tokenization.
67+
/// This is used for CSP nonce validation - elements with duplicate
68+
/// attributes are not nonceable per the CSP spec.
69+
///
70+
/// See [CSP Level 3 - Is element nonceable](https://www.w3.org/TR/CSP/#is-element-nonceable)
71+
pub had_duplicate_attrs: bool,
6572
}
6673

6774
/// A constructor for an element.
@@ -70,6 +77,22 @@ pub struct ElementFlags {
7077
///
7178
/// Create an element like `<div class="test-class-name"></div>`:
7279
pub fn create_element<Sink>(sink: &Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle
80+
where
81+
Sink: TreeSink,
82+
{
83+
create_element_with_flags(sink, name, attrs, false)
84+
}
85+
86+
/// A constructor for an element with duplicate attribute information.
87+
///
88+
/// This variant allows passing whether duplicate attributes were encountered
89+
/// during tokenization, which is needed for CSP nonce validation.
90+
pub fn create_element_with_flags<Sink>(
91+
sink: &Sink,
92+
name: QualName,
93+
attrs: Vec<Attribute>,
94+
had_duplicate_attrs: bool,
95+
) -> Sink::Handle
7396
where
7497
Sink: TreeSink,
7598
{
@@ -85,6 +108,7 @@ where
85108
},
86109
_ => {},
87110
}
111+
flags.had_duplicate_attrs = had_duplicate_attrs;
88112
sink.create_element(name, attrs, flags)
89113
}
90114

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
{"tests": [
2+
3+
{"description": "Duplicate attribute - simple case",
4+
"input": "<div id='first' id='second'>",
5+
"output": [
6+
["StartTag", "div", {"id": "first"}]
7+
],
8+
"errors": [
9+
{"code": "duplicate-attribute"}
10+
]},
11+
12+
{"description": "Duplicate attribute - three duplicates",
13+
"input": "<div id='first' id='second' id='third'>",
14+
"output": [
15+
["StartTag", "div", {"id": "first"}]
16+
],
17+
"errors": [
18+
{"code": "duplicate-attribute"},
19+
{"code": "duplicate-attribute"}
20+
]},
21+
22+
{"description": "Duplicate attribute - mixed with other attrs",
23+
"input": "<div class='foo' id='first' title='bar' id='second'>",
24+
"output": [
25+
["StartTag", "div", {"class": "foo", "id": "first", "title": "bar"}]
26+
],
27+
"errors": [
28+
{"code": "duplicate-attribute"}
29+
]},
30+
31+
{"description": "Duplicate nonce attribute",
32+
"input": "<script nonce='abc123' nonce='xyz789'>",
33+
"output": [
34+
["StartTag", "script", {"nonce": "abc123"}]
35+
],
36+
"errors": [
37+
{"code": "duplicate-attribute"}
38+
]},
39+
40+
{"description": "No duplicate - different attributes",
41+
"input": "<div id='test' class='foo'>",
42+
"output": [
43+
["StartTag", "div", {"id": "test", "class": "foo"}]
44+
],
45+
"errors": []},
46+
47+
{"description": "Duplicate attribute - case insensitive (ID vs id)",
48+
"input": "<div ID='first' id='second'>",
49+
"output": [
50+
["StartTag", "div", {"id": "first"}]
51+
],
52+
"errors": [
53+
{"code": "duplicate-attribute"}
54+
]},
55+
56+
{"description": "Multiple different duplicates",
57+
"input": "<div id='a' id='b' class='x' class='y'>",
58+
"output": [
59+
["StartTag", "div", {"id": "a", "class": "x"}]
60+
],
61+
"errors": [
62+
{"code": "duplicate-attribute"},
63+
{"code": "duplicate-attribute"}
64+
]}
65+
66+
]}

0 commit comments

Comments
 (0)