From aa2bf4d71854124b801b4aee6d7435dfb5515cb1 Mon Sep 17 00:00:00 2001 From: Josiah McMenamy Date: Mon, 14 Jul 2025 16:55:26 -0400 Subject: [PATCH] internal/sanitizer: allow picture and source elements in readme The existing implementation sanitizes picture and source elements, causing readmes with different images for light and dark mode to not display correctly Fixes #74276 --- internal/frontend/markdown.go | 4 ++-- internal/frontend/overview.go | 4 ++-- internal/frontend/readme_test.go | 36 ++++++++++++++++++++++++++++ internal/sanitizer/sanitizer.go | 11 +++++++-- internal/sanitizer/sanitizer_test.go | 12 ++++++++++ 5 files changed, 61 insertions(+), 6 deletions(-) diff --git a/internal/frontend/markdown.go b/internal/frontend/markdown.go index 94274476d..75953973b 100644 --- a/internal/frontend/markdown.go +++ b/internal/frontend/markdown.go @@ -75,8 +75,8 @@ func processReadme(ctx context.Context, readme *internal.Readme, info *source.In }, nil } -// rewriteImgSrc rewrites the HTML in the markdown document to replace img -// src keys with a value that properly represents the source of the image +// rewriteImgSrc rewrites the HTML in the markdown document to replace img and source +// src and srcset keys with a value that properly represents the source of the image // from the repo. func rewriteImgSrc(doc *markdown.Document, info *source.Info, readme *internal.Readme) { walkBlocks(doc.Blocks, func(b markdown.Block) error { diff --git a/internal/frontend/overview.go b/internal/frontend/overview.go index eb7bd84ce..856654f21 100644 --- a/internal/frontend/overview.go +++ b/internal/frontend/overview.go @@ -129,10 +129,10 @@ func translateHTML(htmlText []byte, info *source.Info, readme *internal.Readme) // It reports whether it made a change. func walkHTML(n *html.Node, info *source.Info, readme *internal.Readme) bool { changed := false - if n.Type == html.ElementNode && n.DataAtom == atom.Img { + if n.Type == html.ElementNode && (n.DataAtom == atom.Img || n.DataAtom == atom.Source) { var attrs []html.Attribute for _, a := range n.Attr { - if a.Key == "src" { + if a.Key == "src" || a.Key == "srcset" { if v := translateLink(a.Val, info, true, readme); v != "" { a.Val = v changed = true diff --git a/internal/frontend/readme_test.go b/internal/frontend/readme_test.go index 60591ed9a..0f6d89b1e 100644 --- a/internal/frontend/readme_test.go +++ b/internal/frontend/readme_test.go @@ -479,6 +479,42 @@ func TestReadme(t *testing.T) { wantHTML: `

alt

`, wantOutline: nil, }, + { + name: "picture and source elements with escaped image source", + unit: unit, + readme: &internal.Readme{ + Filepath: "README.md", + Contents: `` + "\n" + + `` + "\n" + + `` + "\n" + + `GoVector Logo` + "\n" + + ``, + }, + wantHTML: `` + "\n" + + `` + "\n" + + `` + "\n" + + `GoVector Logo` + "\n" + + ``, + wantOutline: nil, + }, + { + name: "picture and source elements with invalid media query", + unit: unit, + readme: &internal.Readme{ + Filepath: "README.md", + Contents: `` + "\n" + + `` + "\n" + + `` + "\n" + + `GoVector Logo` + "\n" + + ``, + }, + wantHTML: `` + "\n" + + `` + "\n" + + `` + "\n" + + `GoVector Logo` + "\n" + + ``, + wantOutline: nil, + }, } { t.Run(test.name, func(t *testing.T) { test.unit.Readme = test.readme diff --git a/internal/sanitizer/sanitizer.go b/internal/sanitizer/sanitizer.go index 5b3df2350..4d2513f8b 100644 --- a/internal/sanitizer/sanitizer.go +++ b/internal/sanitizer/sanitizer.go @@ -225,6 +225,7 @@ var allowElems = []string{ "mark", "ol", "p", + "picture", "pre", "q", "rp", @@ -234,6 +235,7 @@ var allowElems = []string{ "samp", "section", "small", + "source", "span", "strike", "strong", @@ -292,6 +294,8 @@ var allowAttrs = []allowAttr{ {"p", "width", flexiblewidth}, // pkgsite allows all values {"q", "cite", validURL}, {"time", "datetime", iso8601}, + {"source", "media", mediaQuery}, + {"source", "srcset", validURL}, {"ol", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)}, {"ul", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)}, {"li", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)}, @@ -352,8 +356,9 @@ var allowAttrs = []allowAttr{ // roundtripAttrs is a map from attribute keys which should be checked // against roundtripURL to maps of tags which are allowed to have them. var roundtripAttrs = map[string]map[string]bool{ - "src": {"img": true}, - "href": {"a": true}, + "src": {"img": true}, + "srcset": {"source": true}, + "href": {"a": true}, "cite": { "blockquote": true, "del": true, @@ -385,6 +390,8 @@ var integer = re(`^[0-9]+$`) var iso8601 = re(`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` + `?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`) +var mediaQuery = re(`^[\(\)\s\w\-:./]+$`) + func re(rx string) func(string) bool { return regexp.MustCompile(rx).MatchString } diff --git a/internal/sanitizer/sanitizer_test.go b/internal/sanitizer/sanitizer_test.go index aa6acba6f..bfbad38b1 100644 --- a/internal/sanitizer/sanitizer_test.go +++ b/internal/sanitizer/sanitizer_test.go @@ -146,6 +146,18 @@ func TestSanitizeBytes(t *testing.T) { }, {`

hello

middle

goodbye`, `

hello

middle

goodbye`}, + { + ` + + + Logo + `, + ` + + + Logo + `, + }, } for _, tc := range testCases {