|
1 |
| -use crate::web::{ |
2 |
| - page::templates::{Body, Head, Vendored}, |
3 |
| - rustdoc::RustdocPage, |
| 1 | +use crate::{ |
| 2 | + InstanceMetrics, |
| 3 | + web::{ |
| 4 | + page::{ |
| 5 | + TemplateData, |
| 6 | + templates::{Body, Head, Vendored}, |
| 7 | + }, |
| 8 | + rustdoc::RustdocPage, |
| 9 | + }, |
4 | 10 | };
|
5 | 11 | use askama::Template;
|
| 12 | +use async_stream::stream; |
| 13 | +use axum::body::Bytes; |
| 14 | +use futures_util::{Stream, StreamExt as _}; |
6 | 15 | use lol_html::{element, errors::RewritingError};
|
| 16 | +use std::sync::Arc; |
| 17 | +use tokio::{io::AsyncRead, task::JoinHandle}; |
| 18 | +use tokio_util::io::ReaderStream; |
| 19 | +use tracing::error; |
| 20 | + |
| 21 | +#[derive(thiserror::Error, Debug)] |
| 22 | +pub(crate) enum RustdocRewritingError { |
| 23 | + #[error("HTML rewriter error: {0}")] |
| 24 | + RewritingError(#[from] lol_html::errors::RewritingError), |
| 25 | + #[error("generic error while rewriting rustdoc HTML: {0}")] |
| 26 | + Other(#[from] anyhow::Error), |
| 27 | +} |
7 | 28 |
|
8 | 29 | /// Rewrite a rustdoc page to have the docs.rs topbar
|
9 | 30 | ///
|
10 | 31 | /// Given a rustdoc HTML page and a context to serialize it with,
|
11 | 32 | /// render the `rustdoc/` templates with the `html`.
|
12 | 33 | /// The output is an HTML page which has not yet been UTF-8 validated.
|
13 | 34 | /// In practice, the output should always be valid UTF-8.
|
14 |
| -pub(crate) fn rewrite_lol( |
15 |
| - html: &[u8], |
| 35 | +pub(crate) fn rewrite_rustdoc_html_stream<R>( |
| 36 | + template_data: Arc<TemplateData>, |
| 37 | + mut reader: R, |
16 | 38 | max_allowed_memory_usage: usize,
|
17 |
| - data: &RustdocPage, |
18 |
| -) -> Result<Vec<u8>, RewritingError> { |
19 |
| - use lol_html::html_content::{ContentType, Element}; |
20 |
| - use lol_html::{HtmlRewriter, MemorySettings, Settings}; |
21 |
| - |
22 |
| - let head_html = Head::new(data).render().unwrap(); |
23 |
| - let vendored_html = Vendored.render().unwrap(); |
24 |
| - let body_html = Body.render().unwrap(); |
25 |
| - let topbar_html = data.render().unwrap(); |
26 |
| - |
27 |
| - // Before: <body> ... rustdoc content ... </body> |
28 |
| - // After: |
29 |
| - // ```html |
30 |
| - // <div id="rustdoc_body_wrapper" class="{{ rustdoc_body_class }}" tabindex="-1"> |
31 |
| - // ... rustdoc content ... |
32 |
| - // </div> |
33 |
| - // ``` |
34 |
| - let body_handler = |rustdoc_body_class: &mut Element| { |
35 |
| - // Add the `rustdoc` classes to the html body |
36 |
| - let mut tmp; |
37 |
| - let klass = if let Some(classes) = rustdoc_body_class.get_attribute("class") { |
38 |
| - tmp = classes; |
39 |
| - tmp.push_str(" container-rustdoc"); |
40 |
| - &tmp |
41 |
| - } else { |
42 |
| - "container-rustdoc" |
43 |
| - }; |
44 |
| - rustdoc_body_class.set_attribute("class", klass)?; |
45 |
| - rustdoc_body_class.set_attribute("id", "rustdoc_body_wrapper")?; |
46 |
| - rustdoc_body_class.set_attribute("tabindex", "-1")?; |
47 |
| - // Change the `body` to a `div` |
48 |
| - rustdoc_body_class.set_tag_name("div")?; |
49 |
| - // Prepend the askama content |
50 |
| - rustdoc_body_class.prepend(&body_html, ContentType::Html); |
51 |
| - // Wrap the transformed body and topbar into a <body> element |
52 |
| - rustdoc_body_class.before(r#"<body class="rustdoc-page">"#, ContentType::Html); |
53 |
| - // Insert the topbar outside of the rustdoc div |
54 |
| - rustdoc_body_class.before(&topbar_html, ContentType::Html); |
55 |
| - // Finalize body with </body> |
56 |
| - rustdoc_body_class.after("</body>", ContentType::Html); |
57 |
| - |
58 |
| - Ok(()) |
59 |
| - }; |
60 |
| - |
61 |
| - let settings = Settings { |
62 |
| - element_content_handlers: vec![ |
63 |
| - // Append `style.css` stylesheet after all head elements. |
64 |
| - element!("head", |head: &mut Element| { |
65 |
| - head.append(&head_html, ContentType::Html); |
66 |
| - Ok(()) |
67 |
| - }), |
68 |
| - element!("body", body_handler), |
69 |
| - // Append `vendored.css` before `rustdoc.css`, so that the duplicate copy of |
70 |
| - // `normalize.css` will be overridden by the later version. |
71 |
| - // |
72 |
| - // Later rustdoc has `#mainThemeStyle` that could be used, but pre-2018 docs |
73 |
| - // don't have this: |
74 |
| - // |
75 |
| - // https://github.com/rust-lang/rust/commit/003b2bc1c65251ec2fc80b78ed91c43fb35402ec |
76 |
| - // |
77 |
| - // Pre-2018 rustdoc also didn't have the resource suffix, but docs.rs was using a fork |
78 |
| - // that had implemented it already then, so we can assume the css files are |
79 |
| - // `<some path>/rustdoc-<some suffix>.css` and use the `-` to distinguish from the |
80 |
| - // `rustdoc.static` path. |
81 |
| - element!( |
82 |
| - "link[rel='stylesheet'][href*='rustdoc-']", |
83 |
| - |rustdoc_css: &mut Element| { |
84 |
| - rustdoc_css.before(&vendored_html, ContentType::Html); |
| 39 | + data: Arc<RustdocPage>, |
| 40 | + metrics: Arc<InstanceMetrics>, |
| 41 | +) -> impl Stream<Item = Result<Bytes, RustdocRewritingError>> |
| 42 | +where |
| 43 | + R: AsyncRead + Unpin + 'static, |
| 44 | +{ |
| 45 | + stream!({ |
| 46 | + let (input_sender, input_receiver) = std::sync::mpsc::channel::<Option<Vec<u8>>>(); |
| 47 | + let (result_sender, mut result_receiver) = tokio::sync::mpsc::unbounded_channel::<Bytes>(); |
| 48 | + |
| 49 | + let join_handle: JoinHandle<anyhow::Result<_>> = tokio::spawn(async move { |
| 50 | + // We're using the rendering threadpool to limit CPU usage on the server, and to |
| 51 | + // offload potentially CPU-intensive work from the tokio runtime. |
| 52 | + // This also lets us limit the threadpool size and, through that, the CPU usage. |
| 53 | + template_data |
| 54 | + .render_in_threadpool(move || { |
| 55 | + use lol_html::html_content::{ContentType, Element}; |
| 56 | + use lol_html::{HtmlRewriter, MemorySettings, Settings}; |
| 57 | + |
| 58 | + let head_html = Head::new(&data).render().unwrap(); |
| 59 | + let vendored_html = Vendored.render().unwrap(); |
| 60 | + let body_html = Body.render().unwrap(); |
| 61 | + let topbar_html = data.render().unwrap(); |
| 62 | + |
| 63 | + // Before: <body> ... rustdoc content ... </body> |
| 64 | + // After: |
| 65 | + // ```html |
| 66 | + // <div id="rustdoc_body_wrapper" class="{{ rustdoc_body_class }}" tabindex="-1"> |
| 67 | + // ... rustdoc content ... |
| 68 | + // </div> |
| 69 | + // ``` |
| 70 | + let body_handler = |rustdoc_body_class: &mut Element| { |
| 71 | + // Add the `rustdoc` classes to the html body |
| 72 | + let mut tmp; |
| 73 | + let klass = if let Some(classes) = rustdoc_body_class.get_attribute("class") |
| 74 | + { |
| 75 | + tmp = classes; |
| 76 | + tmp.push_str(" container-rustdoc"); |
| 77 | + &tmp |
| 78 | + } else { |
| 79 | + "container-rustdoc" |
| 80 | + }; |
| 81 | + rustdoc_body_class.set_attribute("class", klass)?; |
| 82 | + rustdoc_body_class.set_attribute("id", "rustdoc_body_wrapper")?; |
| 83 | + rustdoc_body_class.set_attribute("tabindex", "-1")?; |
| 84 | + // Change the `body` to a `div` |
| 85 | + rustdoc_body_class.set_tag_name("div")?; |
| 86 | + // Prepend the askama content |
| 87 | + rustdoc_body_class.prepend(&body_html, ContentType::Html); |
| 88 | + // Wrap the transformed body and topbar into a <body> element |
| 89 | + rustdoc_body_class |
| 90 | + .before(r#"<body class="rustdoc-page">"#, ContentType::Html); |
| 91 | + // Insert the topbar outside of the rustdoc div |
| 92 | + rustdoc_body_class.before(&topbar_html, ContentType::Html); |
| 93 | + // Finalize body with </body> |
| 94 | + rustdoc_body_class.after("</body>", ContentType::Html); |
| 95 | + |
| 96 | + Ok(()) |
| 97 | + }; |
| 98 | + |
| 99 | + let settings = Settings { |
| 100 | + element_content_handlers: vec![ |
| 101 | + // Append `style.css` stylesheet after all head elements. |
| 102 | + element!("head", |head: &mut Element| { |
| 103 | + head.append(&head_html, ContentType::Html); |
| 104 | + Ok(()) |
| 105 | + }), |
| 106 | + element!("body", body_handler), |
| 107 | + // Append `vendored.css` before `rustdoc.css`, so that the duplicate copy of |
| 108 | + // `normalize.css` will be overridden by the later version. |
| 109 | + // |
| 110 | + // Later rustdoc has `#mainThemeStyle` that could be used, but pre-2018 docs |
| 111 | + // don't have this: |
| 112 | + // |
| 113 | + // https://github.com/rust-lang/rust/commit/003b2bc1c65251ec2fc80b78ed91c43fb35402ec |
| 114 | + // |
| 115 | + // Pre-2018 rustdoc also didn't have the resource suffix, but docs.rs was using a fork |
| 116 | + // that had implemented it already then, so we can assume the css files are |
| 117 | + // `<some path>/rustdoc-<some suffix>.css` and use the `-` to distinguish from the |
| 118 | + // `rustdoc.static` path. |
| 119 | + element!( |
| 120 | + "link[rel='stylesheet'][href*='rustdoc-']", |
| 121 | + move |rustdoc_css: &mut Element| { |
| 122 | + rustdoc_css.before(&vendored_html, ContentType::Html); |
| 123 | + Ok(()) |
| 124 | + } |
| 125 | + ), |
| 126 | + ], |
| 127 | + memory_settings: MemorySettings { |
| 128 | + max_allowed_memory_usage, |
| 129 | + ..MemorySettings::default() |
| 130 | + }, |
| 131 | + ..Settings::default() |
| 132 | + }; |
| 133 | + |
| 134 | + let mut rewriter = HtmlRewriter::new(settings, move |chunk: &[u8]| { |
| 135 | + // Send the result back to the main rewriter when it's coming in. |
| 136 | + // This can fail only when the receiver is dropped, in which case |
| 137 | + // we exit this thread anyway. |
| 138 | + let _ = result_sender.send(Bytes::from(chunk.to_vec())); |
| 139 | + }); |
| 140 | + while let Some(chunk) = input_receiver.recv()? { |
| 141 | + // Receive data from the input receiver. |
| 142 | + // `input_receiver` is a non-async (`std::sync::mpsc`) receiver. |
| 143 | + // Since we're in a normal background thread, we can use the blocking `.recv` |
| 144 | + // here. |
| 145 | + // We will get `None` when the reader is done reading, |
| 146 | + // so that's our signal to exit this loop and call `rewriter.end()` below. |
| 147 | + rewriter.write(&chunk)?; |
| 148 | + } |
| 149 | + // Finalize everything. This will trigger the output sink (and through that, |
| 150 | + // send the remaining data via the `result_sender`). |
| 151 | + rewriter.end()?; |
85 | 152 | Ok(())
|
86 |
| - } |
87 |
| - ), |
88 |
| - ], |
89 |
| - memory_settings: MemorySettings { |
90 |
| - max_allowed_memory_usage, |
91 |
| - ..MemorySettings::default() |
92 |
| - }, |
93 |
| - ..Settings::default() |
94 |
| - }; |
| 153 | + }) |
| 154 | + .await?; |
| 155 | + Ok(()) |
| 156 | + }); |
| 157 | + |
| 158 | + let mut reader_stream = ReaderStream::new(&mut reader); |
| 159 | + while let Some(chunk) = reader_stream.next().await { |
| 160 | + let chunk = chunk.map_err(|err| { |
| 161 | + error!(?err, "error while reading from rustdoc HTML reader"); |
| 162 | + RustdocRewritingError::Other(err.into()) |
| 163 | + })?; |
95 | 164 |
|
96 |
| - // The input and output are always strings, we just use `&[u8]` so we only have to validate once. |
97 |
| - let mut buffer = Vec::new(); |
98 |
| - // TODO: Make the rewriter persistent? |
99 |
| - let mut writer = HtmlRewriter::new(settings, |bytes: &[u8]| { |
100 |
| - buffer.extend_from_slice(bytes); |
101 |
| - }); |
| 165 | + if let Err(err) = input_sender.send(Some(chunk.to_vec())) { |
| 166 | + error!( |
| 167 | + ?err, |
| 168 | + "error when trying to send chunk to html rewriter thread" |
| 169 | + ); |
| 170 | + yield Err(RustdocRewritingError::Other(err.into())); |
| 171 | + break; |
| 172 | + } |
102 | 173 |
|
103 |
| - writer.write(html)?; |
104 |
| - writer.end()?; |
| 174 | + while let Ok(bytes) = result_receiver.try_recv() { |
| 175 | + yield Ok(bytes); |
| 176 | + } |
| 177 | + } |
| 178 | + // This signals the renderer thread to finalize & exit. |
| 179 | + if let Err(err) = input_sender.send(None) { |
| 180 | + error!( |
| 181 | + ?err, |
| 182 | + "error when trying to send end signal to html rewriter thread" |
| 183 | + ); |
| 184 | + yield Err(RustdocRewritingError::Other(err.into())); |
| 185 | + } |
| 186 | + while let Some(bytes) = result_receiver.recv().await { |
| 187 | + yield Ok(bytes); |
| 188 | + } |
105 | 189 |
|
106 |
| - Ok(buffer) |
| 190 | + join_handle.await.expect("Task panicked").map_err(|e| { |
| 191 | + error!( |
| 192 | + ?e, |
| 193 | + memory_limit = max_allowed_memory_usage, |
| 194 | + "error while rewriting rustdoc HTML" |
| 195 | + ); |
| 196 | + // Our `render_in_threadpool` call, and therefore the async tokio task, returns an `anyhow::Result`. |
| 197 | + // In most cases the error will come from the `HtmlRewriter` as a |
| 198 | + // `RewritingError`, which we extract again here. The other cases remain an |
| 199 | + // `anyhow::Error`. |
| 200 | + match e.downcast::<RewritingError>() { |
| 201 | + Ok(e) => { |
| 202 | + if matches!(e, RewritingError::MemoryLimitExceeded(_)) { |
| 203 | + metrics.html_rewrite_ooms.inc(); |
| 204 | + } |
| 205 | + RustdocRewritingError::RewritingError(e) |
| 206 | + } |
| 207 | + Err(e) => RustdocRewritingError::Other(e), |
| 208 | + } |
| 209 | + })?; |
| 210 | + }) |
107 | 211 | }
|
108 | 212 |
|
109 | 213 | #[cfg(test)]
|
|
0 commit comments