diff --git a/Cargo.lock b/Cargo.lock index 0c7f2357..511b6581 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,55 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -177,6 +220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -185,8 +229,22 @@ version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -195,12 +253,27 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "color" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "212bb4103d3dc3eca9f7b665588528dee3a42fc03272b2db5ffa3010dc84b39c" +dependencies = [ + "serde", +] + [[package]] name = "color_quant" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "console_error_panic_hook" version = "0.1.7" @@ -318,6 +391,16 @@ dependencies = [ "libm", ] +[[package]] +name = "cpu-sparse" +version = "0.1.0" +dependencies = [ + "flatten", + "piet-next", + "png", + "roxmltree 0.20.0", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -473,6 +556,17 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flatten" +version = "0.1.0" +source = "git+https://github.com/linebender/gpu-stroke-expansion-paper?rev=827ccf6#827ccf6766179340a83f2de3417b7bdd8743706a" +dependencies = [ + "arrayvec", + "clap", + "kurbo", + "roxmltree 0.19.0", +] + [[package]] name = "float-ord" version = "0.3.2" @@ -790,6 +884,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = 
"1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -1014,6 +1114,17 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "peniko" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de2e49a1a6b7a55ec3ba866a423f46cd8f31472bfaabe42c68e144c27bc668" +dependencies = [ + "color", + "kurbo", + "smallvec", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -1096,6 +1207,13 @@ dependencies = [ "wio", ] +[[package]] +name = "piet-next" +version = "0.1.0" +dependencies = [ + "peniko", +] + [[package]] name = "piet-svg" version = "0.7.0" @@ -1334,6 +1452,18 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "roxmltree" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cd14fd5e3b777a7422cca79358c57a8f6e3a703d9ac187448d0daf220c2407f" + +[[package]] +name = "roxmltree" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" + [[package]] name = "rustc_version" version = "0.4.1" @@ -1390,18 +1520,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", @@ -1465,6 +1595,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "svg" version = "0.18.0" @@ -1689,6 +1825,12 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14706d2a800ee8ff38c1d3edb873cd616971ea59eb7c0d046bb44ef59b06a1ae" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "version-compare" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index d0b0b4e9..3cb6e6d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ "piet-web", "piet-web/examples/basic", "piet-svg" -] +, "piet-next", "cpu-sparse"] default-members = [ "piet", diff --git a/cpu-sparse/Cargo.toml b/cpu-sparse/Cargo.toml new file mode 100644 index 00000000..dbd58ce7 --- /dev/null +++ b/cpu-sparse/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "cpu-sparse" +version = "0.1.0" +authors = ["Raph Levien "] +description = "An experimental CPU 2D renderer based on sparse strips" +license = "Apache-2.0 OR MIT" +edition = "2021" +keywords = ["graphics", "2d"] +categories = ["graphics"] + +[dependencies] +piet-next = { path = "../piet-next" } +flatten = { git = "https://github.com/linebender/gpu-stroke-expansion-paper", rev = "827ccf6" } + +[dev-dependencies] +png = "0.17.14" +roxmltree = "0.20.0" diff --git a/cpu-sparse/examples/simple.rs 
b/cpu-sparse/examples/simple.rs new file mode 100644 index 00000000..52ae91f5 --- /dev/null +++ b/cpu-sparse/examples/simple.rs @@ -0,0 +1,38 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::io::BufWriter; + +use cpu_sparse::{CsRenderCtx, Pixmap}; +use piet_next::peniko::color::palette; +use piet_next::peniko::kurbo::{BezPath, Stroke}; +use piet_next::RenderCtx; + +const WIDTH: usize = 1024; +const HEIGHT: usize = 256; + +pub fn main() { + let mut ctx = CsRenderCtx::new(WIDTH, HEIGHT); + let mut path = BezPath::new(); + path.move_to((10.0, 10.0)); + path.line_to((180.0, 20.0)); + path.line_to((30.0, 40.0)); + path.close_path(); + let piet_path = path.into(); + ctx.fill(&piet_path, palette::css::REBECCA_PURPLE.into()); + let stroke = Stroke::new(5.0); + ctx.stroke(&piet_path, &stroke, palette::css::DARK_BLUE.into()); + if let Some(filename) = std::env::args().nth(1) { + let mut pixmap = Pixmap::new(WIDTH, HEIGHT); + ctx.render_to_pixmap(&mut pixmap); + pixmap.unpremultiply(); + let file = std::fs::File::create(filename).unwrap(); + let w = BufWriter::new(file); + let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); + encoder.set_color(png::ColorType::Rgba); + let mut writer = encoder.write_header().unwrap(); + writer.write_image_data(pixmap.data()).unwrap(); + } else { + ctx.debug_dump(); + } +} diff --git a/cpu-sparse/examples/svg.rs b/cpu-sparse/examples/svg.rs new file mode 100644 index 00000000..b9ddc917 --- /dev/null +++ b/cpu-sparse/examples/svg.rs @@ -0,0 +1,337 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::io::BufWriter; +use std::str::FromStr; + +use cpu_sparse::{CsRenderCtx, Pixmap}; +use piet_next::peniko::color::palette; +use piet_next::peniko::kurbo::{Affine, BezPath, Point, Size, Stroke, Vec2}; +use piet_next::peniko::Color; +use piet_next::RenderCtx; +use roxmltree::{Document, Node}; + +const WIDTH: usize = 1024; +const HEIGHT: usize = 
1024; + +pub fn main() { + let mut ctx = CsRenderCtx::new(WIDTH, HEIGHT); + let mut args = std::env::args().skip(1); + let svg_filename = args.next().expect("svg filename is first arg"); + let out_filename = args.next().expect("png out filename is second arg"); + + let svg = std::fs::read_to_string(svg_filename).expect("error reading file"); + let parsed = PicoSvg::load(&svg, 1.0).expect("error parsing SVG"); + let mut pixmap = Pixmap::new(WIDTH, HEIGHT); + // Hacky code for crude measurements; change this to arg parsing + for i in 0..200 { + ctx.reset(); + let start = std::time::Instant::now(); + render_svg(&mut ctx, &parsed.items); + let coarse_time = start.elapsed(); + ctx.render_to_pixmap(&mut pixmap); + if i % 100 == 0 { + println!( + "time to coarse: {coarse_time:?}, time to fine: {:?}", + start.elapsed() + ); + } + } + pixmap.unpremultiply(); + let file = std::fs::File::create(out_filename).unwrap(); + let w = BufWriter::new(file); + let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); + encoder.set_color(png::ColorType::Rgba); + let mut writer = encoder.write_header().unwrap(); + writer.write_image_data(pixmap.data()).unwrap(); +} + +fn render_svg(ctx: &mut impl RenderCtx, items: &[Item]) { + for item in items { + match item { + Item::Fill(fill_item) => ctx.fill(&fill_item.path, fill_item.color.into()), + Item::Stroke(stroke_item) => { + let style = Stroke::new(stroke_item.width); + ctx.stroke(&stroke_item.path, &style, stroke_item.color.into()); + } + Item::Group(group_item) => { + // TODO: apply transform from group + render_svg(ctx, &group_item.children); + } + } + } +} + +// Below is copied, lightly adapted, from Vello. 
+ +pub struct PicoSvg { + pub items: Vec, + #[allow(unused)] + pub size: Size, +} + +pub enum Item { + Fill(FillItem), + Stroke(StrokeItem), + Group(GroupItem), +} + +pub struct StrokeItem { + pub width: f64, + pub color: Color, + pub path: piet_next::Path, +} + +pub struct FillItem { + pub color: Color, + pub path: piet_next::Path, +} + +pub struct GroupItem { + #[allow(unused)] + pub affine: Affine, + pub children: Vec, +} + +struct Parser { + scale: f64, +} + +impl PicoSvg { + pub fn load(xml_string: &str, scale: f64) -> Result> { + let doc = Document::parse(xml_string)?; + let root = doc.root_element(); + let mut parser = Parser::new(scale); + let width = root.attribute("width").and_then(|s| f64::from_str(s).ok()); + let height = root.attribute("height").and_then(|s| f64::from_str(s).ok()); + let (origin, viewbox_size) = root + .attribute("viewBox") + .and_then(|vb_attr| { + let vs: Vec = vb_attr + .split(' ') + .map(|s| f64::from_str(s).unwrap()) + .collect(); + if let &[x, y, width, height] = vs.as_slice() { + Some((Point { x, y }, Size { width, height })) + } else { + None + } + }) + .unzip(); + + let mut transform = if let Some(origin) = origin { + Affine::translate(origin.to_vec2() * -1.0) + } else { + Affine::IDENTITY + }; + + transform *= match (width, height, viewbox_size) { + (None, None, Some(_)) => Affine::IDENTITY, + (Some(w), Some(h), Some(s)) => { + Affine::scale_non_uniform(1.0 / s.width * w, 1.0 / s.height * h) + } + (Some(w), None, Some(s)) => Affine::scale(1.0 / s.width * w), + (None, Some(h), Some(s)) => Affine::scale(1.0 / s.height * h), + _ => Affine::IDENTITY, + }; + + let size = match (width, height, viewbox_size) { + (None, None, Some(s)) => s, + (mw, mh, None) => Size { + width: mw.unwrap_or(300_f64), + height: mh.unwrap_or(150_f64), + }, + (Some(w), None, Some(s)) => Size { + width: w, + height: 1.0 / w * s.width * s.height, + }, + (None, Some(h), Some(s)) => Size { + width: 1.0 / h * s.height * s.width, + height: h, + }, + 
(Some(width), Some(height), Some(_)) => Size { width, height }, + }; + + transform *= if scale >= 0.0 { + Affine::scale(scale) + } else { + Affine::new([-scale, 0.0, 0.0, scale, 0.0, 0.0]) + }; + let props = RecursiveProperties { + fill: Some(Color::BLACK), + }; + // The root element is the svg document element, which we don't care about + let mut items = Vec::new(); + for node in root.children() { + parser.rec_parse(node, &props, &mut items)?; + } + let root_group = Item::Group(GroupItem { + affine: transform, + children: items, + }); + Ok(PicoSvg { + items: vec![root_group], + size, + }) + } +} + +#[derive(Clone)] +struct RecursiveProperties { + fill: Option, +} + +impl Parser { + fn new(scale: f64) -> Parser { + Parser { scale } + } + + fn rec_parse( + &mut self, + node: Node, + properties: &RecursiveProperties, + items: &mut Vec, + ) -> Result<(), Box> { + if node.is_element() { + let mut properties = properties.clone(); + if let Some(fill_color) = node.attribute("fill") { + if fill_color == "none" { + properties.fill = None; + } else { + let color = parse_color(fill_color); + let color = modify_opacity(color, "fill-opacity", node); + // TODO: Handle recursive opacity properly + let color = modify_opacity(color, "opacity", node); + properties.fill = Some(color); + } + } + match node.tag_name().name() { + "g" => { + let mut children = Vec::new(); + let mut affine = Affine::default(); + if let Some(transform) = node.attribute("transform") { + affine = parse_transform(transform); + } + for child in node.children() { + self.rec_parse(child, &properties, &mut children)?; + } + items.push(Item::Group(GroupItem { affine, children })); + } + "path" => { + let d = node.attribute("d").ok_or("missing 'd' attribute")?; + let bp = BezPath::from_svg(d)?; + let path: piet_next::Path = bp.into(); + if let Some(color) = properties.fill { + items.push(Item::Fill(FillItem { + color, + path: path.clone(), + })); + } + if let Some(stroke_color) = node.attribute("stroke") { + if 
stroke_color != "none" { + let width = node + .attribute("stroke-width") + .map(|a| f64::from_str(a).unwrap_or(1.0)) + .unwrap_or(1.0) + * self.scale.abs(); + let color = parse_color(stroke_color); + let color = modify_opacity(color, "stroke-opacity", node); + // TODO: Handle recursive opacity properly + let color = modify_opacity(color, "opacity", node); + items.push(Item::Stroke(StrokeItem { width, color, path })); + } + } + } + other => eprintln!("Unhandled node type {other}"), + } + } + Ok(()) + } +} + +fn parse_transform(transform: &str) -> Affine { + let mut nt = Affine::IDENTITY; + for ts in transform.split(')').map(str::trim) { + nt *= if let Some(s) = ts.strip_prefix("matrix(") { + let vals = s + .split([',', ' ']) + .map(str::parse) + .collect::, _>>() + .expect("Could parse all values of 'matrix' as floats"); + Affine::new( + vals.try_into() + .expect("Should be six arguments to `matrix`"), + ) + } else if let Some(s) = ts.strip_prefix("translate(") { + if let Ok(vals) = s + .split([',', ' ']) + .map(str::trim) + .map(str::parse) + .collect::, _>>() + { + match vals.as_slice() { + &[x, y] => Affine::translate(Vec2 { x, y }), + _ => Affine::IDENTITY, + } + } else { + Affine::IDENTITY + } + } else if let Some(s) = ts.strip_prefix("scale(") { + if let Ok(vals) = s + .split([',', ' ']) + .map(str::trim) + .map(str::parse) + .collect::, _>>() + { + match *vals.as_slice() { + [x, y] => Affine::scale_non_uniform(x, y), + [x] => Affine::scale(x), + _ => Affine::IDENTITY, + } + } else { + Affine::IDENTITY + } + } else if let Some(s) = ts.strip_prefix("scaleX(") { + s.trim() + .parse() + .ok() + .map(|x| Affine::scale_non_uniform(x, 1.0)) + .unwrap_or(Affine::IDENTITY) + } else if let Some(s) = ts.strip_prefix("scaleY(") { + s.trim() + .parse() + .ok() + .map(|y| Affine::scale_non_uniform(1.0, y)) + .unwrap_or(Affine::IDENTITY) + } else { + if !ts.is_empty() { + eprintln!("Did not understand transform attribute {ts:?})"); + } + Affine::IDENTITY + }; + } + nt +} + 
+fn parse_color(color: &str) -> Color { + let color = color.trim(); + if let Ok(c) = piet_next::peniko::color::parse_color(color) { + c.to_alpha_color() + } else { + palette::css::MAGENTA.with_alpha(0.5) + } +} + +fn modify_opacity(color: Color, attr_name: &str, node: Node) -> Color { + if let Some(opacity) = node.attribute(attr_name) { + let alpha: f64 = if let Some(o) = opacity.strip_suffix('%') { + let pctg = o.parse().unwrap_or(100.0); + pctg * 0.01 + } else { + opacity.parse().unwrap_or(1.0) + }; + color.with_alpha(alpha as f32) + } else { + color + } +} diff --git a/cpu-sparse/src/fine.rs b/cpu-sparse/src/fine.rs new file mode 100644 index 00000000..43a97faa --- /dev/null +++ b/cpu-sparse/src/fine.rs @@ -0,0 +1,117 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Fine rasterization + +use crate::wide_tile::{Cmd, STRIP_HEIGHT, WIDE_TILE_WIDTH}; + +const STRIP_HEIGHT_F32: usize = STRIP_HEIGHT * 4; + +pub(crate) struct Fine<'a> { + pub(crate) width: usize, + pub(crate) height: usize, + // rgba pixels + pub(crate) out_buf: &'a mut [u8], + // f32 RGBA pixels + // That said, if we use u8, then this is basically a block of + // untyped memory. + pub(crate) scratch: [f32; WIDE_TILE_WIDTH * STRIP_HEIGHT * 4], + /// Whether to use SIMD + /// + /// This is useful to toggle for performance evaluation reasons. It also + /// *must* be false if runtime detection fails, otherwise we have safety + /// problems. This is important for x86_64, as we'll be targeting Haswell + /// as the minimum. + #[allow(unused)] + // The allow(unused) lint exception is because some platforms may not have + // a SIMD implementation, and thus won't check the field. 
+ pub(crate) use_simd: bool, +} + +impl<'a> Fine<'a> { + pub(crate) fn new(width: usize, height: usize, out_buf: &'a mut [u8]) -> Self { + let scratch = [0.0; WIDE_TILE_WIDTH * STRIP_HEIGHT * 4]; + Self { + width, + height, + out_buf, + scratch, + use_simd: true, + } + } + + pub(crate) fn clear_scalar(&mut self, color: [f32; 4]) { + for z in self.scratch.chunks_exact_mut(4) { + z.copy_from_slice(&color); + } + } + + pub(crate) fn pack_scalar(&mut self, x: usize, y: usize) { + // Note that these can trigger if the method is called on a pixmap that + // is not an integral multiple of the tile. + assert!((x + 1) * WIDE_TILE_WIDTH <= self.width); + assert!((y + 1) * STRIP_HEIGHT <= self.height); + let base_ix = (y * STRIP_HEIGHT * self.width + x * WIDE_TILE_WIDTH) * 4; + for j in 0..STRIP_HEIGHT { + let line_ix = base_ix + j * self.width * 4; + for i in 0..WIDE_TILE_WIDTH { + let mut rgba_f32 = [0.0; 4]; + rgba_f32.copy_from_slice(&self.scratch[(i * STRIP_HEIGHT + j) * 4..][..4]); + let rgba_u8 = rgba_f32.map(|x| (x * 255.0).round() as u8); + self.out_buf[line_ix + i * 4..][..4].copy_from_slice(&rgba_u8); + } + } + } + + pub(crate) fn run_cmd(&mut self, cmd: &Cmd, alphas: &[u32]) { + match cmd { + Cmd::Fill(f) => { + self.fill(f.x as usize, f.width as usize, f.color.components); + } + Cmd::Strip(s) => { + let aslice = &alphas[s.alpha_ix..]; + self.strip(s.x as usize, s.width as usize, aslice, s.color.components); + } + } + } + + pub(crate) fn fill_scalar(&mut self, x: usize, width: usize, color: [f32; 4]) { + if color[3] == 1.0 { + for z in + self.scratch[x * STRIP_HEIGHT_F32..][..STRIP_HEIGHT_F32 * width].chunks_exact_mut(4) + { + z.copy_from_slice(&color); + } + } else { + let one_minus_alpha = 1.0 - color[3]; + for z in + self.scratch[x * STRIP_HEIGHT_F32..][..STRIP_HEIGHT_F32 * width].chunks_exact_mut(4) + { + for i in 0..4 { + //z[i] = color[i] + one_minus_alpha * z[i]; + // Note: the mul_add will perform poorly on x86_64 default cpu target + // Probably right 
thing to do is craft a #cfg that detects fma, fcma, etc. + // What we really want is fmuladdf32 from intrinsics! + z[i] = z[i].mul_add(one_minus_alpha, color[i]); + } + } + } + } + + pub(crate) fn strip_scalar(&mut self, x: usize, width: usize, alphas: &[u32], color: [f32; 4]) { + debug_assert!(alphas.len() >= width); + let cs = color.map(|x| x * (1.0 / 255.0)); + for (z, a) in self.scratch[x * STRIP_HEIGHT_F32..][..STRIP_HEIGHT_F32 * width] + .chunks_exact_mut(16) + .zip(alphas) + { + for j in 0..4 { + let mask_alpha = ((*a >> (j * 8)) & 0xff) as f32; + let one_minus_alpha = 1.0 - mask_alpha * cs[3]; + for i in 0..4 { + z[j * 4 + i] = z[j * 4 + i].mul_add(one_minus_alpha, mask_alpha * cs[i]); + } + } + } + } +} diff --git a/cpu-sparse/src/flatten.rs b/cpu-sparse/src/flatten.rs new file mode 100644 index 00000000..02234a71 --- /dev/null +++ b/cpu-sparse/src/flatten.rs @@ -0,0 +1,51 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities for flattening + +use flatten::stroke::LoweredPath; +use piet_next::peniko::kurbo::{self, Affine, BezPath, Line, Point, Stroke}; + +use crate::tiling::FlatLine; + +/// The flattening tolerance +const TOL: f64 = 0.25; + +pub fn fill(path: &BezPath, affine: Affine, line_buf: &mut Vec) { + line_buf.clear(); + let mut start = Point::default(); + let mut p0 = Point::default(); + let iter = path.iter().map(|el| affine * el); + kurbo::flatten(iter, TOL, |el| match el { + kurbo::PathEl::MoveTo(p) => { + start = p; + p0 = p; + } + kurbo::PathEl::LineTo(p) => { + let pt0 = [p0.x as f32, p0.y as f32]; + let pt1 = [p.x as f32, p.y as f32]; + line_buf.push(FlatLine::new(pt0, pt1)); + p0 = p; + } + kurbo::PathEl::QuadTo(_, _) => unreachable!(), + kurbo::PathEl::CurveTo(_, _, _) => unreachable!(), + kurbo::PathEl::ClosePath => { + let pt0 = [p0.x as f32, p0.y as f32]; + let pt1 = [start.x as f32, start.y as f32]; + if pt0 != pt1 { + line_buf.push(FlatLine::new(pt0, pt1)); + } + } + }); +} + +pub fn 
/// An owned 8-bit RGBA image buffer.
///
/// Pixels are stored row-major, four bytes per pixel in R, G, B, A order.
/// The renderer writes premultiplied alpha; see [`Pixmap::unpremultiply`].
#[derive(Debug, Clone)]
pub struct Pixmap {
    /// Width in pixels.
    pub(crate) width: usize,
    /// Height in pixels.
    pub(crate) height: usize,
    /// `width * height * 4` bytes of RGBA data.
    pub(crate) buf: Vec<u8>,
}

impl Pixmap {
    /// Creates a `width` x `height` pixmap with every byte set to zero
    /// (transparent black).
    pub fn new(width: usize, height: usize) -> Self {
        let buf = vec![0; width * height * 4];
        Self { width, height, buf }
    }

    /// Returns the raw RGBA bytes.
    pub fn data(&self) -> &[u8] {
        &self.buf
    }

    /// Returns the raw RGBA bytes for in-place modification.
    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.buf
    }

    /// Convert from premultiplied to separate alpha.
    ///
    /// Each colour channel is divided by the pixel's alpha, rounded, and
    /// clamped to 255. Fully transparent pixels are left untouched, since
    /// their colour is undefined.
    ///
    /// Not fast, but useful for saving to PNG etc.
    pub fn unpremultiply(&mut self) {
        for rgba in self.buf.chunks_exact_mut(4) {
            let alpha = rgba[3] as f32 * (1.0 / 255.0);
            if alpha != 0.0 {
                for channel in &mut rgba[..3] {
                    *channel = (*channel as f32 / alpha).round().min(255.0) as u8;
                }
            }
        }
    }
}
+ line_buf: Vec, + tile_buf: Vec, + strip_buf: Vec, +} + +pub struct CsResourceCtx; + +impl CsRenderCtx { + pub fn new(width: usize, height: usize) -> Self { + let width_tiles = (width + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + let height_tiles = (height + STRIP_HEIGHT - 1) / STRIP_HEIGHT; + let tiles = (0..width_tiles * height_tiles) + .map(|_| WideTile::default()) + .collect(); + let alphas = vec![]; + let line_buf = vec![]; + let tile_buf = vec![]; + let strip_buf = vec![]; + Self { + width, + height, + tiles, + alphas, + line_buf, + tile_buf, + strip_buf, + } + } + + pub fn reset(&mut self) { + for tile in &mut self.tiles { + tile.bg = AlphaColor::TRANSPARENT; + tile.cmds.clear(); + } + } + + pub fn render_to_pixmap(&self, pixmap: &mut Pixmap) { + let mut fine = Fine::new(pixmap.width, pixmap.height, &mut pixmap.buf); + let width_tiles = (self.width + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + let height_tiles = (self.height + STRIP_HEIGHT - 1) / STRIP_HEIGHT; + for y in 0..height_tiles { + for x in 0..width_tiles { + let tile = &self.tiles[y * width_tiles + x]; + fine.clear(tile.bg.components); + for cmd in &tile.cmds { + fine.run_cmd(cmd, &self.alphas); + } + fine.pack(x, y); + } + } + } + + pub fn tile_stats(&self) { + let mut histo = BTreeMap::new(); + let mut total = 0; + for tile in &self.tiles { + let count = tile.cmds.len(); + total += count; + *histo.entry(count).or_insert(0) += 1; + } + println!("total = {total}, {histo:?}"); + } + + /// Render a path, which has already been flattened into `line_buf`. + fn render_path(&mut self, brush: BrushRef) { + // TODO: need to make sure tiles contained in viewport - we'll likely + // panic otherwise. 
+ tiling::make_tiles(&self.line_buf, &mut self.tile_buf); + self.tile_buf.sort_by(Tile::cmp); + crate::simd::render_strips(&self.tile_buf, &mut self.strip_buf, &mut self.alphas); + let color = brush_to_color(brush); + let width_tiles = (self.width + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + for i in 0..self.strip_buf.len() - 1 { + let strip = &self.strip_buf[i]; + let next_strip = &self.strip_buf[i + 1]; + let x0 = strip.x(); + let y = strip.strip_y(); + let row_start = y as usize * width_tiles; + let strip_width = next_strip.col - strip.col; + let x1 = x0 + strip_width; + let xtile0 = x0 as usize / WIDE_TILE_WIDTH; + let xtile1 = (x1 as usize + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + let mut x = x0; + let mut col = strip.col; + for xtile in xtile0..xtile1 { + let x_tile_rel = x % WIDE_TILE_WIDTH as u32; + let width = x1.min(((xtile + 1) * WIDE_TILE_WIDTH) as u32) - x; + let cmd = CmdStrip { + x: x_tile_rel, + width, + alpha_ix: col as usize, + color, + }; + x += width; + col += width; + self.tiles[row_start + xtile].push(Cmd::Strip(cmd)); + } + if next_strip.winding != 0 && y == next_strip.strip_y() { + x = x1; + let x2 = next_strip.x(); + let fxt0 = x1 as usize / WIDE_TILE_WIDTH; + let fxt1 = (x2 as usize + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + for xtile in fxt0..fxt1 { + let x_tile_rel = x % WIDE_TILE_WIDTH as u32; + let width = x2.min(((xtile + 1) * WIDE_TILE_WIDTH) as u32) - x; + x += width; + self.tiles[row_start + xtile].fill(x_tile_rel, width, color); + } + } + } + } + + pub fn debug_dump(&self) { + let width_tiles = (self.width + WIDE_TILE_WIDTH - 1) / WIDE_TILE_WIDTH; + for (i, tile) in self.tiles.iter().enumerate() { + if !tile.cmds.is_empty() || tile.bg.components[3] != 0.0 { + let x = i % width_tiles; + let y = i / width_tiles; + println!("tile {x}, {y} bg {}", tile.bg.to_rgba8()); + for cmd in &tile.cmds { + println!("{cmd:?}") + } + } + } + } + + fn get_affine(&self) -> Affine { + // TODO: get from graphics state + Affine::scale(5.0) + } +} + 
+impl RenderCtx for CsRenderCtx { + type Resource = CsResourceCtx; + + fn playback( + &mut self, + recording: &std::sync::Arc<::Recording>, + ) { + recording.play(self); + } + + fn fill(&mut self, path: &piet_next::Path, brush: BrushRef) { + let affine = self.get_affine(); + crate::flatten::fill(&path.path, affine, &mut self.line_buf); + self.render_path(brush); + } + + fn stroke( + &mut self, + path: &piet_next::Path, + stroke: &piet_next::peniko::kurbo::Stroke, + brush: BrushRef, + ) { + let affine = self.get_affine(); + crate::flatten::stroke(&path.path, stroke, affine, &mut self.line_buf); + self.render_path(brush); + } + + fn draw_image( + &mut self, + image: &::Image, + dst_rect: piet_next::peniko::kurbo::Rect, + interp: piet_next::InterpolationMode, + ) { + todo!() + } + + fn clip(&mut self, path: &piet_next::Path) { + todo!() + } + + fn save(&mut self) { + todo!() + } + + fn restore(&mut self) { + todo!() + } + + fn transform(&mut self, affine: piet_next::peniko::kurbo::Affine) { + todo!() + } + + fn begin_draw_glyphs(&mut self, font: &piet_next::peniko::Font) { + todo!() + } + + fn font_size(&mut self, size: f32) { + todo!() + } + + fn hint(&mut self, hint: bool) { + todo!() + } + + fn glyph_brush(&mut self, brush: BrushRef) { + todo!() + } + + fn draw_glyphs( + &mut self, + style: piet_next::peniko::StyleRef, + glyphs: &dyn Iterator, + ) { + todo!() + } + + fn end_draw_glyphs(&mut self) { + todo!() + } +} + +impl ResourceCtx for CsResourceCtx { + type Image = (); + + type Recording = GenericRecorder; + + type Record = GenericRecorder; + + fn record(&mut self) -> Self::Record { + GenericRecorder::new() + } + + fn make_image_with_stride( + &mut self, + width: usize, + height: usize, + stride: usize, + buf: &[u8], + format: piet_next::ImageFormat, + ) -> Result { + todo!() + } +} + +/// Get the color from the brush. +/// +/// This is a hacky function that will go away when we implement +/// other brushes. 
The general form is to match on whether it's a +/// solid color. If not, then issue a cmd to render the brush into +/// a brush buffer, then fill/strip as needed to composite into +/// the main buffer. +fn brush_to_color(brush: BrushRef) -> AlphaColor { + match brush { + BrushRef::Solid(c) => c, + _ => palette::css::MAGENTA, + } +} diff --git a/cpu-sparse/src/simd.rs b/cpu-sparse/src/simd.rs new file mode 100644 index 00000000..e9ef746e --- /dev/null +++ b/cpu-sparse/src/simd.rs @@ -0,0 +1,84 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! SIMD speedups + +use crate::{ + fine::Fine, + strip::{Strip, Tile}, +}; + +#[cfg(target_arch = "aarch64")] +mod neon; + +// This block is when we have SIMD +#[cfg(target_arch = "aarch64")] +impl<'a> Fine<'a> { + pub(crate) fn pack(&mut self, x: usize, y: usize) { + if self.use_simd { + self.pack_simd(x, y); + } else { + self.pack_scalar(x, y); + } + } + + pub(crate) fn clear(&mut self, color: [f32; 4]) { + if self.use_simd { + unsafe { + self.clear_simd(color); + } + } else { + self.clear_scalar(color); + } + } + + pub(crate) fn fill(&mut self, x: usize, width: usize, color: [f32; 4]) { + if self.use_simd { + unsafe { + self.fill_simd(x, width, color); + } + } else { + self.fill_scalar(x, width, color); + } + } + + pub(crate) fn strip(&mut self, x: usize, width: usize, alphas: &[u32], color: [f32; 4]) { + if self.use_simd { + unsafe { + self.strip_simd(x, width, alphas, color); + } + } else { + self.strip_scalar(x, width, alphas, color); + } + } +} + +#[cfg(target_arch = "aarch64")] +pub fn render_strips(tiles: &[Tile], strip_buf: &mut Vec, alpha_buf: &mut Vec) { + neon::render_strips_simd(tiles, strip_buf, alpha_buf); +} + +#[cfg(not(target_arch = "aarch64"))] +pub fn render_strips(tiles: &[Tile], strip_buf: &mut Vec, alpha_buf: &mut Vec) { + crate::strip::render_strips_scalar(tiles, strip_buf, alpha_buf); +} + +// This block is the fallback, no SIMD +#[cfg(not(target_arch = 
"aarch64"))] +impl<'a> Fine<'a> { + pub(crate) fn pack(&mut self, x: usize, y: usize) { + self.pack_scalar(x, y); + } + + pub(crate) fn clear(&mut self, color: [f32; 4]) { + self.clear_scalar(color); + } + + pub(crate) fn fill(&mut self, x: usize, y: usize, color: [f32; 4]) { + self.fill_scalar(x, y, color); + } + + pub(crate) fn strip(&mut self, x: usize, width: usize, alphas: &[u32], color: [f32; 4]) { + self.strip_scalar(x, width, alphas, color); + } +} diff --git a/cpu-sparse/src/simd/neon.rs b/cpu-sparse/src/simd/neon.rs new file mode 100644 index 00000000..c8ca78cc --- /dev/null +++ b/cpu-sparse/src/simd/neon.rs @@ -0,0 +1,213 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! SIMD speedups for Neon + +use core::arch::aarch64::*; + +use crate::{ + fine::Fine, + strip::{Strip, Tile}, + tiling::Vec2, + wide_tile::{STRIP_HEIGHT, WIDE_TILE_WIDTH}, +}; + +impl<'a> Fine<'a> { + pub unsafe fn clear_simd(&mut self, color: [f32; 4]) { + let v_color = vld1q_f32(color.as_ptr()); + let v_color_4 = float32x4x4_t(v_color, v_color, v_color, v_color); + for i in 0..WIDE_TILE_WIDTH { + vst1q_f32_x4(self.scratch.as_mut_ptr().add(i * 16), v_color_4); + } + } + + pub fn pack_simd(&mut self, x: usize, y: usize) { + unsafe fn cvt(v: float32x4_t) -> uint8x16_t { + let clamped = vminq_f32(v, vdupq_n_f32(1.0)); + let scaled = vmulq_f32(clamped, vdupq_n_f32(255.0)); + vreinterpretq_u8_u32(vcvtnq_u32_f32(scaled)) + } + + unsafe fn cvt2(v0: float32x4_t, v1: float32x4_t) -> uint8x16_t { + vuzp1q_u8(cvt(v0), cvt(v1)) + } + + unsafe { + let base_ix = (y * STRIP_HEIGHT * self.width + x * WIDE_TILE_WIDTH) * 4; + for i in (0..WIDE_TILE_WIDTH).step_by(4) { + let chunk_ix = base_ix + i * 4; + let v0 = vld1q_f32_x4(self.scratch.as_ptr().add(i * 16)); + let v1 = vld1q_f32_x4(self.scratch.as_ptr().add((i + 1) * 16)); + let x0 = cvt2(v0.0, v1.0); + let x1 = cvt2(v0.1, v1.1); + let x2 = cvt2(v0.2, v1.2); + let x3 = cvt2(v0.3, v1.3); + let v2 = 
vld1q_f32_x4(self.scratch.as_ptr().add((i + 2) * 16)); + let v3 = vld1q_f32_x4(self.scratch.as_ptr().add((i + 3) * 16)); + let x4 = cvt2(v2.0, v3.0); + let y0 = vuzp1q_u8(x0, x4); + vst1q_u8(self.out_buf.as_mut_ptr().add(chunk_ix), y0); + let x5 = cvt2(v2.1, v3.1); + let y1 = vuzp1q_u8(x1, x5); + vst1q_u8(self.out_buf.as_mut_ptr().add(chunk_ix + self.width * 4), y1); + let x6 = cvt2(v2.2, v3.2); + let y2 = vuzp1q_u8(x2, x6); + vst1q_u8(self.out_buf.as_mut_ptr().add(chunk_ix + self.width * 8), y2); + let x7 = cvt2(v2.3, v3.3); + let y3 = vuzp1q_u8(x3, x7); + vst1q_u8( + self.out_buf.as_mut_ptr().add(chunk_ix + self.width * 12), + y3, + ); + } + } + } + + pub unsafe fn fill_simd(&mut self, x: usize, width: usize, color: [f32; 4]) { + let v_color = vld1q_f32(color.as_ptr()); + let alpha = color[3]; + if alpha == 1.0 { + let v_color_4 = float32x4x4_t(v_color, v_color, v_color, v_color); + for i in x..x + width { + vst1q_f32_x4(self.scratch.as_mut_ptr().add(i * 16), v_color_4); + } + } else { + let one_minus_alpha = vdupq_n_f32(1.0 - alpha); + for i in x..x + width { + let ix = i * 16; // i is already the absolute column index, same as the opaque branch + let mut v = vld1q_f32_x4(self.scratch.as_ptr().add(ix)); + v.0 = vfmaq_f32(v_color, v.0, one_minus_alpha); + v.1 = vfmaq_f32(v_color, v.1, one_minus_alpha); + v.2 = vfmaq_f32(v_color, v.2, one_minus_alpha); + v.3 = vfmaq_f32(v_color, v.3, one_minus_alpha); + vst1q_f32_x4(self.scratch.as_mut_ptr().add(ix), v); + } + } + } + + pub unsafe fn strip_simd(&mut self, x: usize, width: usize, alphas: &[u32], color: [f32; 4]) { + debug_assert!(alphas.len() >= width); + let v_color = vmulq_f32(vld1q_f32(color.as_ptr()), vdupq_n_f32(1.0 / 255.0)); + for i in 0..width { + let a = *alphas.get_unchecked(i); + // all this zipping compiles to tbl, we should probably just write that + let a1 = vreinterpret_u8_u32(vdup_n_u32(a)); + let a2 = vreinterpret_u16_u8(vzip1_u8(a1, vdup_n_u8(0))); + let a3 = vcombine_u16(a2, vdup_n_u16(0)); + let a4 = vreinterpretq_u32_u16(vzip1q_u16(a3, vdupq_n_u16(0))); +
let alpha = vcvtq_f32_u32(a4); + let ix = (x + i) * 16; + let mut v = vld1q_f32_x4(self.scratch.as_ptr().add(ix)); + let one_minus_alpha = vfmsq_laneq_f32(vdupq_n_f32(1.0), alpha, v_color, 3); + v.0 = vfmaq_laneq_f32(vmulq_laneq_f32(v_color, alpha, 0), v.0, one_minus_alpha, 0); + v.1 = vfmaq_laneq_f32(vmulq_laneq_f32(v_color, alpha, 1), v.1, one_minus_alpha, 1); + v.2 = vfmaq_laneq_f32(vmulq_laneq_f32(v_color, alpha, 2), v.2, one_minus_alpha, 2); + v.3 = vfmaq_laneq_f32(vmulq_laneq_f32(v_color, alpha, 3), v.3, one_minus_alpha, 3); + vst1q_f32_x4(self.scratch.as_mut_ptr().add(ix), v); + } + } +} + +#[inline(never)] +pub fn render_strips_simd(tiles: &[Tile], strip_buf: &mut Vec, alpha_buf: &mut Vec) { + unsafe { + strip_buf.clear(); + let mut strip_start = true; + let mut cols = alpha_buf.len() as u32; + let mut prev_tile = &tiles[0]; + let mut fp = prev_tile.footprint().0; + let mut seg_start = 0; + let mut delta = 0; + // Note: the input should contain a sentinel tile, to avoid having + // logic here to process the final strip. 
+ const IOTA: [f32; 4] = [0.0, 1.0, 2.0, 3.0]; + let iota = vld1q_f32(IOTA.as_ptr()); + for i in 1..tiles.len() { + let tile = &tiles[i]; + if prev_tile.loc() != tile.loc() { + let start_delta = delta; + let same_strip = prev_tile.loc().same_strip(&tile.loc()); + if same_strip { + fp |= 8; + } + let x0 = fp.trailing_zeros(); + let x1 = 32 - fp.leading_zeros(); + let mut areas = [[start_delta as f32; 4]; 4]; + for tile in &tiles[seg_start..i] { + // small gain possible here to unpack in simd, but llvm goes halfway + delta += tile.delta(); + let p0 = Vec2::unpack(tile.p0); + let p1 = Vec2::unpack(tile.p1); + let slope = (p1.x - p0.x) / (p1.y - p0.y); + let vstarty = vsubq_f32(vdupq_n_f32(p0.y), iota); + let vy0 = vminq_f32(vmaxq_f32(vstarty, vdupq_n_f32(0.0)), vdupq_n_f32(1.0)); + let vy1a = vsubq_f32(vdupq_n_f32(p1.y), iota); + let vy1 = vminq_f32(vmaxq_f32(vy1a, vdupq_n_f32(0.0)), vdupq_n_f32(1.0)); + let vdy = vsubq_f32(vy0, vy1); + let mask = vceqzq_f32(vdy); + let vslope = vbslq_f32(mask, vdupq_n_f32(0.0), vdupq_n_f32(slope)); + let vdy0 = vsubq_f32(vy0, vstarty); + let vdy1 = vsubq_f32(vy1, vstarty); + let mut vyedge = vdupq_n_f32(0.0); + if p0.x == 0.0 { + let ye = vsubq_f32(vdupq_n_f32(1.0), vstarty); + vyedge = vminq_f32(vmaxq_f32(ye, vdupq_n_f32(0.0)), vdupq_n_f32(1.0)); + } else if p1.x == 0.0 { + let ye = vsubq_f32(vy1a, vdupq_n_f32(1.0)); + vyedge = vminq_f32(vmaxq_f32(ye, vdupq_n_f32(-1.0)), vdupq_n_f32(0.0)); + } + for x in x0..x1 { + let mut varea = vld1q_f32(areas.as_ptr().add(x as usize) as *const f32); + varea = vaddq_f32(varea, vyedge); + let vstartx = vdupq_n_f32(p0.x - x as f32); + let vxx0 = vfmaq_f32(vstartx, vdy0, vslope); + let vxx1 = vfmaq_f32(vstartx, vdy1, vslope); + let vxmin0 = vminq_f32(vxx0, vxx1); + let vxmax = vmaxq_f32(vxx0, vxx1); + let vxmin = + vsubq_f32(vminq_f32(vxmin0, vdupq_n_f32(1.0)), vdupq_n_f32(1e-6)); + let vb = vminq_f32(vxmax, vdupq_n_f32(1.0)); + let vc = vmaxq_f32(vb, vdupq_n_f32(0.0)); + let vd = vmaxq_f32(vxmin, 
vdupq_n_f32(0.0)); + let vd2 = vmulq_f32(vd, vd); + let vd2c2 = vfmsq_f32(vd2, vc, vc); + let vax = vfmaq_f32(vb, vd2c2, vdupq_n_f32(0.5)); + let va = vdivq_f32(vsubq_f32(vax, vxmin), vsubq_f32(vxmax, vxmin)); + varea = vfmaq_f32(varea, va, vdy); + vst1q_f32(areas.as_mut_ptr().add(x as usize) as *mut f32, varea); + } + } + for x in x0..x1 { + let mut alphas = 0u32; + let varea = vld1q_f32(areas.as_ptr().add(x as usize) as *const f32); + let vnzw = vminq_f32(vabsq_f32(varea), vdupq_n_f32(1.0)); + let vscaled = vmulq_f32(vnzw, vdupq_n_f32(255.0)); + let vbits = vreinterpretq_u8_u32(vcvtnq_u32_f32(vscaled)); + let vbits2 = vuzp1q_u8(vbits, vbits); + let vbits3 = vreinterpretq_u32_u8(vuzp1q_u8(vbits2, vbits2)); + vst1q_lane_u32::<0>(&mut alphas, vbits3); + alpha_buf.push(alphas); + } + + if strip_start { + let xy = (1 << 18) * prev_tile.y as u32 + 4 * prev_tile.x as u32 + x0; + let strip = Strip { + xy, + col: cols, + winding: start_delta, + }; + strip_buf.push(strip); + } + cols += x1 - x0; + fp = if same_strip { 1 } else { 0 }; + strip_start = !same_strip; + seg_start = i; + if !prev_tile.loc().same_row(&tile.loc()) { + delta = 0; + } + } + fp |= tile.footprint().0; + prev_tile = tile; + } + } +} diff --git a/cpu-sparse/src/strip.rs b/cpu-sparse/src/strip.rs new file mode 100644 index 00000000..7dbdc8b3 --- /dev/null +++ b/cpu-sparse/src/strip.rs @@ -0,0 +1,205 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! CPU implementation of sparse strip rendering +//! +//! This is copied from the most recent GPU implementation, but has +//! path_id stripped out, as on CPU we'll be doing one path at a time. +//! That decision makes sense to some extent even when uploading to +//! GPU, though some mechanism is required to tie the strips to paint. +//! +//! If there becomes a single, unified code base for this, then the +//! path_id type should probably become a generic parameter. 
+ +use crate::{tiling::Vec2, wide_tile::STRIP_HEIGHT}; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct Loc { + x: u16, + y: u16, +} + +pub(crate) struct Footprint(pub(crate) u32); + +pub struct Tile { + pub x: u16, + pub y: u16, + pub p0: u32, // packed + pub p1: u32, // packed +} + +impl std::fmt::Debug for Tile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let p0 = Vec2::unpack(self.p0); + let p1 = Vec2::unpack(self.p1); + write!( + f, + "Tile {{ xy: ({}, {}), p0: ({:.4}, {:.4}), p1: ({:.4}, {:.4}) }}", + self.x, self.y, p0.x, p0.y, p1.x, p1.y + ) + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct Strip { + pub xy: u32, // this could be u16's on the Rust side + pub col: u32, + pub winding: i32, +} + +impl Loc { + pub(crate) fn same_strip(&self, other: &Self) -> bool { + self.same_row(other) && (other.x - self.x) / 2 == 0 + } + + pub(crate) fn same_row(&self, other: &Self) -> bool { + self.y == other.y + } +} + +impl Tile { + #[allow(unused)] + /// Create a tile from synthetic data. + fn new(loc: Loc, footprint: Footprint, delta: i32) -> Self { + let p0 = (delta == -1) as u32 * 65536 + footprint.0.trailing_zeros() * 8192; + let p1 = (delta == 1) as u32 * 65536 + (32 - footprint.0.leading_zeros()) * 8192; + Tile { + x: loc.x, + y: loc.y, + p0, + p1, + } + } + + pub(crate) fn loc(&self) -> Loc { + Loc { + x: self.x, + y: self.y, + } + } + + pub(crate) fn footprint(&self) -> Footprint { + let x0 = (self.p0 & 0xffff) as f32 * (1.0 / 8192.0); + let x1 = (self.p1 & 0xffff) as f32 * (1.0 / 8192.0); + // On CPU, might be better to do this as fixed point + let xmin = x0.min(x1).floor() as u32; + let xmax = (xmin + 1).max(x0.max(x1).ceil() as u32); + Footprint((1 << xmax) - (1 << xmin)) + } + + pub(crate) fn delta(&self) -> i32 { + ((self.p1 >> 16) == 0) as i32 - ((self.p0 >> 16) == 0) as i32 + } + + // Comparison function for sorting. Only compares loc, doesn't care + // about points. 
Unpacking code has been validated to be efficient in + // Godbolt. + pub fn cmp(&self, b: &Tile) -> std::cmp::Ordering { + let xya = ((self.y as u32) << 16) + (self.x as u32); + let xyb = ((b.y as u32) << 16) + (b.x as u32); + xya.cmp(&xyb) + } +} + +// This can be unused when SIMD is selected. Probably a good idea to make it +// selectable at runtime; will be needed for AVX2. +#[allow(unused)] +pub fn render_strips_scalar(tiles: &[Tile], strip_buf: &mut Vec, alpha_buf: &mut Vec) { + strip_buf.clear(); + let mut strip_start = true; + let mut cols = alpha_buf.len() as u32; + let mut prev_tile = &tiles[0]; + let mut fp = prev_tile.footprint().0; + let mut seg_start = 0; + let mut delta = 0; + // Note: the input should contain a sentinel tile, to avoid having + // logic here to process the final strip. + for i in 1..tiles.len() { + let tile = &tiles[i]; + if prev_tile.loc() != tile.loc() { + let start_delta = delta; + let same_strip = prev_tile.loc().same_strip(&tile.loc()); + if same_strip { + fp |= 8; + } + let x0 = fp.trailing_zeros(); + let x1 = 32 - fp.leading_zeros(); + let mut areas = [[start_delta as f32; 4]; 4]; + for tile in &tiles[seg_start..i] { + delta += tile.delta(); + let p0 = Vec2::unpack(tile.p0); + let p1 = Vec2::unpack(tile.p1); + let slope = (p1.x - p0.x) / (p1.y - p0.y); + for x in x0..x1 { + let startx = p0.x - x as f32; + for y in 0..4 { + let starty = p0.y - y as f32; + let y0 = starty.clamp(0.0, 1.0); + let y1 = (p1.y - y as f32).clamp(0.0, 1.0); + let dy = y0 - y1; + // Note: getting rid of this predicate might help with + // auto-vectorization. That said, just getting rid of + // it causes artifacts (which may be divide by zero). 
+ if dy != 0.0 { + let xx0 = startx + (y0 - starty) * slope; + let xx1 = startx + (y1 - starty) * slope; + let xmin0 = xx0.min(xx1); + let xmax = xx0.max(xx1); + let xmin = xmin0.min(1.0) - 1e-6; + let b = xmax.min(1.0); + let c = b.max(0.0); + let d = xmin.max(0.0); + let a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin); + areas[x as usize][y] += a * dy; + } + if p0.x == 0.0 { + areas[x as usize][y] += (y as f32 - p0.y + 1.0).clamp(0.0, 1.0); + } else if p1.x == 0.0 { + areas[x as usize][y] -= (y as f32 - p1.y + 1.0).clamp(0.0, 1.0); + } + } + } + } + for x in x0..x1 { + let mut alphas = 0u32; + for y in 0..4 { + let area = areas[x as usize][y]; + // nonzero winding number rule + let area_u8 = (area.abs().min(1.0) * 255.0).round() as u32; + alphas += area_u8 << (y * 8); + } + alpha_buf.push(alphas); + } + + if strip_start { + let xy = (1 << 18) * prev_tile.y as u32 + 4 * prev_tile.x as u32 + x0; + let strip = Strip { + xy, + col: cols, + winding: start_delta, + }; + strip_buf.push(strip); + } + cols += x1 - x0; + fp = if same_strip { 1 } else { 0 }; + strip_start = !same_strip; + seg_start = i; + if !prev_tile.loc().same_row(&tile.loc()) { + delta = 0; + } + } + fp |= tile.footprint().0; + prev_tile = tile; + } +} + +impl Strip { + pub fn x(&self) -> u32 { + self.xy & 0xffff + } + + pub fn strip_y(&self) -> u32 { + self.xy / ((1 << 16) * STRIP_HEIGHT as u32) + } +} diff --git a/cpu-sparse/src/tiling.rs b/cpu-sparse/src/tiling.rs new file mode 100644 index 00000000..a08f360f --- /dev/null +++ b/cpu-sparse/src/tiling.rs @@ -0,0 +1,304 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::strip::Tile; + +const TILE_WIDTH: u32 = 4; +const TILE_HEIGHT: u32 = 4; + +const TILE_SCALE_X: f32 = 1.0 / TILE_WIDTH as f32; +const TILE_SCALE_Y: f32 = 1.0 / TILE_HEIGHT as f32; + +/// This is just Line but f32 +#[derive(Clone, Copy)] +pub struct FlatLine { + // should these be vec2? 
+ pub p0: [f32; 2], + pub p1: [f32; 2], +} + +impl FlatLine { + pub fn new(p0: [f32; 2], p1: [f32; 2]) -> Self { + Self { p0, p1 } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Vec2 { + pub x: f32, + pub y: f32, +} + +const TILE_SCALE: f32 = 8192.0; +// scale factor relative to unit square in tile +const FRAC_TILE_SCALE: f32 = 8192.0 * 4.0; + +fn scale_up(z: f32) -> u32 { + (z * FRAC_TILE_SCALE).round() as u32 +} + +impl Vec2 { + fn new(x: f32, y: f32) -> Self { + Vec2 { x, y } + } + + fn from_array(xy: [f32; 2]) -> Self { + Vec2::new(xy[0], xy[1]) + } + + #[allow(unused)] + // Note: this assumes values in range. + fn pack(self) -> u32 { + // TODO: scale should depend on tile size + let x = (self.x * TILE_SCALE).round() as u32; + let y = (self.y * TILE_SCALE).round() as u32; + (y << 16) + x + } + + pub fn unpack(packed: u32) -> Self { + let x = (packed & 0xffff) as f32 * (1.0 / TILE_SCALE); + let y = (packed >> 16) as f32 * (1.0 / TILE_SCALE); + Vec2::new(x, y) + } +} + +impl std::ops::Add for Vec2 { + type Output = Self; + + fn add(self, rhs: Vec2) -> Self { + Vec2::new(self.x + rhs.x, self.y + rhs.y) + } +} + +impl std::ops::Sub for Vec2 { + type Output = Self; + + fn sub(self, rhs: Vec2) -> Self { + Vec2::new(self.x - rhs.x, self.y - rhs.y) + } +} + +impl std::ops::Mul for Vec2 { + type Output = Self; + + fn mul(self, rhs: f32) -> Self { + Vec2::new(self.x * rhs, self.y * rhs) + } +} + +fn span(a: f32, b: f32) -> u32 { + (a.max(b).ceil() - a.min(b).floor()).max(1.0) as u32 +} + +pub fn make_tiles(lines: &[FlatLine], tile_buf: &mut Vec) { + tile_buf.clear(); + for line in lines { + let p0 = Vec2::from_array(line.p0); + let p1 = Vec2::from_array(line.p1); + let s0 = p0 * TILE_SCALE_X; + let s1 = p1 * TILE_SCALE_Y; + let count_x = span(s0.x, s1.x); + let count_y = span(s0.y, s1.y); + let mut x = s0.x.floor(); + if s0.x == x && s1.x < x { + // s0.x is on right side of first tile + x -= 1.0; + } + let mut y = s0.y.floor(); + if s0.y == y && s1.y < y { + // s0.y 
is on bottom of first tile + y -= 1.0; + } + let xfrac0 = scale_up(s0.x - x); + let yfrac0 = scale_up(s0.y - y); + let packed0 = (yfrac0 << 16) + xfrac0; + // These could be replaced with <2 and the max(1.0) in span removed + if count_x == 1 { + let xfrac1 = scale_up(s1.x - x); + if count_y == 1 { + let yfrac1 = scale_up(s1.y - y); + let packed1 = (yfrac1 << 16) + xfrac1; + // 1x1 tile + tile_buf.push(Tile { + x: x as u16, + y: y as u16, + p0: packed0, + p1: packed1, + }); + } else { + // vertical column + let slope = (s1.x - s0.x) / (s1.y - s0.y); + let sign = (s1.y - s0.y).signum(); + let mut xclip0 = (s0.x - x) + (y - s0.y) * slope; + let yclip = if sign > 0.0 { + xclip0 += slope; + scale_up(1.0) + } else { + 0 + }; + let mut last_packed = packed0; + for i in 0..count_y - 1 { + let xclip = xclip0 + i as f32 * sign * slope; + let xfrac = scale_up(xclip).max(1); + let packed = (yclip << 16) + xfrac; + tile_buf.push(Tile { + x: x as u16, + y: (y + i as f32 * sign) as u16, + p0: last_packed, + p1: packed, + }); + // flip y between top and bottom of tile + last_packed = packed ^ ((FRAC_TILE_SCALE as u32) << 16); + } + let yfrac1 = scale_up(s1.y - (y + (count_y - 1) as f32 * sign)); + let packed1 = (yfrac1 << 16) + xfrac1; + + tile_buf.push(Tile { + x: x as u16, + y: (y + (count_y - 1) as f32 * sign) as u16, + p0: last_packed, + p1: packed1, + }); + } + } else if count_y == 1 { + // horizontal row + let slope = (s1.y - s0.y) / (s1.x - s0.x); + let sign = (s1.x - s0.x).signum(); + let mut yclip0 = (s0.y - y) + (x - s0.x) * slope; + let xclip = if sign > 0.0 { + yclip0 += slope; + scale_up(1.0) + } else { + 0 + }; + let mut last_packed = packed0; + for i in 0..count_x - 1 { + let yclip = yclip0 + i as f32 * sign * slope; + let yfrac = scale_up(yclip).max(1); + let packed = (yfrac << 16) + xclip; + tile_buf.push(Tile { + x: (x + i as f32 * sign) as u16, + y: y as u16, + p0: last_packed, + p1: packed, + }); + // flip x between left and right of tile + last_packed = packed 
^ (FRAC_TILE_SCALE as u32); + } + let xfrac1 = scale_up(s1.x - (x + (count_x - 1) as f32 * sign)); + let yfrac1 = scale_up(s1.y - y); + let packed1 = (yfrac1 << 16) + xfrac1; + + tile_buf.push(Tile { + x: (x + (count_x - 1) as f32 * sign) as u16, + y: y as u16, + p0: last_packed, + p1: packed1, + }); + } else { + // general case + let recip_dx = 1.0 / (s1.x - s0.x); + let signx = (s1.x - s0.x).signum(); + let recip_dy = 1.0 / (s1.y - s0.y); + let signy = (s1.y - s0.y).signum(); + // t parameter for next intersection with a vertical grid line + let mut t_clipx = (x - s0.x) * recip_dx; + let xclip = if signx > 0.0 { + t_clipx += recip_dx; + scale_up(1.0) + } else { + 0 + }; + // t parameter for next intersection with a horizontal grid line + let mut t_clipy = (y - s0.y) * recip_dy; + let yclip = if signy > 0.0 { + t_clipy += recip_dy; + scale_up(1.0) + } else { + 0 + }; + let x1 = x + (count_x - 1) as f32 * signx; + let y1 = y + (count_y - 1) as f32 * signy; + let mut xi = x; + let mut yi = y; + let mut last_packed = packed0; + let mut count = 0; + while xi != x1 || yi != y1 { + count += 1; + if count == 400 { + panic!(); + } + if t_clipy < t_clipx { + // intersected with horizontal grid line + let x_intersect = s0.x + (s1.x - s0.x) * t_clipy - xi; + let xfrac = scale_up(x_intersect).max(1); // maybe should clamp? + let packed = (yclip << 16) + xfrac; + tile_buf.push(Tile { + x: xi as u16, + y: yi as u16, + p0: last_packed, + p1: packed, + }); + t_clipy += recip_dy.abs(); + yi += signy; + last_packed = packed ^ ((FRAC_TILE_SCALE as u32) << 16); + } else { + // intersected with vertical grid line + let y_intersect = s0.y + (s1.y - s0.y) * t_clipx - yi; + let yfrac = scale_up(y_intersect).max(1); // maybe should clamp? 
+ let packed = (yfrac << 16) + xclip; + tile_buf.push(Tile { + x: xi as u16, + y: yi as u16, + p0: last_packed, + p1: packed, + }); + t_clipx += recip_dx.abs(); + xi += signx; + last_packed = packed ^ (FRAC_TILE_SCALE as u32); + } + } + let xfrac1 = scale_up(s1.x - xi); + let yfrac1 = scale_up(s1.y - yi); + let packed1 = (yfrac1 << 16) + xfrac1; + + tile_buf.push(Tile { + x: xi as u16, + y: yi as u16, + p0: last_packed, + p1: packed1, + }); + } + } + // This particular choice of sentinel tiles generates a sentinel strip. + tile_buf.push(Tile { + x: 0x3ffd, + y: 0x3fff, + p0: 0, + p1: 0, + }); + tile_buf.push(Tile { + x: 0x3fff, + y: 0x3fff, + p0: 0, + p1: 0, + }); +} + +#[test] +fn tiling() { + let l = FlatLine { + p0: [1.3, 1.4], + p1: [20.1, 50.2], + }; + let mut tiles = vec![]; + make_tiles(&[l], &mut tiles); + for tile in &tiles { + let p0 = Vec2::unpack(tile.p0); + let p1 = Vec2::unpack(tile.p1); + println!( + "@{}, {}: ({}, {}) - ({}, {})", + tile.x, tile.y, p0.x, p0.y, p1.x, p1.y + ); + } +} diff --git a/cpu-sparse/src/wide_tile.rs b/cpu-sparse/src/wide_tile.rs new file mode 100644 index 00000000..a5ed6d85 --- /dev/null +++ b/cpu-sparse/src/wide_tile.rs @@ -0,0 +1,57 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use piet_next::peniko::color::{AlphaColor, Srgb}; + +pub const WIDE_TILE_WIDTH: usize = 256; +pub const STRIP_HEIGHT: usize = 4; + +pub(crate) struct WideTile { + pub(crate) bg: AlphaColor, + pub(crate) cmds: Vec, +} + +#[derive(Debug)] +pub(crate) enum Cmd { + Fill(CmdFill), + Strip(CmdStrip), +} + +#[derive(Debug)] +pub(crate) struct CmdFill { + pub(crate) x: u32, + pub(crate) width: u32, + pub(crate) color: AlphaColor, +} + +#[derive(Debug)] +pub(crate) struct CmdStrip { + pub(crate) x: u32, + pub(crate) width: u32, + pub(crate) alpha_ix: usize, + pub(crate) color: AlphaColor, +} + +impl Default for WideTile { + fn default() -> Self { + Self { + bg: AlphaColor::TRANSPARENT, + cmds: vec![], + } + } +} + 
+impl WideTile { + pub(crate) fn fill(&mut self, x: u32, width: u32, color: AlphaColor) { + if x == 0 && width == WIDE_TILE_WIDTH as u32 && color.components[3] == 1.0 { + self.cmds.clear(); + self.bg = color; + } else { + self.cmds.push(Cmd::Fill(CmdFill { x, width, color })); + } + } + + pub(crate) fn push(&mut self, cmd: Cmd) { + self.cmds.push(cmd) + } +} diff --git a/piet-next/Cargo.toml b/piet-next/Cargo.toml new file mode 100644 index 00000000..6781a867 --- /dev/null +++ b/piet-next/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "piet-next" +version = "0.1.0" +authors = ["Raph Levien "] +description = "A testbed for next-generation 2D renderer ideas" +license = "Apache-2.0 OR MIT" +edition = "2021" +keywords = ["graphics", "2d"] +categories = ["graphics"] + +[dependencies] +peniko = "0.3.0" diff --git a/piet-next/src/any.rs b/piet-next/src/any.rs new file mode 100644 index 00000000..58254058 --- /dev/null +++ b/piet-next/src/any.rs @@ -0,0 +1,245 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#![allow(unused)] +use std::{any::Any, sync::Arc}; + +use peniko::{kurbo::Affine, BrushRef}; + +use crate::{Id, Path, Record, RenderCtx, ResourceCtx}; + +#[derive(Clone)] +pub struct AnyImage { + // TODO: move id into trait + id: Id, + body: Arc, +} + +pub trait AnyRecord: Send { + fn as_any(&mut self) -> &mut dyn std::any::Any; + + fn dyn_finish(&mut self) -> Arc; +} + +impl AnyRecord for R +where + <::Resource as ResourceCtx>::Recording: Sync, +{ + fn as_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn dyn_finish(&mut self) -> Arc { + let recording = self.finish(); + Arc::new(recording) + } +} + +pub trait AnyRenderCtx { + fn as_any(&mut self) -> &mut dyn std::any::Any; + + fn dyn_playback(&mut self, recording: &Arc); + + fn dyn_fill(&mut self, path: &Path, brush: BrushRef); +} + +impl AnyRenderCtx for RC { + fn as_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn dyn_playback(&mut self, recording: &Arc) 
{ + if let Some(recording) = recording.downcast_ref() { + self.playback(recording) + } else { + panic!("downcast error on playback"); + } + } + + fn dyn_fill(&mut self, path: &Path, brush: BrushRef) { + self.fill(path, brush) + } +} + +pub type BoxedRenderCtx = Box; + +impl RenderCtx for BoxedRenderCtx { + type Resource = Box; + + fn playback(&mut self, recording: &Arc<::Recording>) { + self.dyn_playback(recording); + } + + fn fill(&mut self, path: &Path, brush: BrushRef) { + self.dyn_fill(path, brush); + } + + fn stroke(&mut self, path: &Path, stroke: &peniko::kurbo::Stroke, brush: BrushRef) { + todo!() + } + + fn draw_image( + &mut self, + image: &::Image, + dst_rect: peniko::kurbo::Rect, + interp: crate::InterpolationMode, + ) { + todo!() + } + + fn clip(&mut self, path: &Path) { + todo!() + } + + fn save(&mut self) { + todo!() + } + + fn restore(&mut self) { + todo!() + } + + fn transform(&mut self, affine: Affine) { + todo!() + } + + fn begin_draw_glyphs(&mut self, font: &peniko::Font) { + todo!() + } + + fn font_size(&mut self, size: f32) { + todo!() + } + + fn hint(&mut self, hint: bool) { + todo!() + } + + fn glyph_brush(&mut self, brush: BrushRef) { + todo!() + } + + fn draw_glyphs(&mut self, style: peniko::StyleRef, glyphs: &dyn Iterator) { + todo!() + } + + fn end_draw_glyphs(&mut self) { + todo!() + } +} + +pub trait AnyResourceCtx { + fn as_any(&mut self) -> &mut dyn std::any::Any; + + fn dyn_record(&mut self) -> Box; + + fn dyn_make_image_with_stride( + &mut self, + width: usize, + height: usize, + stride: usize, + buf: &[u8], + format: crate::ImageFormat, + ) -> Result; +} + +impl ResourceCtx for Box { + type Image = AnyImage; + + type Recording = dyn Any + Send; + + type Record = Box; + + fn record(&mut self) -> Self::Record { + self.dyn_record() + } + + fn make_image_with_stride( + &mut self, + width: usize, + height: usize, + stride: usize, + buf: &[u8], + format: crate::ImageFormat, + ) -> Result { + let image = 
self.dyn_make_image_with_stride(width, height, stride, buf, format)?; + let id = Id::get(); + Ok(AnyImage { + id, + body: Arc::new(image), + }) + } +} + +pub struct BoxedAnyRecord(Option>); + +impl RenderCtx for Box { + type Resource = Box; + + fn playback(&mut self, recording: &Arc<::Recording>) { + self.dyn_playback(recording); + } + + fn fill(&mut self, path: &Path, brush: BrushRef) { + self.dyn_fill(path, brush); + } + + fn stroke(&mut self, path: &Path, stroke: &peniko::kurbo::Stroke, brush: BrushRef) { + todo!() + } + + fn draw_image( + &mut self, + image: &::Image, + dst_rect: peniko::kurbo::Rect, + interp: crate::InterpolationMode, + ) { + todo!() + } + + fn clip(&mut self, path: &Path) { + todo!() + } + + fn save(&mut self) { + todo!() + } + + fn restore(&mut self) { + todo!() + } + + fn transform(&mut self, affine: Affine) { + todo!() + } + + fn begin_draw_glyphs(&mut self, font: &peniko::Font) { + todo!() + } + + fn font_size(&mut self, size: f32) { + todo!() + } + + fn hint(&mut self, hint: bool) { + todo!() + } + + fn glyph_brush(&mut self, brush: BrushRef) { + todo!() + } + + fn draw_glyphs(&mut self, style: peniko::StyleRef, glyphs: &dyn Iterator) { + todo!() + } + + fn end_draw_glyphs(&mut self) { + todo!() + } +} + +impl Record for Box { + fn finish(&mut self) -> Arc<::Recording> { + self.dyn_finish() + } +} diff --git a/piet-next/src/generic_record.rs b/piet-next/src/generic_record.rs new file mode 100644 index 00000000..db4c1c24 --- /dev/null +++ b/piet-next/src/generic_record.rs @@ -0,0 +1,141 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#![allow(unused)] + +use std::sync::Arc; + +use peniko::{kurbo::Rect, Brush}; + +use crate::{InterpolationMode, Path, Record, RenderCtx, ResourceCtx}; + +pub struct GenericRecorder { + cmds: Vec>, +} + +pub struct GenericResources { + inner: RC::Resource, +} + +enum Cmd { + Fill(Path, Brush), + Image( + ::Image, + Rect, + InterpolationMode, + ), +} + +impl 
GenericRecorder { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + let cmds = Vec::new(); + GenericRecorder { cmds } + } + + pub fn play(&self, ctx: &mut RC) { + for cmd in &self.cmds { + match cmd { + Cmd::Fill(path, brush) => ctx.fill(path, brush.into()), + Cmd::Image(image, rect, interp) => ctx.draw_image(image, *rect, *interp), + } + } + } +} + +impl RenderCtx for GenericRecorder { + type Resource = GenericResources; + + fn playback(&mut self, recording: &std::sync::Arc<::Recording>) { + todo!() + } + + fn fill(&mut self, path: &Path, brush: peniko::BrushRef) { + self.cmds.push(Cmd::Fill(path.clone(), brush.to_owned())); + } + + fn stroke(&mut self, path: &Path, stroke: &peniko::kurbo::Stroke, brush: peniko::BrushRef) { + todo!() + } + + fn draw_image( + &mut self, + image: &::Image, + dst_rect: peniko::kurbo::Rect, + interp: crate::InterpolationMode, + ) { + let image = image.clone(); + self.cmds.push(Cmd::Image(image, dst_rect, interp)); + } + + fn clip(&mut self, path: &Path) { + todo!() + } + + fn save(&mut self) { + todo!() + } + + fn restore(&mut self) { + todo!() + } + + fn transform(&mut self, affine: peniko::kurbo::Affine) { + todo!() + } + + fn begin_draw_glyphs(&mut self, font: &peniko::Font) { + todo!() + } + + fn font_size(&mut self, size: f32) { + todo!() + } + + fn hint(&mut self, hint: bool) { + todo!() + } + + fn glyph_brush(&mut self, brush: peniko::BrushRef) { + todo!() + } + + fn draw_glyphs(&mut self, style: peniko::StyleRef, glyphs: &dyn Iterator) { + todo!() + } + + fn end_draw_glyphs(&mut self) { + todo!() + } +} + +impl ResourceCtx for GenericResources { + type Image = ::Image; + + type Recording = GenericRecorder; + + type Record = GenericRecorder; + + fn record(&mut self) -> Self::Record { + GenericRecorder::new() + } + + fn make_image_with_stride( + &mut self, + width: usize, + height: usize, + stride: usize, + buf: &[u8], + format: crate::ImageFormat, + ) -> Result { + self.inner + .make_image_with_stride(width, 
height, stride, buf, format) + } +} + +impl Record for GenericRecorder { + fn finish(&mut self) -> Arc<::Recording> { + let cmds = std::mem::take(&mut self.cmds); + Arc::new(GenericRecorder { cmds }) + } +} diff --git a/piet-next/src/lib.rs b/piet-next/src/lib.rs new file mode 100644 index 00000000..249a676b --- /dev/null +++ b/piet-next/src/lib.rs @@ -0,0 +1,143 @@ +// Copyright 2024 the Piet Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::{ + num::NonZeroU64, + sync::{atomic::AtomicU64, Arc}, +}; + +pub use peniko; + +use peniko::{ + kurbo::{Affine, BezPath, Rect, Stroke}, + BrushRef, Font, StyleRef, +}; + +mod any; +mod generic_record; + +pub use generic_record::GenericRecorder; + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct Id(NonZeroU64); + +// TODO: think this through +pub type Error = Box; + +#[derive(Clone)] +pub struct Path { + pub id: Id, + pub path: BezPath, + // TODO: Vello encoding. kurbo BezPath can be used in interim + // Question: probably want to special-case rect, line, ellipse at least + // Probably also rounded-rect (incl varying corner radii) +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub enum ImageFormat { + Grayscale, + Rgb, + RgbaSeparate, + RgbaPremul, +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub enum InterpolationMode { + NearestNeighbor, + Bilinear, + // TODO: want to add cubic etc +} + +/// Positioned glyph. This type matches Vello. +pub struct Glyph { + pub id: u32, + pub x: f32, + pub y: f32, +} + +pub trait RenderCtx { + type Resource: ResourceCtx; + + fn playback(&mut self, recording: &Arc<::Recording>); + + // should even-odd be an arg or another method? 
+ fn fill(&mut self, path: &Path, brush: BrushRef); + + fn stroke(&mut self, path: &Path, stroke: &Stroke, brush: BrushRef); + + // TODO: clamp/extend/mirror + fn draw_image( + &mut self, + image: &::Image, + dst_rect: Rect, + interp: InterpolationMode, + ); + + fn clip(&mut self, path: &Path); + + fn save(&mut self); + + fn restore(&mut self); + + fn transform(&mut self, affine: Affine); + + /// Start a glyph drawing operation + /// + /// The glyph drawing operation ends with [`RenderCtx::end_draw_glyphs`] + fn begin_draw_glyphs(&mut self, font: &Font); + + // Following methods are borrowed from Vello's DrawGlyph + fn font_size(&mut self, size: f32); + + fn hint(&mut self, hint: bool); + + fn glyph_brush(&mut self, brush: BrushRef); + + fn draw_glyphs(&mut self, style: StyleRef, glyphs: &dyn Iterator); + + fn end_draw_glyphs(&mut self); +} + +pub trait Record: RenderCtx { + // It should be possible to take self by move, but that triggers E0161 + fn finish(&mut self) -> Arc<::Recording>; +} + +pub trait ResourceCtx { + type Image: Clone + Send; + + type Recording: Send + ?Sized; + + type Record: Record + Send; + + fn record(&mut self) -> Self::Record; + + fn make_image_with_stride( + &mut self, + width: usize, + height: usize, + stride: usize, + buf: &[u8], + format: ImageFormat, + ) -> Result; +} + +static ID_COUNTER: AtomicU64 = AtomicU64::new(0); + +impl Id { + pub fn get() -> Self { + let n = ID_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if let Some(x) = n.checked_add(1) { + Self(NonZeroU64::new(x).unwrap()) + } else { + panic!("wow, overflow of u64, congratulations") + } + } +} + +impl From for Path { + fn from(path: BezPath) -> Self { + let id = Id::get(); + Self { id, path } + } +}