diff --git a/Cargo.lock b/Cargo.lock index 7e7e2e8e1..d607fbdac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -932,8 +932,7 @@ dependencies = [ [[package]] name = "fearless_simd" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb2907d1f08b2b316b9223ced5b0e89d87028ba8deae9764741dba8ff7f3903" +source = "git+https://github.com/valadaptive/fearless_simd?branch=more-avx2#b29bf2ad1409c26fb226d197a3368c3a85d8973d" dependencies = [ "bytemuck", "libm", @@ -3860,6 +3859,7 @@ name = "vello_bench" version = "0.0.0" dependencies = [ "criterion", + "fearless_simd", "parley", "rand", "smallvec", @@ -3942,6 +3942,7 @@ name = "vello_hybrid" version = "0.0.4" dependencies = [ "bytemuck", + "fearless_simd", "guillotiere", "hashbrown 0.15.5", "js-sys", @@ -3991,6 +3992,7 @@ name = "vello_sparse_tests" version = "0.0.0" dependencies = [ "bytemuck", + "fearless_simd", "image", "oxipng", "pollster", diff --git a/Cargo.toml b/Cargo.toml index eb85dc0f2..30847d184 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,7 +115,7 @@ rayon = { version = "1.10.0" } thread_local = "1.1.8" crossbeam-channel = "0.5.15" ordered-channel = { version = "1.2.0", features = ["crossbeam-channel"] } -fearless_simd = { version = "0.3.0", default-features = false } +fearless_simd = { git = "https://github.com/valadaptive/fearless_simd", branch = "more-avx2", default-features = false } # The below crates are experimental! vello_api = { path = "sparse_strips/vello_api", default-features = false } diff --git a/sparse_strips/vello_bench/Cargo.toml b/sparse_strips/vello_bench/Cargo.toml index d35ebe213..177b33e19 100644 --- a/sparse_strips/vello_bench/Cargo.toml +++ b/sparse_strips/vello_bench/Cargo.toml @@ -18,6 +18,7 @@ parley = { version = "0.5.0", default-features = true } rand = { workspace = true } smallvec = { workspace = true } usvg = { workspace = true } +fearless_simd = { workspace = true, features = ["force_support_fallback"] } [features] diff --git a/sparse_strips/vello_bench/src/glyph.rs b/sparse_strips/vello_bench/src/glyph.rs index 32ff50996..e02da9a91 100644 --- a/sparse_strips/vello_bench/src/glyph.rs +++ b/sparse_strips/vello_bench/src/glyph.rs @@ -25,7 +25,7 @@ pub fn glyph(c: &mut Criterion) { strip_generator: StripGenerator::new( WIDTH, HEIGHT, - Level::try_detect().unwrap_or(Level::fallback()), + Level::try_detect().unwrap_or(Level::baseline()), ), strip_storage: StripStorage::default(), glyph_caches: Default::default(), diff --git a/sparse_strips/vello_common/Cargo.toml b/sparse_strips/vello_common/Cargo.toml index 1e82221f6..4d80beb45 100644 --- a/sparse_strips/vello_common/Cargo.toml +++ b/sparse_strips/vello_common/Cargo.toml @@ -28,6 +28,10 @@ smallvec = { workspace = true } libm = { version = "0.2.15", optional = true } log = { workspace = true } +[dev-dependencies] +# We use Level::fallback() in tests +fearless_simd = { workspace = true, features = ["force_support_fallback"] } + [features] default = ["std", "png", "text"] # Enable using SIMD instructions for rendering diff --git a/sparse_strips/vello_common/src/strip_generator.rs b/sparse_strips/vello_common/src/strip_generator.rs index e7bae82df..e12918aa4 100644 --- a/sparse_strips/vello_common/src/strip_generator.rs +++ b/sparse_strips/vello_common/src/strip_generator.rs @@ -158,7 +158,7 @@ mod tests { #[test] fn reset() { - let mut generator = StripGenerator::new(100, 100, Level::fallback()); + let mut generator = StripGenerator::new(100, 100, Level::new()); let mut storage = StripStorage::default(); let rect = Rect::new(0.0, 0.0, 100.0, 100.0); diff --git a/sparse_strips/vello_common/src/tile.rs b/sparse_strips/vello_common/src/tile.rs index f688f6cf9..f1beaac98 100644 --- a/sparse_strips/vello_common/src/tile.rs +++ b/sparse_strips/vello_common/src/tile.rs @@ -357,7 +357,7 @@ mod tests { p1: Point { x: 9.0, y: -1.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); assert!(tiles.is_empty()); @@ -370,7 +370,7 @@ mod tests { p1: Point { x: 103.0, y: 20.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); assert!(tiles.is_empty()); @@ -383,7 +383,7 @@ mod tests { p1: Point { x: 35.0, y: 105.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); assert!(tiles.is_empty()); @@ -396,7 +396,7 @@ mod tests { p1: Point { x: 2.0, y: 1.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); assert_eq!(tiles.tile_buf, [Tile::new_clamped(0, 0, 0, true)]); @@ -409,7 +409,7 @@ mod tests { p1: Point { x: 8.5, y: 1.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -430,7 +430,7 @@ mod tests { p1: Point { x: 1.0, y: 8.5 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -451,7 +451,7 @@ mod tests { p1: Point { x: 11.0, y: 8.5 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -474,7 +474,7 @@ mod tests { p1: Point { x: 1.0, y: 1.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -497,7 +497,7 @@ mod tests { p1: Point { x: 14.0, y: 6.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -520,7 +520,7 @@ mod tests { p1: Point { x: 2.0, y: 11.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 100, 100); tiles.sort_tiles(); @@ -548,7 +548,7 @@ mod tests { p1: Point { x: 0.0, y: 1.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line_1, line_2], 100, 100); assert_eq!( @@ -568,7 +568,7 @@ mod tests { p1: Point { x: 224.0, y: 388.0 }, }; - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&[line], 600, 600); } @@ -577,14 +577,14 @@ mod tests { let path = BezPath::from_svg("M261,0 L78848,0 L78848,4 L261,4 Z").unwrap(); let mut line_buf = vec![]; fill( - Level::try_detect().unwrap_or(Level::fallback()), + Level::try_detect().unwrap_or(Level::baseline()), &path, Affine::IDENTITY, &mut line_buf, &mut FlattenCtx::default(), ); - let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::fallback())); + let mut tiles = Tiles::new(Level::try_detect().unwrap_or(Level::baseline())); tiles.make_tiles(&line_buf, 10, 10); assert_eq!(tiles.tile_buf[0].x, 4); assert_eq!(tiles.tile_buf[1].x, 4); diff --git a/sparse_strips/vello_cpu/src/render.rs b/sparse_strips/vello_cpu/src/render.rs index fe2f6b174..3c2a1861a 100644 --- a/sparse_strips/vello_cpu/src/render.rs +++ b/sparse_strips/vello_cpu/src/render.rs @@ -82,7 +82,7 @@ pub struct RenderSettings { impl Default for RenderSettings { fn default() -> Self { Self { - level: Level::try_detect().unwrap_or(Level::fallback()), + level: Level::try_detect().unwrap_or(Level::baseline()), #[cfg(feature = "multithreading")] num_threads: (std::thread::available_parallelism() .unwrap() @@ -1000,7 +1000,7 @@ mod tests { let mut pixmap = Pixmap::new(200, 200); let settings = RenderSettings { - level: Level::try_detect().unwrap_or(Level::fallback()), + level: Level::try_detect().unwrap_or(Level::baseline()), num_threads: 1, render_mode: RenderMode::OptimizeQuality, }; diff --git a/sparse_strips/vello_dev_macros/src/test.rs b/sparse_strips/vello_dev_macros/src/test.rs index c0345de48..c83a4d9ad 100644 --- a/sparse_strips/vello_dev_macros/src/test.rs +++ b/sparse_strips/vello_dev_macros/src/test.rs @@ -249,9 +249,20 @@ pub(crate) fn vello_test_inner(attr: TokenStream, item: TokenStream) -> TokenStr use crate::util::{ check_ref, get_ctx }; + use crate::renderer::CpuRenderArgs; use vello_cpu::{RenderContext, RenderMode}; - let mut ctx = get_ctx::(#width, #height, #transparent, #num_threads, #level, #render_mode); + let mut ctx = get_ctx::( + #width, + #height, + #transparent, + CpuRenderArgs { + num_threads: #num_threads, + level: #level, + render_mode: + #render_mode, + } + ); #input_fn_name(&mut ctx); ctx.flush(); if !#no_ref { @@ -437,7 +448,7 @@ pub(crate) fn vello_test_inner(attr: TokenStream, item: TokenStream) -> TokenStr use crate::renderer::HybridRenderer; use vello_cpu::RenderMode; - let mut ctx = get_ctx::(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed); + let mut ctx = get_ctx::(#width, #height, #transparent, ()); #input_fn_name(&mut ctx); ctx.flush(); if !#no_ref { @@ -455,7 +466,7 @@ pub(crate) fn vello_test_inner(attr: TokenStream, item: TokenStream) -> TokenStr use crate::renderer::HybridRenderer; use vello_cpu::RenderMode; - let mut ctx = get_ctx::(#width, #height, #transparent, 0, "fallback", RenderMode::OptimizeSpeed); + let mut ctx = get_ctx::(#width, #height, #transparent, ()); #input_fn_name(&mut ctx); ctx.flush(); if !#no_ref { diff --git a/sparse_strips/vello_hybrid/Cargo.toml b/sparse_strips/vello_hybrid/Cargo.toml index 87ea57a08..13162ffdb 100644 --- a/sparse_strips/vello_hybrid/Cargo.toml +++ b/sparse_strips/vello_hybrid/Cargo.toml @@ -41,6 +41,8 @@ web-sys = { version = "0.3.77", features = [ ], optional = true } [dev-dependencies] +# We use Level::fallback() in tests +fearless_simd = { workspace = true, features = ["force_support_fallback"] } png = { workspace = true } pollster = { workspace = true } vello_common = { workspace = true, features = ["pico_svg"] } diff --git a/sparse_strips/vello_hybrid/examples/wgpu_webgl/src/lib.rs b/sparse_strips/vello_hybrid/examples/wgpu_webgl/src/lib.rs index 2c92302c1..d0e763deb 100644 --- a/sparse_strips/vello_hybrid/examples/wgpu_webgl/src/lib.rs +++ b/sparse_strips/vello_hybrid/examples/wgpu_webgl/src/lib.rs @@ -85,7 +85,7 @@ impl RendererWrapper { height, }, RenderSettings { - level: Level::try_detect().unwrap_or(Level::fallback()), + level: Level::try_detect().unwrap_or(Level::baseline()), atlas_config: AtlasConfig { atlas_size: (max_texture_dimension_2d, max_texture_dimension_2d), ..AtlasConfig::default() diff --git a/sparse_strips/vello_hybrid/src/scene.rs b/sparse_strips/vello_hybrid/src/scene.rs index 7d658088f..b6c6202e6 100644 --- a/sparse_strips/vello_hybrid/src/scene.rs +++ b/sparse_strips/vello_hybrid/src/scene.rs @@ -46,7 +46,7 @@ pub struct RenderSettings { impl Default for RenderSettings { fn default() -> Self { Self { - level: Level::try_detect().unwrap_or(Level::fallback()), + level: Level::try_detect().unwrap_or(Level::baseline()), atlas_config: AtlasConfig::default(), } } diff --git a/sparse_strips/vello_sparse_tests/Cargo.toml b/sparse_strips/vello_sparse_tests/Cargo.toml index 012ad601f..9a7790096 100644 --- a/sparse_strips/vello_sparse_tests/Cargo.toml +++ b/sparse_strips/vello_sparse_tests/Cargo.toml @@ -27,6 +27,7 @@ oxipng = { workspace = true, features = ["freestanding", "parallel"] } image = { workspace = true, features = ["png"] } skrifa = { workspace = true } smallvec = { workspace = true } +fearless_simd = { workspace = true, features = ["force_support_fallback"] } [target.'cfg(target_arch = "wasm32")'.dependencies] wasm-bindgen-test = "0.3.50" diff --git a/sparse_strips/vello_sparse_tests/tests/issues.rs b/sparse_strips/vello_sparse_tests/tests/issues.rs index 1bb935b8c..60ac5af64 100644 --- a/sparse_strips/vello_sparse_tests/tests/issues.rs +++ b/sparse_strips/vello_sparse_tests/tests/issues.rs @@ -404,7 +404,7 @@ fn gradient_color_alpha_unmul(ctx: &mut impl Renderer) { #[test] fn multi_threading_oob_access() { let settings = RenderSettings { - level: Level::try_detect().unwrap_or(Level::fallback()), + level: Level::try_detect().unwrap_or(Level::baseline()), num_threads: 4, render_mode: RenderMode::OptimizeQuality, }; diff --git a/sparse_strips/vello_sparse_tests/tests/renderer.rs b/sparse_strips/vello_sparse_tests/tests/renderer.rs index 12ea9c310..cb500fb2c 100644 --- a/sparse_strips/vello_sparse_tests/tests/renderer.rs +++ b/sparse_strips/vello_sparse_tests/tests/renderer.rs @@ -17,15 +17,10 @@ use vello_hybrid::Scene; use web_sys::WebGl2RenderingContext; pub(crate) trait Renderer: Sized { + type Args; type GlyphRenderer: GlyphRenderer; - fn new( - width: u16, - height: u16, - num_threads: u16, - level: Level, - render_mode: RenderMode, - ) -> Self; + fn new(width: u16, height: u16, args: Self::Args) -> Self; fn fill_path(&mut self, path: &BezPath); fn stroke_path(&mut self, path: &BezPath); fn fill_rect(&mut self, rect: &Rect); @@ -61,20 +56,68 @@ pub(crate) trait Renderer: Sized { fn execute_recording(&mut self, recording: &Recording); } +pub(crate) struct CpuRenderArgs { + pub num_threads: u16, + pub level: &'static str, + pub render_mode: RenderMode, +} + impl Renderer for RenderContext { + type Args = CpuRenderArgs; type GlyphRenderer = Self; - fn new( - width: u16, - height: u16, - num_threads: u16, - level: Level, - render_mode: RenderMode, - ) -> Self { + fn new(width: u16, height: u16, args: CpuRenderArgs) -> Self { + let level = match args.level { + #[cfg(target_arch = "aarch64")] + "neon" => Level::Neon( + Level::try_detect() + .unwrap_or(Level::fallback()) + .as_neon() + .expect("neon should be available"), + ), + #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + "wasm_simd128" => Level::WasmSimd128( + Level::try_detect() + .unwrap_or(Level::fallback()) + .as_wasm_simd128() + .expect("wasm simd128 should be available"), + ), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + "sse42" => { + if std::arch::is_x86_feature_detected!("sse4.2") { + #[cfg(not(all(target_feature = "avx2", target_feature = "fma")))] + { + Level::Sse4_2(unsafe { + vello_common::fearless_simd::Sse4_2::new_unchecked() + }) + } + + #[cfg(all(target_feature = "avx2", target_feature = "fma"))] + { + panic!("sse4.2 feature not compiled in") + } + } else { + panic!("sse4.2 feature not detected"); + } + } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + "avx2" => { + if std::arch::is_x86_feature_detected!("avx2") + && std::arch::is_x86_feature_detected!("fma") + { + Level::Avx2(unsafe { vello_common::fearless_simd::Avx2::new_unchecked() }) + } else { + panic!("avx2 or fma feature not detected"); + } + } + "fallback" => Level::fallback(), + _ => panic!("unknown level: {}", args.level), + }; + let settings = RenderSettings { level, - num_threads, - render_mode, + num_threads: args.num_threads, + render_mode: args.render_mode, }; Self::new_with(width, height, settings) @@ -207,17 +250,10 @@ pub(crate) struct HybridRenderer { #[cfg(not(all(target_arch = "wasm32", feature = "webgl")))] impl Renderer for HybridRenderer { + type Args = (); type GlyphRenderer = Scene; - fn new(width: u16, height: u16, num_threads: u16, level: Level, _: RenderMode) -> Self { - if num_threads != 0 { - panic!("hybrid renderer doesn't support multi-threading"); - } - - if !matches!(level, Level::Fallback(_)) { - panic!("hybrid renderer doesn't support SIMD"); - } - + fn new(width: u16, height: u16, _args: ()) -> Self { let scene = Scene::new(width, height); // Initialize wgpu device and queue for GPU rendering let instance = wgpu::Instance::default(); @@ -517,20 +553,13 @@ pub(crate) struct HybridRenderer { #[cfg(all(target_arch = "wasm32", feature = "webgl"))] impl Renderer for HybridRenderer { + type Args = (); type GlyphRenderer = Scene; - fn new(width: u16, height: u16, num_threads: u16, level: Level, _: RenderMode) -> Self { + fn new(width: u16, height: u16, _args: ()) -> Self { use wasm_bindgen::JsCast; use web_sys::HtmlCanvasElement; - if num_threads != 0 { - panic!("hybrid renderer doesn't support multi-threading"); - } - - if !matches!(level, Level::Fallback(_)) { - panic!("hybrid renderer doesn't support SIMD"); - } - let scene = Scene::new(width, height); // Create an offscreen HTMLCanvasElement, render the test image to it, and finally read off diff --git a/sparse_strips/vello_sparse_tests/tests/util.rs b/sparse_strips/vello_sparse_tests/tests/util.rs index 33ec30086..f78492d93 100644 --- a/sparse_strips/vello_sparse_tests/tests/util.rs +++ b/sparse_strips/vello_sparse_tests/tests/util.rs @@ -16,7 +16,6 @@ use vello_common::glyph::Glyph; use vello_common::kurbo::{BezPath, Join, Point, Rect, Shape, Stroke, Vec2}; use vello_common::peniko::{Blob, ColorStop, ColorStops, FontData}; use vello_common::pixmap::Pixmap; -use vello_cpu::{Level, RenderMode}; #[cfg(not(target_arch = "wasm32"))] use std::path::PathBuf; @@ -51,52 +50,8 @@ macro_rules! load_image { }}; } -pub(crate) fn get_ctx( - width: u16, - height: u16, - transparent: bool, - num_threads: u16, - level: &str, - render_mode: RenderMode, -) -> T { - let level = match level { - #[cfg(target_arch = "aarch64")] - "neon" => Level::Neon( - Level::try_detect() - .unwrap_or(Level::fallback()) - .as_neon() - .expect("neon should be available"), - ), - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - "wasm_simd128" => Level::WasmSimd128( - Level::try_detect() - .unwrap_or(Level::fallback()) - .as_wasm_simd128() - .expect("wasm simd128 should be available"), - ), - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - "sse42" => { - if std::arch::is_x86_feature_detected!("sse4.2") { - Level::Sse4_2(unsafe { vello_common::fearless_simd::Sse4_2::new_unchecked() }) - } else { - panic!("sse4.2 feature not detected"); - } - } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - "avx2" => { - if std::arch::is_x86_feature_detected!("avx2") - && std::arch::is_x86_feature_detected!("fma") - { - Level::Avx2(unsafe { vello_common::fearless_simd::Avx2::new_unchecked() }) - } else { - panic!("avx2 or fma feature not detected"); - } - } - "fallback" => Level::fallback(), - _ => panic!("unknown level: {level}"), - }; - - let mut ctx = T::new(width, height, num_threads, level, render_mode); +pub(crate) fn get_ctx(width: u16, height: u16, transparent: bool, args: T::Args) -> T { + let mut ctx = T::new(width, height, args); if !transparent { let path = Rect::new(0.0, 0.0, width as f64, height as f64).to_path(0.1);