diff --git a/sparse_strips/vello_bench/src/fine/fill.rs b/sparse_strips/vello_bench/src/fine/fill.rs index 850548b4a..e2d6d9c66 100644 --- a/sparse_strips/vello_bench/src/fine/fill.rs +++ b/sparse_strips/vello_bench/src/fine/fill.rs @@ -59,7 +59,7 @@ pub(crate) fn fill_single>( fine: &mut Fine, ) { b.iter(|| { - fine.fill(0, width, paint, blend_mode, encoded_paints, None); + fine.fill(0, width, paint, blend_mode, encoded_paints, None, None); std::hint::black_box(&fine); }); diff --git a/sparse_strips/vello_bench/src/fine/strip.rs b/sparse_strips/vello_bench/src/fine/strip.rs index 569381839..cbf04b466 100644 --- a/sparse_strips/vello_bench/src/fine/strip.rs +++ b/sparse_strips/vello_bench/src/fine/strip.rs @@ -58,6 +58,7 @@ fn strip_single>( default_blend(), encoded_paints, Some(&alphas), + None, ); std::hint::black_box(&fine); diff --git a/sparse_strips/vello_common/src/coarse.rs b/sparse_strips/vello_common/src/coarse.rs index 90def7b5a..5de80ee6c 100644 --- a/sparse_strips/vello_common/src/coarse.rs +++ b/sparse_strips/vello_common/src/coarse.rs @@ -235,7 +235,13 @@ impl Wide { /// - Generate alpha fill commands for the intersected wide tiles /// 2. For active fill regions (determined by fill rule): /// - Generate solid fill commands for the regions between strips - pub fn generate(&mut self, strip_buf: &[Strip], paint: Paint, thread_idx: u8) { + pub fn generate( + &mut self, + strip_buf: &[Strip], + paint: Paint, + thread_idx: u8, + mask: Option, + ) { if strip_buf.is_empty() { return; } @@ -307,6 +313,7 @@ impl Wide { thread_idx, paint: paint.clone(), blend_mode: None, + mask: mask.clone(), }; x += width; col += u32::from(width); @@ -332,8 +339,12 @@ impl Wide { let x_wtile_rel = x % WideTile::WIDTH; let width = x2.min((wtile_x + 1) * WideTile::WIDTH) - x; x += width; - self.get_mut(wtile_x, strip_y) - .fill(x_wtile_rel, width, paint.clone()); + self.get_mut(wtile_x, strip_y).fill( + x_wtile_rel, + width, + paint.clone(), + mask.clone(), + ); } } } @@ -818,7 +829,7 @@ impl WideTile { } } - pub(crate) fn fill(&mut self, x: u16, width: u16, paint: Paint) { + pub(crate) fn fill(&mut self, x: u16, width: u16, paint: Paint, mask: Option) { if !self.is_zero_clip() { match MODE { MODE_CPU => { @@ -836,6 +847,7 @@ impl WideTile { let can_override = x == 0 && width == WideTile::WIDTH && s.is_opaque() + && mask.is_none() && self.n_clip == 0 && self.n_bufs == 0; can_override.then_some(*s) @@ -853,6 +865,7 @@ impl WideTile { width, paint, blend_mode: None, + mask, })); } } @@ -862,6 +875,7 @@ impl WideTile { width, paint, blend_mode: None, + mask, })); } _ => unreachable!(), @@ -1004,6 +1018,8 @@ pub struct CmdFill { pub paint: Paint, /// The blend mode to apply before drawing the contents. pub blend_mode: Option, + /// A mask to apply to the command. + pub mask: Option, } /// Fill a consecutive region of a wide tile with an alpha mask. @@ -1023,6 +1039,8 @@ pub struct CmdAlphaFill { pub paint: Paint, /// A blend mode to apply before drawing the contents. pub blend_mode: Option, + /// A mask to apply to the command. + pub mask: Option, } /// Same as fill, but copies top of clip stack to next on stack. @@ -1097,11 +1115,13 @@ mod tests { 0, 10, Paint::Solid(PremulColor::from_alpha_color(TRANSPARENT)), + None, ); wide.fill( 10, 10, Paint::Solid(PremulColor::from_alpha_color(TRANSPARENT)), + None, ); wide.pop_buf(); @@ -1117,8 +1137,8 @@ mod tests { let mut wide = WideTile::::new(0, 0); wide.push_buf(); - wide.fill(0, 10, paint.clone()); - wide.fill(10, 10, paint.clone()); + wide.fill(0, 10, paint.clone(), None); + wide.fill(10, 10, paint.clone(), None); wide.blend(blend_mode); wide.pop_buf(); @@ -1134,7 +1154,7 @@ mod tests { let mut wide = WideTile::::new(0, 0); wide.push_buf(); - wide.fill(0, 10, paint.clone()); + wide.fill(0, 10, paint.clone(), None); wide.blend(blend_mode); wide.pop_buf(); diff --git a/sparse_strips/vello_common/src/mask.rs b/sparse_strips/vello_common/src/mask.rs index d7b1a4b9e..e45b3efd8 100644 --- a/sparse_strips/vello_common/src/mask.rs +++ b/sparse_strips/vello_common/src/mask.rs @@ -5,15 +5,19 @@ use crate::pixmap::Pixmap; use alloc::sync::Arc; +use alloc::vec::Vec; -/// A mask. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Mask { - data: Arc<[u8]>, +#[derive(Debug, PartialEq, Eq)] +struct MaskRepr { + data: Vec, width: u16, height: u16, } +/// A mask. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Mask(Arc); + impl Mask { /// Create a new alpha mask from the pixmap. pub fn new_alpha(pixmap: &Pixmap) -> Self { @@ -25,55 +29,85 @@ impl Mask { Self::new_with(pixmap, false) } + /// Create a new mask from the given alpha data. + /// + /// The `data` vector must be of length `width * height` exactly. + /// + /// The pixels are in row-major order. + /// + /// # Panics + /// + /// Panics if the `data` vector is not of length `width * height`. + pub fn from_parts(data: Vec, width: u16, height: u16) -> Self { + assert_eq!( + data.len(), + usize::from(width) * usize::from(height), + "Expected `data` to have length of exactly `width * height`" + ); + + Self(Arc::new(MaskRepr { + data, + width, + height, + })) + } + fn new_with(pixmap: &Pixmap, alpha_mask: bool) -> Self { - let data = Arc::from_iter(pixmap.data().iter().map(|pixel| { - if alpha_mask { - pixel.a - } else { - let r = f32::from(pixel.r) / 255.; - let g = f32::from(pixel.g) / 255.; - let b = f32::from(pixel.b) / 255.; + let data = pixmap + .data() + .iter() + .map(|pixel| { + if alpha_mask { + pixel.a + } else { + let r = f32::from(pixel.r) / 255.; + let g = f32::from(pixel.g) / 255.; + let b = f32::from(pixel.b) / 255.; - // See CSS Masking Module Level 1 § 7.10.1 - // - // and Filter Effects Module Level 1 § 9.6 - // . - // Note r, g and b are premultiplied by alpha. - let luma = r * 0.2126 + g * 0.7152 + b * 0.0722; - #[expect(clippy::cast_possible_truncation, reason = "This cannot overflow")] - { - (luma * 255.0 + 0.5) as u8 + // See CSS Masking Module Level 1 § 7.10.1 + // + // and Filter Effects Module Level 1 § 9.6 + // . + // Note r, g and b are premultiplied by alpha. + let luma = r * 0.2126 + g * 0.7152 + b * 0.0722; + #[expect(clippy::cast_possible_truncation, reason = "This cannot overflow")] + { + (luma * 255.0 + 0.5) as u8 + } } - } - })); + }) + .collect::>(); - Self { + Self(Arc::new(MaskRepr { data, width: pixmap.width(), height: pixmap.height(), - } + })) } /// Return the width of the mask. + #[inline] pub fn width(&self) -> u16 { - self.width + self.0.width } /// Return the height of the mask. + #[inline] pub fn height(&self) -> u16 { - self.height + self.0.height } /// Sample the value at a specific location. /// /// This function might panic or yield a wrong result if the location /// is out-of-bounds. + #[inline(always)] pub fn sample(&self, x: u16, y: u16) -> u8 { debug_assert!( - x < self.width && y < self.height, + x < self.0.width && y < self.0.height, "cannot sample mask outside of its range" ); - self.data[y as usize * self.width as usize + x as usize] + self.0.data[y as usize * self.0.width as usize + x as usize] } } diff --git a/sparse_strips/vello_cpu/src/dispatch/mod.rs b/sparse_strips/vello_cpu/src/dispatch/mod.rs index 8c7dd49c3..6680d84b7 100644 --- a/sparse_strips/vello_cpu/src/dispatch/mod.rs +++ b/sparse_strips/vello_cpu/src/dispatch/mod.rs @@ -26,6 +26,7 @@ pub(crate) trait Dispatcher: Debug + Send + Sync { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ); fn stroke_path( &mut self, @@ -34,6 +35,7 @@ pub(crate) trait Dispatcher: Debug + Send + Sync { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ); fn push_layer( &mut self, diff --git a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs index 925ab07f1..72a058106 100644 --- a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs +++ b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs @@ -281,17 +281,20 @@ impl MultiThreadedDispatcher { strips: strip_range, paint, thread_id, + mask, } => self.wide.generate( &task.allocation_group.strips [strip_range.start as usize..strip_range.end as usize], paint.clone(), thread_id, + mask, ), CoarseTaskType::RenderWideCommand { strips, paint, thread_id, - } => self.wide.generate(&strips, paint.clone(), thread_id), + mask, + } => self.wide.generate(&strips, paint.clone(), thread_id, mask), CoarseTaskType::PushLayer { thread_id, clip_path, @@ -387,6 +390,7 @@ impl Dispatcher for MultiThreadedDispatcher { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ) { let start = self.allocation_group.path.len() as u32; self.allocation_group.path.extend(path); @@ -397,6 +401,7 @@ impl Dispatcher for MultiThreadedDispatcher { paint, fill_rule, aliasing_threshold, + mask, }); } @@ -407,6 +412,7 @@ impl Dispatcher for MultiThreadedDispatcher { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ) { let start = self.allocation_group.path.len() as u32; self.allocation_group.path.extend(path); @@ -417,6 +423,7 @@ impl Dispatcher for MultiThreadedDispatcher { paint, stroke: stroke.clone(), aliasing_threshold, + mask, }); } @@ -696,6 +703,7 @@ pub(crate) enum RenderTaskType { paint: Paint, fill_rule: Fill, aliasing_threshold: Option, + mask: Option, }, WideCommand { strip_buf: Box<[Strip]>, @@ -708,6 +716,7 @@ pub(crate) enum RenderTaskType { paint: Paint, stroke: Stroke, aliasing_threshold: Option, + mask: Option, }, PushLayer { clip_path: Option<(Range, Affine)>, @@ -730,11 +739,13 @@ pub(crate) enum CoarseTaskType { thread_id: u8, strips: Range, paint: Paint, + mask: Option, }, RenderWideCommand { thread_id: u8, strips: Box<[Strip]>, paint: Paint, + mask: Option, }, PushLayer { thread_id: u8, @@ -811,6 +822,7 @@ mod tests { Affine::IDENTITY, Paint::Solid(PremulColor::from_alpha_color(BLUE)), None, + None, ); dispatcher.flush(); } diff --git a/sparse_strips/vello_cpu/src/dispatch/multi_threaded/worker.rs b/sparse_strips/vello_cpu/src/dispatch/multi_threaded/worker.rs index 9d1ca1f71..f8c7051b9 100644 --- a/sparse_strips/vello_cpu/src/dispatch/multi_threaded/worker.rs +++ b/sparse_strips/vello_cpu/src/dispatch/multi_threaded/worker.rs @@ -62,6 +62,7 @@ impl Worker { paint, fill_rule, aliasing_threshold, + mask, } => { let start = self.strip_storage.strips.len() as u32; let path = &render_task.allocation_group.path @@ -80,6 +81,7 @@ impl Worker { thread_id: self.thread_id, strips: start..end, paint, + mask, }; render_task @@ -93,6 +95,7 @@ impl Worker { paint, stroke, aliasing_threshold, + mask, } => { let start = self.strip_storage.strips.len() as u32; let path = &render_task.allocation_group.path @@ -111,6 +114,7 @@ impl Worker { thread_id: self.thread_id, strips: start..end, paint, + mask, }; render_task @@ -174,6 +178,7 @@ impl Worker { thread_id: thread_idx, strips: strip_buf, paint, + mask: None, }; render_task diff --git a/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs b/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs index 09b8cc6c6..ba7b1d1d8 100644 --- a/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs +++ b/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs @@ -97,6 +97,7 @@ impl Dispatcher for SingleThreadedDispatcher { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ) { let wide = &mut self.wide; @@ -108,7 +109,7 @@ impl Dispatcher for SingleThreadedDispatcher { &mut self.strip_storage, ); - wide.generate(&self.strip_storage.strips, paint, 0); + wide.generate(&self.strip_storage.strips, paint, 0, mask); } fn stroke_path( @@ -118,6 +119,7 @@ impl Dispatcher for SingleThreadedDispatcher { transform: Affine, paint: Paint, aliasing_threshold: Option, + mask: Option, ) { let wide = &mut self.wide; @@ -129,7 +131,7 @@ impl Dispatcher for SingleThreadedDispatcher { &mut self.strip_storage, ); - wide.generate(&self.strip_storage.strips, paint, 0); + wide.generate(&self.strip_storage.strips, paint, 0, mask); } fn push_layer( @@ -188,7 +190,8 @@ impl Dispatcher for SingleThreadedDispatcher { } fn generate_wide_cmd(&mut self, strip_buf: &[Strip], paint: Paint) { - self.wide.generate(strip_buf, paint, 0); + // Masks are not supported in recordings, so just pass `None` for now. + self.wide.generate(strip_buf, paint, 0, None); } fn strip_storage_mut(&mut self) -> &mut StripStorage { @@ -258,6 +261,7 @@ mod tests { Affine::IDENTITY, Paint::Solid(PremulColor::from_alpha_color(BLUE)), None, + None, ); // Ensure there is data to clear. diff --git a/sparse_strips/vello_cpu/src/fine/highp/mod.rs b/sparse_strips/vello_cpu/src/fine/highp/mod.rs index 2c44268ff..7d335f475 100644 --- a/sparse_strips/vello_cpu/src/fine/highp/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/highp/mod.rs @@ -6,6 +6,7 @@ use crate::fine::{COLOR_COMPONENTS, Painter}; use crate::peniko::BlendMode; use crate::region::Region; use vello_common::fearless_simd::*; +use vello_common::mask::Mask; use vello_common::paint::PremulColor; use vello_common::tile::Tile; @@ -99,7 +100,12 @@ impl FineKernel for F32Kernel { alphas: Option<&[u8]>, ) { if let Some(alphas) = alphas { - alpha_fill::alpha_composite_solid(simd, dest, src, alphas); + alpha_fill::alpha_composite_solid( + simd, + dest, + src, + alphas.chunks_exact(4).map(|c| [c[0], c[1], c[2], c[3]]), + ); } else { fill::alpha_composite_solid(simd, dest, src); } @@ -116,7 +122,7 @@ impl FineKernel for F32Kernel { simd, dest, src.chunks_exact(16).map(|el| f32x16::from_slice(simd, el)), - alphas, + alphas.chunks_exact(4).map(|c| [c[0], c[1], c[2], c[3]]), ); } else { fill::alpha_composite_arbitrary( @@ -130,14 +136,56 @@ impl FineKernel for F32Kernel { fn blend( simd: S, dest: &mut [Self::Numeric], + mut start_x: u16, + start_y: u16, src: impl Iterator, blend_mode: BlendMode, alphas: Option<&[u8]>, + mask: Option<&Mask>, ) { - if let Some(alphas) = alphas { - alpha_fill::blend(simd, dest, src, alphas, blend_mode); - } else { - fill::blend(simd, dest, src, blend_mode); + let alpha_iter = alphas.map(|a| a.chunks_exact(4).map(|d| [d[0], d[1], d[2], d[3]])); + + let mask_iter = mask.map(|m| { + core::iter::from_fn(|| { + let sample = |x: u16, y: u16| { + if x < m.width() && y < m.height() { + m.sample(x, y) + } else { + 255 + } + }; + + let samples = [ + sample(start_x, start_y), + sample(start_x, start_y + 1), + sample(start_x, start_y + 2), + sample(start_x, start_y + 3), + ]; + + start_x += 1; + + Some(samples) + }) + }); + + match (alpha_iter, mask_iter) { + (Some(alpha_iter), Some(mut mask_iter)) => { + let iter = alpha_iter.map(|a1| { + let a2 = mask_iter.next().unwrap(); + [ + ((a1[0] as u16 * a2[0] as u16) / 255) as u8, + ((a1[1] as u16 * a2[1] as u16) / 255) as u8, + ((a1[2] as u16 * a2[2] as u16) / 255) as u8, + ((a1[3] as u16 * a2[3] as u16) / 255) as u8, + ] + }); + alpha_fill::blend(simd, dest, src, iter, blend_mode); + } + (None, Some(mask_iter)) => alpha_fill::blend(simd, dest, src, mask_iter, blend_mode), + (Some(alpha_iter), None) => alpha_fill::blend(simd, dest, src, alpha_iter, blend_mode), + (None, None) => { + fill::blend(simd, dest, src, blend_mode); + } } } } @@ -225,7 +273,7 @@ mod alpha_fill { s: S, dest: &mut [f32], src: [f32; 4], - alphas: &[u8], + alphas: impl Iterator, ) { s.vectorize( #[inline(always)] @@ -234,33 +282,29 @@ mod alpha_fill { let src_c = f32x16::block_splat(src.simd_into(s)); let one = f32x16::splat(s, 1.0); - for (next_dest, next_mask) in dest.chunks_exact_mut(16).zip(alphas.chunks_exact(4)) - { - alpha_composite_inner(s, next_dest, next_mask, src_c, src_a, one); + for (next_dest, next_mask) in dest.chunks_exact_mut(16).zip(alphas) { + alpha_composite_inner(s, next_dest, &next_mask, src_c, src_a, one); } }, ); } - #[inline(always)] pub(super) fn alpha_composite_arbitrary>>( simd: S, dest: &mut [f32], src: T, - alphas: &[u8], + alphas: impl Iterator, ) { simd.vectorize( #[inline(always)] || { let one = f32x16::splat(simd, 1.0); - for ((next_dest, next_mask), next_src) in dest - .chunks_exact_mut(16) - .zip(alphas.chunks_exact(4)) - .zip(src) + for ((next_dest, next_mask), next_src) in + dest.chunks_exact_mut(16).zip(alphas).zip(src) { let src_a = next_src.splat_4th(); - alpha_composite_inner(simd, next_dest, next_mask, next_src, src_a, one); + alpha_composite_inner(simd, next_dest, &next_mask, next_src, src_a, one); } }, ); @@ -270,18 +314,16 @@ mod alpha_fill { simd: S, dest: &mut [f32], src: T, - alphas: &[u8], + alphas: impl Iterator, blend_mode: BlendMode, ) { simd.vectorize( #[inline(always)] || { - for ((next_dest, next_mask), next_src) in dest - .chunks_exact_mut(16) - .zip(alphas.chunks_exact(4)) - .zip(src) + for ((next_dest, next_mask), next_src) in + dest.chunks_exact_mut(16).zip(alphas).zip(src) { - let masks = extract_masks(simd, next_mask); + let masks = extract_masks(simd, &next_mask); let bg = f32x16::from_slice(simd, next_dest); let src_c = blend::mix(next_src, bg, blend_mode); let res = blend_mode.compose(simd, src_c, bg, masks); @@ -295,7 +337,7 @@ mod alpha_fill { fn alpha_composite_inner( s: S, dest: &mut [f32], - masks: &[u8], + masks: &[u8; 4], src_c: f32x16, src_a: f32x16, one: f32x16, @@ -310,7 +352,7 @@ mod alpha_fill { } #[inline(always)] -fn extract_masks(simd: S, masks: &[u8]) -> f32x16 { +fn extract_masks(simd: S, masks: &[u8; 4]) -> f32x16 { let mut base_mask = [ masks[0] as f32, masks[1] as f32, diff --git a/sparse_strips/vello_cpu/src/fine/lowp/mod.rs b/sparse_strips/vello_cpu/src/fine/lowp/mod.rs index e4b74456c..302213827 100644 --- a/sparse_strips/vello_cpu/src/fine/lowp/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/lowp/mod.rs @@ -11,10 +11,13 @@ use crate::fine::{FineKernel, highp, u8_to_f32}; use crate::peniko::BlendMode; use crate::region::Region; use crate::util::Div255Ext; +use crate::util::scalar::div_255; use bytemuck::cast_slice; +use core::iter; use vello_common::coarse::WideTile; use vello_common::encode::{EncodedGradient, EncodedImage}; use vello_common::fearless_simd::*; +use vello_common::mask::Mask; use vello_common::paint::PremulColor; use vello_common::pixmap::Pixmap; use vello_common::tile::Tile; @@ -122,7 +125,14 @@ impl FineKernel for U8Kernel { alphas: Option<&[u8]>, ) { if let Some(alphas) = alphas { - alpha_fill::alpha_composite_solid(simd, dest, src, alphas); + alpha_fill::alpha_composite_solid( + simd, + dest, + src, + alphas + .chunks_exact(8) + .map(|d| [d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]), + ); } else { fill::alpha_composite_solid(simd, dest, src); } @@ -137,7 +147,14 @@ impl FineKernel for U8Kernel { let src_iter = src.chunks_exact(32).map(|el| u8x32::from_slice(simd, el)); if let Some(alphas) = alphas { - alpha_fill::alpha_composite(simd, dest, src_iter, alphas); + alpha_fill::alpha_composite( + simd, + dest, + src_iter, + alphas + .chunks_exact(8) + .map(|d| [d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]), + ); } else { fill::alpha_composite(simd, dest, src_iter); } @@ -146,14 +163,67 @@ impl FineKernel for U8Kernel { fn blend( simd: S, dest: &mut [Self::Numeric], + mut start_x: u16, + start_y: u16, src: impl Iterator, blend_mode: BlendMode, alphas: Option<&[u8]>, + mask: Option<&Mask>, ) { - if let Some(alphas) = alphas { - alpha_fill::blend(simd, dest, src, blend_mode, alphas); - } else { - fill::blend(simd, dest, src, blend_mode); + let alpha_iter = alphas.map(|a| { + a.chunks_exact(8) + .map(|d| [d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]) + }); + + let mask_iter = mask.map(|m| { + iter::from_fn(|| { + let sample = |x: u16, y: u16| { + if x < m.width() && y < m.height() { + m.sample(x, y) + } else { + 255 + } + }; + + let samples = [ + sample(start_x, start_y), + sample(start_x, start_y + 1), + sample(start_x, start_y + 2), + sample(start_x, start_y + 3), + sample(start_x + 1, start_y), + sample(start_x + 1, start_y + 1), + sample(start_x + 1, start_y + 2), + sample(start_x + 1, start_y + 3), + ]; + + start_x += 2; + + Some(samples) + }) + }); + + match (alpha_iter, mask_iter) { + (Some(alpha_iter), Some(mut mask_iter)) => { + let iter = alpha_iter.map(|a1| { + let a2 = mask_iter.next().unwrap(); + [ + div_255(a1[0] as u16 * a2[0] as u16) as u8, + div_255(a1[1] as u16 * a2[1] as u16) as u8, + div_255(a1[2] as u16 * a2[2] as u16) as u8, + div_255(a1[3] as u16 * a2[3] as u16) as u8, + div_255(a1[4] as u16 * a2[4] as u16) as u8, + div_255(a1[5] as u16 * a2[5] as u16) as u8, + div_255(a1[6] as u16 * a2[6] as u16) as u8, + div_255(a1[7] as u16 * a2[7] as u16) as u8, + ] + }); + alpha_fill::blend(simd, dest, src, blend_mode, iter); + } + (None, Some(mask_iter)) => alpha_fill::blend(simd, dest, src, blend_mode, mask_iter), + (Some(alpha_iter), None) => alpha_fill::blend(simd, dest, src, blend_mode, alpha_iter), + (None, None) => { + fill::blend(simd, dest, src, blend_mode); + } } } } @@ -255,7 +325,7 @@ mod alpha_fill { dest: &mut [u8], src: T, blend_mode: BlendMode, - alphas: &[u8], + alphas: impl Iterator, ) { simd.vectorize( #[inline(always)] @@ -263,10 +333,8 @@ mod alpha_fill { #[expect(deprecated, reason = "Provided by the user, need to handle correctly.")] let default_mix = matches!(blend_mode.mix, Mix::Normal | Mix::Clip); - for ((next_bg, next_mask), next_src) in dest - .chunks_exact_mut(32) - .zip(alphas.chunks_exact(8)) - .zip(src) + for ((next_bg, next_mask), next_src) in + dest.chunks_exact_mut(32).zip(alphas).zip(src) { let bg_v = u8x32::from_slice(simd, next_bg); let src_c = if default_mix { @@ -274,7 +342,7 @@ mod alpha_fill { } else { mix(next_src, bg_v, blend_mode) }; - let masks = extract_masks(simd, next_mask); + let masks = extract_masks(simd, &next_mask); let res = blend_mode.compose(simd, src_c, bg_v, masks); next_bg.copy_from_slice(&res.val); @@ -288,7 +356,7 @@ mod alpha_fill { s: S, dest: &mut [u8], src: [u8; 4], - alphas: &[u8], + alphas: impl Iterator, ) { s.vectorize( #[inline(always)] @@ -297,8 +365,8 @@ mod alpha_fill { let src_c = u32x8::splat(s, u32::from_ne_bytes(src)).reinterpret_u8(); let one = u8x32::splat(s, 255); - for (next_bg, next_mask) in dest.chunks_exact_mut(32).zip(alphas.chunks_exact(8)) { - alpha_composite_inner(s, next_bg, next_mask, src_c, src_a, one); + for (next_bg, next_mask) in dest.chunks_exact_mut(32).zip(alphas) { + alpha_composite_inner(s, next_bg, &next_mask, src_c, src_a, one); } }, ); @@ -309,20 +377,18 @@ mod alpha_fill { simd: S, dest: &mut [u8], src: T, - alphas: &[u8], + alphas: impl Iterator, ) { simd.vectorize( #[inline(always)] || { let one = u8x32::splat(simd, 255); - for ((next_dest, next_mask), next_src) in dest - .chunks_exact_mut(32) - .zip(alphas.chunks_exact(8)) - .zip(src) + for ((next_dest, next_mask), next_src) in + dest.chunks_exact_mut(32).zip(alphas).zip(src) { let src_a = next_src.splat_4th(); - alpha_composite_inner(simd, next_dest, next_mask, next_src, src_a, one); + alpha_composite_inner(simd, next_dest, &next_mask, next_src, src_a, one); } }, ); @@ -332,7 +398,7 @@ mod alpha_fill { fn alpha_composite_inner( s: S, dest: &mut [u8], - masks: &[u8], + masks: &[u8; 8], src_c: u8x32, src_a: u8x32, one: u8x32, @@ -385,7 +451,7 @@ fn mix(src_c: u8x32, bg_c: u8x32, blend_mode: BlendMode) -> u8x32 } #[inline(always)] -fn extract_masks(simd: S, masks: &[u8]) -> u8x32 { +fn extract_masks(simd: S, masks: &[u8; 8]) -> u8x32 { let m1 = u32x4::splat(simd, u32::from_ne_bytes(masks[0..4].try_into().unwrap())).reinterpret_u8(); let m2 = diff --git a/sparse_strips/vello_cpu/src/fine/mod.rs b/sparse_strips/vello_cpu/src/fine/mod.rs index 0b6326dad..60f8e2a21 100644 --- a/sparse_strips/vello_cpu/src/fine/mod.rs +++ b/sparse_strips/vello_cpu/src/fine/mod.rs @@ -35,6 +35,7 @@ pub use lowp::U8Kernel; use vello_common::fearless_simd::{ Simd, SimdBase, SimdFloat, SimdInto, f32x4, f32x8, f32x16, u8x16, u8x32, u32x4, u32x8, }; +use vello_common::mask::Mask; use vello_common::pixmap::Pixmap; use vello_common::simd::Splat4thExt; use vello_common::util::f32_to_u8; @@ -275,9 +276,12 @@ pub trait FineKernel: Send + Sync + 'static { fn blend( simd: S, dest: &mut [Self::Numeric], + start_x: u16, + start_y: u16, src: impl Iterator, blend_mode: BlendMode, alphas: Option<&[u8]>, + mask: Option<&Mask>, ); } @@ -334,6 +338,7 @@ impl> Fine { .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)), paints, None, + f.mask.as_ref(), ); } Cmd::AlphaFill(s) => { @@ -345,6 +350,7 @@ impl> Fine { .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)), paints, Some(&alphas[s.alpha_idx..]), + s.mask.as_ref(), ); } Cmd::PushBuf => { @@ -430,6 +436,7 @@ impl> Fine { blend_mode: BlendMode, encoded_paints: &[EncodedPaint], alphas: Option<&[u8]>, + mask: Option<&Mask>, ) { let blend_buf = &mut self.blend_buf.last_mut().unwrap()[x * TILE_HEIGHT_COMPONENTS..] [..TILE_HEIGHT_COMPONENTS * width]; @@ -441,21 +448,31 @@ impl> Fine { // If color is completely opaque, we can just directly override // the blend buffer. - if color[3] == T::Numeric::ONE && default_blend && alphas.is_none() { + if color[3] == T::Numeric::ONE + && default_blend + && alphas.is_none() + && mask.is_none() + { T::copy_solid(self.simd, blend_buf, color); return; } - if default_blend { + if default_blend && mask.is_none() { T::alpha_composite_solid(self.simd, blend_buf, color, alphas); } else { + let start_x = self.wide_coords.0 * WideTile::WIDTH + x as u16; + let start_y = self.wide_coords.1 * Tile::HEIGHT; + T::blend( self.simd, blend_buf, + start_x, + start_y, iter::repeat(T::Composite::from_color(self.simd, color)), blend_mode, alphas, + mask, ); } } @@ -475,17 +492,20 @@ impl> Fine { if $has_opacities || alphas.is_some() { T::apply_painter(self.simd, color_buf, $filler); - if default_blend { + if default_blend && mask.is_none() { T::alpha_composite_buffer(self.simd, blend_buf, color_buf, alphas); } else { T::blend( self.simd, blend_buf, + start_x, + start_y, color_buf .chunks_exact(T::Composite::LENGTH) .map(|s| T::Composite::from_slice(self.simd, s)), blend_mode, alphas, + mask, ); } } else { @@ -620,11 +640,16 @@ impl> Fine { T::blend( self.simd, target_buffer, + // `start_x` and `start_y` are only needed to sample the correct position + // of a mask, so we can just pass dummy values here. + 0, + 0, source_buffer .chunks_exact(T::Composite::LENGTH) .map(|s| T::Composite::from_slice(self.simd, s)), blend_mode, None, + None, ); } } diff --git a/sparse_strips/vello_cpu/src/render.rs b/sparse_strips/vello_cpu/src/render.rs index ebcb5ef37..1989492ac 100644 --- a/sparse_strips/vello_cpu/src/render.rs +++ b/sparse_strips/vello_cpu/src/render.rs @@ -43,6 +43,7 @@ pub struct RenderContext { pub(crate) height: u16, pub(crate) paint: PaintType, pub(crate) paint_transform: Affine, + pub(crate) mask: Option, pub(crate) stroke: Stroke, pub(crate) transform: Affine, pub(crate) fill_rule: Fill, @@ -142,6 +143,7 @@ impl RenderContext { aliasing_threshold, paint, render_settings: settings, + mask: None, paint_transform, fill_rule, stroke, @@ -178,6 +180,7 @@ impl RenderContext { self.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } @@ -190,6 +193,7 @@ impl RenderContext { self.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } @@ -203,6 +207,7 @@ impl RenderContext { self.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } @@ -254,6 +259,7 @@ impl RenderContext { self.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } @@ -267,6 +273,7 @@ impl RenderContext { self.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } @@ -398,6 +405,11 @@ impl RenderContext { self.fill_rule = fill_rule; } + /// Set the mask to use for path-painting operations. + pub fn set_mask(&mut self, mask: Option) { + self.mask = mask; + } + /// Get the current fill rule. pub fn fill_rule(&self) -> &Fill { &self.fill_rule @@ -422,6 +434,7 @@ impl RenderContext { pub fn reset(&mut self) { self.dispatcher.reset(); self.encoded_paints.clear(); + self.mask = None; self.reset_transform(); self.reset_paint_transform(); #[cfg(feature = "text")] @@ -502,6 +515,7 @@ impl GlyphRenderer for RenderContext { prepared_glyph.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } GlyphType::Bitmap(glyph) => { @@ -607,6 +621,7 @@ impl GlyphRenderer for RenderContext { prepared_glyph.transform, paint, self.aliasing_threshold, + self.mask.clone(), ); } GlyphType::Bitmap(_) | GlyphType::Colr(_) => { diff --git a/sparse_strips/vello_cpu/src/util.rs b/sparse_strips/vello_cpu/src/util.rs index 6acd24bde..14264e14e 100644 --- a/sparse_strips/vello_cpu/src/util.rs +++ b/sparse_strips/vello_cpu/src/util.rs @@ -6,11 +6,6 @@ use vello_common::encode::EncodedImage; use vello_common::fearless_simd::{Simd, SimdBase, f32x4, u8x32, u16x16, u16x32}; use vello_common::math::FloatExt; -#[allow( - dead_code, - reason = "this is not used because the division by 255 is now done with SIMD, but\ -we still keep it around to document its properties." -)] pub(crate) mod scalar { /// Perform an approximate division by 255. /// diff --git a/sparse_strips/vello_hybrid/src/scene.rs b/sparse_strips/vello_hybrid/src/scene.rs index 40234d8c9..4f827d835 100644 --- a/sparse_strips/vello_hybrid/src/scene.rs +++ b/sparse_strips/vello_hybrid/src/scene.rs @@ -186,7 +186,7 @@ impl Scene { aliasing_threshold, &mut self.strip_storage, ); - wide.generate(&self.strip_storage.strips, paint, 0); + wide.generate(&self.strip_storage.strips, paint, 0, None); } /// Stroke a path with the current paint and stroke settings. @@ -217,7 +217,7 @@ impl Scene { &mut self.strip_storage, ); - wide.generate(&self.strip_storage.strips, paint, 0); + wide.generate(&self.strip_storage.strips, paint, 0, None); } /// Set the aliasing threshold. @@ -626,7 +626,8 @@ impl Scene { "Invalid strip range: start={start}, end={end}, count={count}" ); let paint = self.encode_current_paint(); - self.wide.generate(&adjusted_strips[start..end], paint, 0); + self.wide + .generate(&adjusted_strips[start..end], paint, 0, None); } /// Prepare cached strips for rendering by adjusting alpha indices and extending alpha buffer. diff --git a/sparse_strips/vello_sparse_tests/snapshots/mask_non_isolated.png b/sparse_strips/vello_sparse_tests/snapshots/mask_non_isolated.png new file mode 100644 index 000000000..2da6ace59 --- /dev/null +++ b/sparse_strips/vello_sparse_tests/snapshots/mask_non_isolated.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fffe5139b6ad63b2b7251c97f19851607a9617d84c3ba733f991e8d5c94c2f32 +size 263 diff --git a/sparse_strips/vello_sparse_tests/tests/mask.rs b/sparse_strips/vello_sparse_tests/tests/mask.rs index 39dd07eeb..c089f3018 100644 --- a/sparse_strips/vello_sparse_tests/tests/mask.rs +++ b/sparse_strips/vello_sparse_tests/tests/mask.rs @@ -77,3 +77,14 @@ fn mask_alpha(ctx: &mut impl Renderer) { fn mask_luminance(ctx: &mut impl Renderer) { mask(ctx, false); } + +#[vello_test(skip_hybrid)] +fn mask_non_isolated(ctx: &mut impl Renderer) { + let mask = example_mask(false); + + ctx.set_paint(BLACK); + ctx.fill_rect(&Rect::new(10.0, 10.0, 90.0, 90.0)); + ctx.set_mask(Some(mask)); + ctx.set_paint(RED); + ctx.fill_rect(&Rect::new(10.0, 10.0, 90.0, 90.0)); +} diff --git a/sparse_strips/vello_sparse_tests/tests/renderer.rs b/sparse_strips/vello_sparse_tests/tests/renderer.rs index e2ce9a185..f04a5e037 100644 --- a/sparse_strips/vello_sparse_tests/tests/renderer.rs +++ b/sparse_strips/vello_sparse_tests/tests/renderer.rs @@ -46,6 +46,7 @@ pub(crate) trait Renderer: Sized { fn push_mask_layer(&mut self, mask: Mask); fn pop_layer(&mut self); fn set_stroke(&mut self, stroke: Stroke); + fn set_mask(&mut self, mask: Option); fn set_paint(&mut self, paint: impl Into); fn set_paint_transform(&mut self, affine: Affine); fn set_fill_rule(&mut self, fill_rule: Fill); @@ -141,6 +142,10 @@ impl Renderer for RenderContext { Self::set_stroke(self, stroke); } + fn set_mask(&mut self, mask: Option) { + Self::set_mask(self, mask); + } + fn set_paint(&mut self, paint: impl Into) { Self::set_paint(self, paint); } @@ -327,6 +332,10 @@ impl Renderer for HybridRenderer { self.scene.set_stroke(stroke); } + fn set_mask(&mut self, _: Option) { + unimplemented!() + } + fn set_paint(&mut self, paint: impl Into) { let paint_type: PaintType = paint.into(); match paint_type { @@ -611,6 +620,10 @@ impl Renderer for HybridRenderer { self.scene.set_stroke(stroke); } + fn set_mask(&mut self, _: Option) { + unimplemented!() + } + fn set_paint(&mut self, paint: impl Into) { let paint_type: PaintType = paint.into(); match paint_type { diff --git a/sparse_strips/vello_toy/src/debug.rs b/sparse_strips/vello_toy/src/debug.rs index 01732a8f6..d9331848e 100644 --- a/sparse_strips/vello_toy/src/debug.rs +++ b/sparse_strips/vello_toy/src/debug.rs @@ -87,7 +87,7 @@ fn main() { } if stages.iter().any(|s| s.requires_wide_tiles()) { - wide.generate(&strip_buf, BLACK.into(), 0); + wide.generate(&strip_buf, BLACK.into(), 0, None); } draw_grid(&mut document, args.width, args.height);