diff --git a/src/decoder.rs b/src/decoder.rs index de2c64b..b36275b 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -271,6 +271,43 @@ pub enum LoopCount { Times(NonZeroU16), } +/// WebP decoder configuration options +#[derive(Clone)] +#[non_exhaustive] +pub struct WebPDecodeOptions { + /// The upsampling method used in conversion from lossy yuv to rgb + /// + /// Defaults to `Bilinear`. + pub lossy_upsampling: UpsamplingMethod, +} + +impl Default for WebPDecodeOptions { + fn default() -> Self { + Self { + lossy_upsampling: UpsamplingMethod::Bilinear, + } + } +} + +/// Methods for upsampling the chroma values in lossy decoding +/// +/// The chroma red and blue planes are encoded in VP8 as half the size of the luma plane +/// Therefore we need to upsample these values up to fit each pixel in the image. +#[derive(Clone, Copy, Default)] +pub enum UpsamplingMethod { + /// Fancy upsampling + /// + /// Does bilinear interpolation using the 4 values nearest to the pixel, weighting based on the distance + /// from the pixel. + #[default] + Bilinear, + /// Simple upsampling, just uses the closest u/v value to the pixel when upsampling + /// + /// Matches the -nofancy option in dwebp. + /// Should be faster but may lead to slightly jagged edges. + Simple, +} + /// WebP image format decoder. pub struct WebPDecoder { r: R, @@ -289,12 +326,23 @@ pub struct WebPDecoder { loop_duration: u64, chunks: HashMap>, + + webp_decode_options: WebPDecodeOptions, } impl WebPDecoder { /// Create a new `WebPDecoder` from the reader `r`. The decoder performs many small reads, so the /// reader should be buffered. pub fn new(r: R) -> Result { + Self::new_with_options(r, WebPDecodeOptions::default()) + } + + /// Create a new `WebPDecoder` from the reader `r` with the options `WebPDecodeOptions`. The decoder + /// performs many small reads, so the reader should be buffered. 
+ pub fn new_with_options( + r: R, + webp_decode_options: WebPDecodeOptions, + ) -> Result { let mut decoder = Self { r, width: 0, @@ -308,6 +356,7 @@ impl WebPDecoder { has_alpha: false, loop_count: LoopCount::Times(NonZeroU16::new(1).unwrap()), loop_duration: 0, + webp_decode_options, }; decoder.read_data()?; Ok(decoder) @@ -653,7 +702,7 @@ impl WebPDecoder { } if self.has_alpha() { - frame.fill_rgba(buf); + frame.fill_rgba(buf, self.webp_decode_options.lossy_upsampling); let range = self .chunks @@ -684,7 +733,7 @@ impl WebPDecoder { } } } else { - frame.fill_rgb(buf); + frame.fill_rgb(buf, self.webp_decode_options.lossy_upsampling); } } @@ -758,7 +807,7 @@ impl WebPDecoder { return Err(DecodingError::InconsistentImageSizes); } let mut rgb_frame = vec![0; frame_width as usize * frame_height as usize * 3]; - raw_frame.fill_rgb(&mut rgb_frame); + raw_frame.fill_rgb(&mut rgb_frame, self.webp_decode_options.lossy_upsampling); (rgb_frame, false) } WebPRiffChunk::VP8L => { @@ -789,7 +838,7 @@ impl WebPDecoder { let frame = Vp8Decoder::decode_frame((&mut self.r).take(next_chunk_size))?; let mut rgba_frame = vec![0; frame_width as usize * frame_height as usize * 4]; - frame.fill_rgba(&mut rgba_frame); + frame.fill_rgba(&mut rgba_frame, self.webp_decode_options.lossy_upsampling); for y in 0..frame.height { for x in 0..frame.width { @@ -880,6 +929,11 @@ impl WebPDecoder { self.animation.next_frame_start = self.chunks.get(&WebPRiffChunk::ANMF).unwrap().start - 8; self.animation.dispose_next_frame = true; } + + /// Sets the upsampling method that is used in lossy decoding + pub fn set_lossy_upsampling(&mut self, upsampling_method: UpsamplingMethod) { + self.webp_decode_options.lossy_upsampling = upsampling_method; + } } pub(crate) fn range_reader( diff --git a/src/lib.rs b/src/lib.rs index dedc9f8..fe29eae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,9 @@ #[cfg(all(test, feature = "_benchmarks"))] extern crate test; -pub use self::decoder::{DecodingError, LoopCount, 
WebPDecoder}; +pub use self::decoder::{ + DecodingError, LoopCount, UpsamplingMethod, WebPDecodeOptions, WebPDecoder, +}; pub use self::encoder::{ColorType, EncoderParams, EncodingError, WebPEncoder}; mod alpha_blending; @@ -22,5 +24,6 @@ mod lossless; mod lossless_transform; mod transform; mod vp8_arithmetic_decoder; +mod yuv; pub mod vp8; diff --git a/src/vp8.rs b/src/vp8.rs index 47ec1bd..ac14b93 100644 --- a/src/vp8.rs +++ b/src/vp8.rs @@ -14,7 +14,8 @@ use byteorder_lite::{LittleEndian, ReadBytesExt}; use std::default::Default; use std::io::Read; -use crate::decoder::DecodingError; +use crate::decoder::{DecodingError, UpsamplingMethod}; +use crate::yuv; use super::vp8_arithmetic_decoder::ArithmeticDecoder; use super::{loop_filter, transform}; @@ -802,7 +803,6 @@ pub struct Frame { } impl Frame { - /// Chroma plane is half the size of the Luma plane const fn chroma_width(&self) -> u16 { self.width.div_ceil(2) } @@ -816,168 +816,65 @@ impl Frame { } } - /// Fills an rgb buffer with the image - pub(crate) fn fill_rgb(&self, buf: &mut [u8]) { + /// Fills an rgb buffer from the YUV buffers + pub(crate) fn fill_rgb(&self, buf: &mut [u8], upsampling_method: UpsamplingMethod) { const BPP: usize = 3; - let buffer_width = usize::from(self.buffer_width()); - - let u_row_twice_iter = self - .ubuf - .chunks_exact(buffer_width / 2) - .flat_map(|n| std::iter::repeat(n).take(2)); - let v_row_twice_iter = self - .vbuf - .chunks_exact(buffer_width / 2) - .flat_map(|n| std::iter::repeat(n).take(2)); - - for (((row, y_row), u_row), v_row) in buf - .chunks_exact_mut(usize::from(self.width) * BPP) - .zip(self.ybuf.chunks_exact(buffer_width)) - .zip(u_row_twice_iter) - .zip(v_row_twice_iter) - { - Self::fill_rgb_row( - &y_row[..usize::from(self.width)], - &u_row[..usize::from(self.chroma_width())], - &v_row[..usize::from(self.chroma_width())], - row, - ); - } - } - - fn fill_rgb_row(y_vec: &[u8], u_vec: &[u8], v_vec: &[u8], rgb: &mut [u8]) { - // Fill 2 pixels per iteration: these 
pixels share `u` and `v` components - let mut rgb_chunks = rgb.chunks_exact_mut(6); - let mut y_chunks = y_vec.chunks_exact(2); - let mut u_iter = u_vec.iter(); - let mut v_iter = v_vec.iter(); - - for (((rgb, y), &u), &v) in (&mut rgb_chunks) - .zip(&mut y_chunks) - .zip(&mut u_iter) - .zip(&mut v_iter) - { - let coeffs = [ - mulhi(v, 26149), - mulhi(u, 6419), - mulhi(v, 13320), - mulhi(u, 33050), - ]; - - rgb[0] = clip(mulhi(y[0], 19077) + coeffs[0] - 14234); - rgb[1] = clip(mulhi(y[0], 19077) - coeffs[1] - coeffs[2] + 8708); - rgb[2] = clip(mulhi(y[0], 19077) + coeffs[3] - 17685); - - rgb[3] = clip(mulhi(y[1], 19077) + coeffs[0] - 14234); - rgb[4] = clip(mulhi(y[1], 19077) - coeffs[1] - coeffs[2] + 8708); - rgb[5] = clip(mulhi(y[1], 19077) + coeffs[3] - 17685); - } - - let remainder = rgb_chunks.into_remainder(); - if remainder.len() >= 3 { - if let (Some(&y), Some(&u), Some(&v)) = ( - y_chunks.remainder().iter().next(), - u_iter.next(), - v_iter.next(), - ) { - let coeffs = [ - mulhi(v, 26149), - mulhi(u, 6419), - mulhi(v, 13320), - mulhi(u, 33050), - ]; - - remainder[0] = clip(mulhi(y, 19077) + coeffs[0] - 14234); - remainder[1] = clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708); - remainder[2] = clip(mulhi(y, 19077) + coeffs[3] - 17685); + match upsampling_method { + UpsamplingMethod::Bilinear => { + yuv::fill_rgb_buffer_fancy::( + buf, + &self.ybuf, + &self.ubuf, + &self.vbuf, + usize::from(self.width), + usize::from(self.height), + usize::from(self.buffer_width()), + ); + } + UpsamplingMethod::Simple => { + yuv::fill_rgb_buffer_simple::( + buf, + &self.ybuf, + &self.ubuf, + &self.vbuf, + usize::from(self.width), + usize::from(self.chroma_width()), + usize::from(self.buffer_width()), + ); } } } - /// Fills an rgba buffer by skipping the alpha values - pub(crate) fn fill_rgba(&self, buf: &mut [u8]) { + /// Fills an rgba buffer from the YUV buffers + pub(crate) fn fill_rgba(&self, buf: &mut [u8], upsampling_method: UpsamplingMethod) { const BPP: usize = 4; 
- let buffer_width = usize::from(self.buffer_width()); - - let u_row_twice_iter = self - .ubuf - .chunks_exact(buffer_width / 2) - .flat_map(|n| std::iter::repeat(n).take(2)); - let v_row_twice_iter = self - .vbuf - .chunks_exact(buffer_width / 2) - .flat_map(|n| std::iter::repeat(n).take(2)); - - for (((row, y_row), u_row), v_row) in buf - .chunks_exact_mut(usize::from(self.width) * BPP) - .zip(self.ybuf.chunks_exact(buffer_width)) - .zip(u_row_twice_iter) - .zip(v_row_twice_iter) - { - Self::fill_rgba_row( - &y_row[..usize::from(self.width)], - &u_row[..usize::from(self.chroma_width())], - &v_row[..usize::from(self.chroma_width())], - row, - ); - } - } - - fn fill_rgba_row(y_vec: &[u8], u_vec: &[u8], v_vec: &[u8], rgba: &mut [u8]) { - // Fill 2 pixels per iteration: these pixels share `u` and `v` components - let mut rgb_chunks = rgba.chunks_exact_mut(8); - let mut y_chunks = y_vec.chunks_exact(2); - let mut u_iter = u_vec.iter(); - let mut v_iter = v_vec.iter(); - - for (((rgb, y), &u), &v) in (&mut rgb_chunks) - .zip(&mut y_chunks) - .zip(&mut u_iter) - .zip(&mut v_iter) - { - let coeffs = [ - mulhi(v, 26149), - mulhi(u, 6419), - mulhi(v, 13320), - mulhi(u, 33050), - ]; - - let to_copy = [ - clip(mulhi(y[0], 19077) + coeffs[0] - 14234), - clip(mulhi(y[0], 19077) - coeffs[1] - coeffs[2] + 8708), - clip(mulhi(y[0], 19077) + coeffs[3] - 17685), - rgb[3], - clip(mulhi(y[1], 19077) + coeffs[0] - 14234), - clip(mulhi(y[1], 19077) - coeffs[1] - coeffs[2] + 8708), - clip(mulhi(y[1], 19077) + coeffs[3] - 17685), - rgb[7], - ]; - rgb.copy_from_slice(&to_copy); - } - - let remainder = rgb_chunks.into_remainder(); - if remainder.len() >= 4 { - if let (Some(&y), Some(&u), Some(&v)) = ( - y_chunks.remainder().iter().next(), - u_iter.next(), - v_iter.next(), - ) { - let coeffs = [ - mulhi(v, 26149), - mulhi(u, 6419), - mulhi(v, 13320), - mulhi(u, 33050), - ]; - - remainder[0] = clip(mulhi(y, 19077) + coeffs[0] - 14234); - remainder[1] = clip(mulhi(y, 19077) - coeffs[1] - 
coeffs[2] + 8708); - remainder[2] = clip(mulhi(y, 19077) + coeffs[3] - 17685); + match upsampling_method { + UpsamplingMethod::Bilinear => { + yuv::fill_rgb_buffer_fancy::( + buf, + &self.ybuf, + &self.ubuf, + &self.vbuf, + usize::from(self.width), + usize::from(self.height), + usize::from(self.buffer_width()), + ); + } + UpsamplingMethod::Simple => { + yuv::fill_rgb_buffer_simple::( + buf, + &self.ybuf, + &self.ubuf, + &self.vbuf, + usize::from(self.width), + usize::from(self.chroma_width()), + usize::from(self.buffer_width()), + ); } } } - /// Gets the buffer size #[must_use] pub fn get_buf_size(&self) -> usize { @@ -985,36 +882,6 @@ impl Frame { } } -/// `_mm_mulhi_epu16` emulation used in `Frame::fill_rgb` and `Frame::fill_rgba`. -fn mulhi(v: u8, coeff: u16) -> i32 { - ((u32::from(v) * u32::from(coeff)) >> 8) as i32 -} - -/// Used in `Frame::fill_rgb` and `Frame::fill_rgba`. -/// This function has been rewritten to encourage auto-vectorization. -/// -/// Based on [src/dsp/yuv.h](https://github.com/webmproject/libwebp/blob/8534f53960befac04c9631e6e50d21dcb42dfeaf/src/dsp/yuv.h#L79) -/// from the libwebp source. -/// ```text -/// const YUV_FIX2: i32 = 6; -/// const YUV_MASK2: i32 = (256 << YUV_FIX2) - 1; -/// fn clip(v: i32) -> u8 { -/// if (v & !YUV_MASK2) == 0 { -/// (v >> YUV_FIX2) as u8 -/// } else if v < 0 { -/// 0 -/// } else { -/// 255 -/// } -/// } -/// ``` -// Clippy suggests the clamp method, but it seems to optimize worse as of rustc 1.82.0 nightly. -#[allow(clippy::manual_clamp)] -fn clip(v: i32) -> u8 { - const YUV_FIX2: i32 = 6; - (v >> YUV_FIX2).max(0).min(255) as u8 -} - #[derive(Clone, Copy, Default)] struct Segment { ydc: i16, diff --git a/src/yuv.rs b/src/yuv.rs new file mode 100644 index 0000000..3594f04 --- /dev/null +++ b/src/yuv.rs @@ -0,0 +1,452 @@ +//! Utilities for doing the YUV -> RGB conversion +//! The images are encoded in the Y'CbCr format as detailed here: +//! so need to be converted to RGB to be displayed +//! 
To do the YUV -> RGB conversion we need to first decide how to map the yuv values to the pixels +//! The y buffer is the same size as the pixel buffer so that maps 1-1 but the +//! u and v buffers are half the size of the pixel buffer so we need to scale it up +//! The simple way to upscale is just to take each u/v value and associate it with the 4 +//! pixels around it e.g. for a 4x4 image: +//! +//! |||||| +//! |yyyy| +//! |yyyy| +//! |yyyy| +//! |yyyy| +//! |||||| +//! +//! ||||||| +//! |uu|vv| +//! |uu|vv| +//! ||||||| +//! +//! Then each of the 2x2 pixels would match the u/v from the same quadrant +//! +//! However fancy upsampling is the default for libwebp which does a little more work to make the values smoother +//! It interpolates u and v so that for e.g. the pixel 1 down and 1 from the left the u value +//! would be (9*u0 + 3*u1 + 3*u2 + u3 + 8) / 16 and similar for the other pixels +//! The edges are mirrored, so for the pixel 1 down and 0 from the left it uses (9*u0 + 3*u2 + 3*u0 + u2 + 8) / 16 + +/// `_mm_mulhi_epu16` emulation +fn mulhi(v: u8, coeff: u16) -> i32 { + ((u32::from(v) * u32::from(coeff)) >> 8) as i32 +} + +/// This function has been rewritten to encourage auto-vectorization. +/// +/// Based on [src/dsp/yuv.h](https://github.com/webmproject/libwebp/blob/8534f53960befac04c9631e6e50d21dcb42dfeaf/src/dsp/yuv.h#L79) +/// from the libwebp source. +/// ```text +/// const YUV_FIX2: i32 = 6; +/// const YUV_MASK2: i32 = (256 << YUV_FIX2) - 1; +/// fn clip(v: i32) -> u8 { +/// if (v & !YUV_MASK2) == 0 { +/// (v >> YUV_FIX2) as u8 +/// } else if v < 0 { +/// 0 +/// } else { +/// 255 +/// } +/// } +/// ``` +// Clippy suggests the clamp method, but it seems to optimize worse as of rustc 1.82.0 nightly. 
+#[allow(clippy::manual_clamp)] +fn clip(v: i32) -> u8 { + const YUV_FIX2: i32 = 6; + (v >> YUV_FIX2).max(0).min(255) as u8 +} + +#[inline(always)] +fn yuv_to_r(y: u8, v: u8) -> u8 { + clip(mulhi(y, 19077) + mulhi(v, 26149) - 14234) +} + +#[inline(always)] +fn yuv_to_g(y: u8, u: u8, v: u8) -> u8 { + clip(mulhi(y, 19077) - mulhi(u, 6419) - mulhi(v, 13320) + 8708) +} + +#[inline(always)] +fn yuv_to_b(y: u8, u: u8) -> u8 { + clip(mulhi(y, 19077) + mulhi(u, 33050) - 17685) +} + +/// Fills an rgb buffer with the image from the yuv buffers +/// Size of the buffer is assumed to be correct +/// BPP is short for bytes per pixel, allows both rgb and rgba to be decoded +pub(crate) fn fill_rgb_buffer_fancy( + buffer: &mut [u8], + y_buffer: &[u8], + u_buffer: &[u8], + v_buffer: &[u8], + width: usize, + height: usize, + buffer_width: usize, +) { + // buffer width is always even so don't need to do div_ceil + let chroma_buffer_width = buffer_width / 2; + let chroma_width = width.div_ceil(2); + + // fill top row first since it only uses the top u/v row + let top_row_y = &y_buffer[..width]; + let top_row_u = &u_buffer[..chroma_width]; + let top_row_v = &v_buffer[..chroma_width]; + let top_row_buffer = &mut buffer[..width * BPP]; + fill_row_fancy_with_1_uv_row::(top_row_buffer, top_row_y, top_row_u, top_row_v); + + let mut main_row_chunks = buffer[width * BPP..].chunks_exact_mut(width * BPP * 2); + // the y buffer iterator limits the end of the row iterator so we need this end index + let end_y_index = height * buffer_width; + let mut main_y_chunks = y_buffer[buffer_width..end_y_index].chunks_exact(buffer_width * 2); + let mut main_u_windows = u_buffer + .windows(chroma_buffer_width * 2) + .step_by(chroma_buffer_width); + let mut main_v_windows = v_buffer + .windows(chroma_buffer_width * 2) + .step_by(chroma_buffer_width); + + for (((row_buffer, y_rows), u_rows), v_rows) in (&mut main_row_chunks) + .zip(&mut main_y_chunks) + .zip(&mut main_u_windows) + .zip(&mut main_v_windows) + { 
+ let (u_row_1, u_row_2) = u_rows.split_at(chroma_buffer_width); + let (v_row_1, v_row_2) = v_rows.split_at(chroma_buffer_width); + let (row_buf_1, row_buf_2) = row_buffer.split_at_mut(width * BPP); + let (y_row_1, y_row_2) = y_rows.split_at(buffer_width); + fill_row_fancy_with_2_uv_rows::( + row_buf_1, + &y_row_1[..width], + &u_row_1[..chroma_width], + &u_row_2[..chroma_width], + &v_row_1[..chroma_width], + &v_row_2[..chroma_width], + ); + fill_row_fancy_with_2_uv_rows::( + row_buf_2, + &y_row_2[..width], + &u_row_2[..chroma_width], + &u_row_1[..chroma_width], + &v_row_2[..chroma_width], + &v_row_1[..chroma_width], + ); + } + + let final_row_buffer = main_row_chunks.into_remainder(); + + // if the image has even height there will be one final row with only one u/v row matching it + if !final_row_buffer.is_empty() { + let final_y_row = main_y_chunks.remainder(); + + let chroma_height = height.div_ceil(2); + let start_chroma_index = (chroma_height - 1) * chroma_buffer_width; + + let final_u_row = &u_buffer[start_chroma_index..]; + let final_v_row = &v_buffer[start_chroma_index..]; + fill_row_fancy_with_1_uv_row::( + final_row_buffer, + &final_y_row[..width], + &final_u_row[..chroma_width], + &final_v_row[..chroma_width], + ); + } +} + +/// Fills a row with the fancy interpolation as detailed +fn fill_row_fancy_with_2_uv_rows( + row_buffer: &mut [u8], + y_row: &[u8], + u_row_1: &[u8], + u_row_2: &[u8], + v_row_1: &[u8], + v_row_2: &[u8], +) { + // need to do left pixel separately since it will only have one u/v value + { + let rgb1 = &mut row_buffer[0..3]; + let y_value = y_row[0]; + // first pixel uses the first u/v as the main one + let u_value = get_fancy_chroma_value(u_row_1[0], u_row_1[0], u_row_2[0], u_row_2[0]); + let v_value = get_fancy_chroma_value(v_row_1[0], v_row_1[0], v_row_2[0], v_row_2[0]); + set_pixel(rgb1, y_value, u_value, v_value); + } + + let rest_row_buffer = &mut row_buffer[BPP..]; + let rest_y_row = &y_row[1..]; + + // we do two pixels at a 
time since they share the same u/v values + let mut main_row_chunks = rest_row_buffer.chunks_exact_mut(BPP * 2); + let mut main_y_chunks = rest_y_row.chunks_exact(2); + + for (((((rgb, y_val), u_val_1), u_val_2), v_val_1), v_val_2) in (&mut main_row_chunks) + .zip(&mut main_y_chunks) + .zip(u_row_1.windows(2)) + .zip(u_row_2.windows(2)) + .zip(v_row_1.windows(2)) + .zip(v_row_2.windows(2)) + { + { + let rgb1 = &mut rgb[0..3]; + let y_value = y_val[0]; + // first pixel uses the first u/v as the main one + let u_value = get_fancy_chroma_value(u_val_1[0], u_val_1[1], u_val_2[0], u_val_2[1]); + let v_value = get_fancy_chroma_value(v_val_1[0], v_val_1[1], v_val_2[0], v_val_2[1]); + set_pixel(rgb1, y_value, u_value, v_value); + } + { + let rgb2 = &mut rgb[BPP..]; + let y_value = y_val[1]; + let u_value = get_fancy_chroma_value(u_val_1[1], u_val_1[0], u_val_2[1], u_val_2[0]); + let v_value = get_fancy_chroma_value(v_val_1[1], v_val_1[0], v_val_2[1], v_val_2[0]); + set_pixel(rgb2, y_value, u_value, v_value); + } + } + + let final_pixel = main_row_chunks.into_remainder(); + let final_y = main_y_chunks.remainder(); + + if let (rgb, [y_value]) = (final_pixel, final_y) { + let final_u_1 = *u_row_1.last().unwrap(); + let final_u_2 = *u_row_2.last().unwrap(); + + let final_v_1 = *v_row_1.last().unwrap(); + let final_v_2 = *v_row_2.last().unwrap(); + + let rgb1 = &mut rgb[0..3]; + // first pixel uses the first u/v as the main one + let u_value = get_fancy_chroma_value(final_u_1, final_u_1, final_u_2, final_u_2); + let v_value = get_fancy_chroma_value(final_v_1, final_v_1, final_v_2, final_v_2); + set_pixel(rgb1, *y_value, u_value, v_value); + } +} + +fn fill_row_fancy_with_1_uv_row( + row_buffer: &mut [u8], + y_row: &[u8], + u_row: &[u8], + v_row: &[u8], +) { + // doing left pixel first + { + let rgb1 = &mut row_buffer[0..3]; + let y_value = y_row[0]; + + let u_value = u_row[0]; + let v_value = v_row[0]; + set_pixel(rgb1, y_value, u_value, v_value); + } + + // two pixels at a 
time since they share the same u/v value + let mut main_row_chunks = row_buffer[BPP..].chunks_exact_mut(BPP * 2); + let mut main_y_row_chunks = y_row[1..].chunks_exact(2); + + for (((rgb, y_val), u_val), v_val) in (&mut main_row_chunks) + .zip(&mut main_y_row_chunks) + .zip(u_row.windows(2)) + .zip(v_row.windows(2)) + { + { + let rgb1 = &mut rgb[0..3]; + let y_value = y_val[0]; + // first pixel uses the first u/v as the main one + let u_value = get_fancy_chroma_value(u_val[0], u_val[1], u_val[0], u_val[1]); + let v_value = get_fancy_chroma_value(v_val[0], v_val[1], v_val[0], v_val[1]); + set_pixel(rgb1, y_value, u_value, v_value); + } + { + let rgb2 = &mut rgb[BPP..]; + let y_value = y_val[1]; + let u_value = get_fancy_chroma_value(u_val[1], u_val[0], u_val[1], u_val[0]); + let v_value = get_fancy_chroma_value(v_val[1], v_val[0], v_val[1], v_val[0]); + set_pixel(rgb2, y_value, u_value, v_value); + } + } + + let final_pixel = main_row_chunks.into_remainder(); + let final_y = main_y_row_chunks.remainder(); + + if let (rgb, [final_y]) = (final_pixel, final_y) { + let final_u = *u_row.last().unwrap(); + let final_v = *v_row.last().unwrap(); + + set_pixel(rgb, *final_y, final_u, final_v); + } +} + +#[inline] +fn get_fancy_chroma_value(main: u8, secondary1: u8, secondary2: u8, tertiary: u8) -> u8 { + let val0 = u16::from(main); + let val1 = u16::from(secondary1); + let val2 = u16::from(secondary2); + let val3 = u16::from(tertiary); + ((9 * val0 + 3 * val1 + 3 * val2 + val3 + 8) / 16) as u8 +} + +#[inline] +fn set_pixel(rgb: &mut [u8], y: u8, u: u8, v: u8) { + rgb[0] = yuv_to_r(y, v); + rgb[1] = yuv_to_g(y, u, v); + rgb[2] = yuv_to_b(y, u); +} + +/// Simple conversion, used when `UpsamplingMethod::Simple` is selected: reuses the nearest u/v value for each pixel without interpolation +#[allow(unused)] +pub(crate) fn fill_rgb_buffer_simple( + buffer: &mut [u8], + y_buffer: &[u8], + u_buffer: &[u8], + v_buffer: &[u8], + width: usize, + chroma_width: usize, + buffer_width: usize, +) { + let u_row_twice_iter = u_buffer 
+ .chunks_exact(buffer_width / 2) + .flat_map(|n| std::iter::repeat(n).take(2)); + let v_row_twice_iter = v_buffer + .chunks_exact(buffer_width / 2) + .flat_map(|n| std::iter::repeat(n).take(2)); + + for (((row, y_row), u_row), v_row) in buffer + .chunks_exact_mut(width * BPP) + .zip(y_buffer.chunks_exact(buffer_width)) + .zip(u_row_twice_iter) + .zip(v_row_twice_iter) + { + fill_rgba_row_simple::( + &y_row[..width], + &u_row[..chroma_width], + &v_row[..chroma_width], + row, + ); + } +} + +fn fill_rgba_row_simple( + y_vec: &[u8], + u_vec: &[u8], + v_vec: &[u8], + rgba: &mut [u8], +) { + // Fill 2 pixels per iteration: these pixels share `u` and `v` components + let mut rgb_chunks = rgba.chunks_exact_mut(BPP * 2); + let mut y_chunks = y_vec.chunks_exact(2); + let mut u_iter = u_vec.iter(); + let mut v_iter = v_vec.iter(); + + for (((rgb, y), &u), &v) in (&mut rgb_chunks) + .zip(&mut y_chunks) + .zip(&mut u_iter) + .zip(&mut v_iter) + { + let coeffs = [ + mulhi(v, 26149), + mulhi(u, 6419), + mulhi(v, 13320), + mulhi(u, 33050), + ]; + + let get_r = |y: u8| clip(mulhi(y, 19077) + coeffs[0] - 14234); + let get_g = |y: u8| clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708); + let get_b = |y: u8| clip(mulhi(y, 19077) + coeffs[3] - 17685); + + let rgb1 = &mut rgb[0..3]; + rgb1[0] = get_r(y[0]); + rgb1[1] = get_g(y[0]); + rgb1[2] = get_b(y[0]); + + let rgb2 = &mut rgb[BPP..]; + rgb2[0] = get_r(y[1]); + rgb2[1] = get_g(y[1]); + rgb2[2] = get_b(y[1]); + } + + let remainder = rgb_chunks.into_remainder(); + if remainder.len() >= 3 { + if let (Some(&y), Some(&u), Some(&v)) = ( + y_chunks.remainder().iter().next(), + u_iter.next(), + v_iter.next(), + ) { + let coeffs = [ + mulhi(v, 26149), + mulhi(u, 6419), + mulhi(v, 13320), + mulhi(u, 33050), + ]; + + remainder[0] = clip(mulhi(y, 19077) + coeffs[0] - 14234); + remainder[1] = clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708); + remainder[2] = clip(mulhi(y, 19077) + coeffs[3] - 17685); + } + } +} + +#[cfg(test)] +mod tests { 
+ use super::*; + + #[test] + fn test_fancy_grid() { + #[rustfmt::skip] + let y_buffer = [ + 77, 162, 202, 185, + 28, 13, 199, 182, + 135, 147, 164, 135, + 66, 27, 171, 130, + ]; + + #[rustfmt::skip] + let u_buffer = [ + 34, 101, + 123, 163 + ]; + + #[rustfmt::skip] + let v_buffer = [ + 97, 167, + 149, 23, + ]; + + let mut rgb_buffer = [0u8; 16 * 3]; + fill_rgb_buffer_fancy::<3>(&mut rgb_buffer, &y_buffer, &u_buffer, &v_buffer, 4, 4, 4); + + #[rustfmt::skip] + let upsampled_u_buffer = [ + 34, 51, 84, 101, + 56, 71, 101, 117, + 101, 112, 136, 148, + 123, 133, 153, 163, + ]; + + #[rustfmt::skip] + let upsampled_v_buffer = [ + 97, 115, 150, 167, + 110, 115, 126, 131, + 136, 117, 78, 59, + 149, 118, 55, 23, + ]; + + let mut upsampled_rgb_buffer = [0u8; 16 * 3]; + for (((rgb_val, y), u), v) in upsampled_rgb_buffer + .chunks_exact_mut(3) + .zip(y_buffer) + .zip(upsampled_u_buffer) + .zip(upsampled_v_buffer) + { + rgb_val[0] = yuv_to_r(y, v); + rgb_val[1] = yuv_to_g(y, u, v); + rgb_val[2] = yuv_to_b(y, u); + } + + assert_eq!(rgb_buffer, upsampled_rgb_buffer); + } + + #[test] + fn test_yuv_conversions() { + let (y, u, v) = (203, 40, 42); + + assert_eq!(yuv_to_r(y, v), 80); + assert_eq!(yuv_to_g(y, u, v), 255); + assert_eq!(yuv_to_b(y, u), 40); + } +} diff --git a/tests/CREDITS.md b/tests/CREDITS.md index c0c21f1..421a84f 100644 --- a/tests/CREDITS.md +++ b/tests/CREDITS.md @@ -38,7 +38,11 @@ These files are all PNGs with contents that should exactly match the associated ## reference/gallery1 and reference/gallery2 -These files were all produced by running dwebp with the `-nofancy` option. +These files were all produced by running dwebp with the default settings. + +## reference/gallery1_nofancy + +These files were produced by running dwebp with the `-nofancy` option. 
## reference/animated @@ -51,7 +55,7 @@ for i in {1..3}; do webpmux -get frame ${i} ../../images/animated/random_lossles random-lossy-N.png: ``` -for i in {1..4}; do webpmux -get frame ${i} ../../images/animated/random_lossy.webp -o random_lossy-${i}.png && dwebp random_lossy-${i}.png -nofancy -o random_lossy-${i}.png; done +for i in {1..4}; do webpmux -get frame ${i} ../../images/animated/random_lossy.webp -o random_lossy-${i}.png && dwebp random_lossy-${i}.png -o random_lossy-${i}.png; done ``` ## reference/regression diff --git a/tests/decode.rs b/tests/decode.rs index 0e8e8fe..8674ee0 100644 --- a/tests/decode.rs +++ b/tests/decode.rs @@ -37,16 +37,26 @@ fn save_image(data: &[u8], file: &str, i: Option, has_alpha: bool, width: u } fn reference_test(file: &str) { + reference_test_with_options(file, image_webp::WebPDecodeOptions::default(), None); +} + +fn reference_test_with_options( + file: &str, + options: image_webp::WebPDecodeOptions, + custom_reference_file: Option<&str>, +) { // Prepare WebP decoder let contents = std::fs::read(format!("tests/images/{file}.webp")).unwrap(); - let mut decoder = image_webp::WebPDecoder::new(Cursor::new(contents)).unwrap(); + let mut decoder = + image_webp::WebPDecoder::new_with_options(Cursor::new(contents), options).unwrap(); let (width, height) = decoder.dimensions(); // Decode reference PNG + let reference_file = custom_reference_file.unwrap_or(file); let reference_path = if decoder.is_animated() { - format!("tests/reference/{file}-1.png") + format!("tests/reference/{reference_file}-1.png") } else { - format!("tests/reference/{file}.png") + format!("tests/reference/{reference_file}.png") }; let reference_contents = std::fs::read(reference_path).unwrap(); let mut reference_decoder = png::Decoder::new(Cursor::new(reference_contents)) @@ -85,7 +95,7 @@ fn reference_test(file: &str) { } } else { // NOTE: WebP lossy images are stored in YUV format. 
The conversion to RGB is not precisely - // defined, but we currently attempt to match the dwebp's "-nofancy" conversion option. + // defined, but we currently attempt to match the dwebp's default conversion option. let num_bytes_different = data .iter() .zip(reference_data.iter()) @@ -156,7 +166,29 @@ macro_rules! reftest { } } +macro_rules! reftest_nofancy { + ($basename:expr, $name:expr) => { + paste::paste! { + #[test] + fn []() { + let mut options = image_webp::WebPDecodeOptions::default(); + options.lossy_upsampling = image_webp::UpsamplingMethod::Simple; + reference_test_with_options( + concat!(stringify!($basename), "/", stringify!($name)), + options, + Some(concat!(stringify!($basename), "_nofancy", "/", stringify!($name))) + ); + } + } + }; + ($basename:expr, $name:expr, $($tail:expr),+) => { + reftest_nofancy!( $basename, $name ); + reftest_nofancy!( $basename, $($tail),+ ); + } +} + reftest!(gallery1, 1, 2, 3, 4, 5); +reftest_nofancy!(gallery1, 1, 2, 3, 4, 5); reftest!(gallery2, 1_webp_ll, 2_webp_ll, 3_webp_ll, 4_webp_ll, 5_webp_ll); reftest!(gallery2, 1_webp_a, 2_webp_a, 3_webp_a, 4_webp_a, 5_webp_a); reftest!(animated, random_lossless, random_lossy); diff --git a/tests/reference/animated/random_lossy-1.png b/tests/reference/animated/random_lossy-1.png index d555097..c0b05d2 100644 Binary files a/tests/reference/animated/random_lossy-1.png and b/tests/reference/animated/random_lossy-1.png differ diff --git a/tests/reference/animated/random_lossy-2.png b/tests/reference/animated/random_lossy-2.png index 41ca056..1272574 100644 Binary files a/tests/reference/animated/random_lossy-2.png and b/tests/reference/animated/random_lossy-2.png differ diff --git a/tests/reference/animated/random_lossy-3.png b/tests/reference/animated/random_lossy-3.png index bee4cbf..5227737 100644 Binary files a/tests/reference/animated/random_lossy-3.png and b/tests/reference/animated/random_lossy-3.png differ diff --git a/tests/reference/animated/random_lossy-4.png 
b/tests/reference/animated/random_lossy-4.png index ca504e0..e2ee179 100644 Binary files a/tests/reference/animated/random_lossy-4.png and b/tests/reference/animated/random_lossy-4.png differ diff --git a/tests/reference/gallery1/1.png b/tests/reference/gallery1/1.png index ceb01a8..c50e3cd 100644 Binary files a/tests/reference/gallery1/1.png and b/tests/reference/gallery1/1.png differ diff --git a/tests/reference/gallery1/2.png b/tests/reference/gallery1/2.png index a9df76a..3cc4f75 100644 Binary files a/tests/reference/gallery1/2.png and b/tests/reference/gallery1/2.png differ diff --git a/tests/reference/gallery1/3.png b/tests/reference/gallery1/3.png index 3197655..b998a8c 100644 Binary files a/tests/reference/gallery1/3.png and b/tests/reference/gallery1/3.png differ diff --git a/tests/reference/gallery1/4.png b/tests/reference/gallery1/4.png index 3c9b404..dd216c4 100644 Binary files a/tests/reference/gallery1/4.png and b/tests/reference/gallery1/4.png differ diff --git a/tests/reference/gallery1/5.png b/tests/reference/gallery1/5.png index 1b5ac55..4ed511a 100644 Binary files a/tests/reference/gallery1/5.png and b/tests/reference/gallery1/5.png differ diff --git a/tests/reference/gallery1_nofancy/1.png b/tests/reference/gallery1_nofancy/1.png new file mode 100644 index 0000000..ceb01a8 Binary files /dev/null and b/tests/reference/gallery1_nofancy/1.png differ diff --git a/tests/reference/gallery1_nofancy/2.png b/tests/reference/gallery1_nofancy/2.png new file mode 100644 index 0000000..a9df76a Binary files /dev/null and b/tests/reference/gallery1_nofancy/2.png differ diff --git a/tests/reference/gallery1_nofancy/3.png b/tests/reference/gallery1_nofancy/3.png new file mode 100644 index 0000000..3197655 Binary files /dev/null and b/tests/reference/gallery1_nofancy/3.png differ diff --git a/tests/reference/gallery1_nofancy/4.png b/tests/reference/gallery1_nofancy/4.png new file mode 100644 index 0000000..3c9b404 Binary files /dev/null and 
b/tests/reference/gallery1_nofancy/4.png differ diff --git a/tests/reference/gallery1_nofancy/5.png b/tests/reference/gallery1_nofancy/5.png new file mode 100644 index 0000000..1b5ac55 Binary files /dev/null and b/tests/reference/gallery1_nofancy/5.png differ diff --git a/tests/reference/gallery2/1_webp_a.png b/tests/reference/gallery2/1_webp_a.png index 401c7c4..72f61f5 100644 Binary files a/tests/reference/gallery2/1_webp_a.png and b/tests/reference/gallery2/1_webp_a.png differ diff --git a/tests/reference/gallery2/2_webp_a.png b/tests/reference/gallery2/2_webp_a.png index 36aa663..6868995 100644 Binary files a/tests/reference/gallery2/2_webp_a.png and b/tests/reference/gallery2/2_webp_a.png differ diff --git a/tests/reference/gallery2/3_webp_a.png b/tests/reference/gallery2/3_webp_a.png index a9f939e..f3251e7 100644 Binary files a/tests/reference/gallery2/3_webp_a.png and b/tests/reference/gallery2/3_webp_a.png differ diff --git a/tests/reference/gallery2/4_webp_a.png b/tests/reference/gallery2/4_webp_a.png index b341ef1..6a141e6 100644 Binary files a/tests/reference/gallery2/4_webp_a.png and b/tests/reference/gallery2/4_webp_a.png differ diff --git a/tests/reference/gallery2/5_webp_a.png b/tests/reference/gallery2/5_webp_a.png index c613bae..2b658af 100644 Binary files a/tests/reference/gallery2/5_webp_a.png and b/tests/reference/gallery2/5_webp_a.png differ