From 215e82423fad87d4e865b83a3fd23754b619c935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Jim=C3=A9nez=20Moreno?= Date: Thu, 4 Sep 2025 12:10:13 +0200 Subject: [PATCH 1/3] Add automatic frame resizing to edgeimpulsevideoinfer element --- README.md | 58 +++++------- examples/audio_inference.rs | 4 +- examples/image_inference.rs | 11 +-- examples/image_slideshow.rs | 17 +--- examples/video_inference.rs | 48 +++------- src/video/imp.rs | 178 +++++++++++++++++++++++++++++------- 6 files changed, 189 insertions(+), 127 deletions(-) diff --git a/README.md b/README.md index 4efa2c9..acdb296 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ export EI_MODEL="~/Downloads/your-model-directory" # Optional: for local models cargo build --release ``` -**EIM Mode (Legacy):** +**EIM Mode:** - Uses Edge Impulse model files (.eim) for inference - Requires EIM model files to be present on the filesystem - Compatible with all Edge Impulse deployment targets @@ -390,7 +390,7 @@ gst-launch-1.0 autoaudiosrc ! \ ``` ### edgeimpulsevideoinfer -Video inference element that processes video frames through Edge Impulse models. +Video inference element that processes video frames through Edge Impulse models. The element automatically handles frame resizing to match model input requirements and scales detection results back to the original resolution. Element Details: - Long name: Edge Impulse Video Inference @@ -443,22 +443,20 @@ Example pipelines: Basic pipeline with built-in overlay: ```bash # FFI mode (default) -gst-launch-1.0 avfvideosrc ! \ +gst-launch-1.0 avfvideosrc ! \ queue max-size-buffers=2 leaky=downstream ! \ videoconvert n-threads=4 ! \ - videoscale method=nearest-neighbour ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ queue max-size-buffers=2 leaky=downstream ! \ edgeimpulsevideoinfer ! \ edgeimpulseoverlay ! \ autovideosink sync=false -# EIM mode (legacy) -gst-launch-1.0 avfvideosrc ! \ +# EIM mode +gst-launch-1.0 avfvideosrc ! \ queue max-size-buffers=2 leaky=downstream ! \ videoconvert n-threads=4 ! \ - videoscale method=nearest-neighbour ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ queue max-size-buffers=2 leaky=downstream ! \ edgeimpulsevideoinfer model-path= ! \ edgeimpulseoverlay ! \ @@ -470,8 +468,7 @@ Pipeline with threshold settings: # FFI mode (default) - Set object detection threshold gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer threshold="5.min_score=0.6" ! \ edgeimpulseoverlay ! \ autovideosink sync=false @@ -479,28 +476,25 @@ gst-launch-1.0 avfvideosrc ! \ # FFI mode (default) - Set multiple thresholds gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer \ threshold="5.min_score=0.6" \ threshold="4.min_anomaly_score=0.35" ! \ edgeimpulseoverlay ! \ autovideosink sync=false -# EIM mode (legacy) - Set object detection threshold +# EIM mode - Set object detection threshold gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer model-path= threshold="5.min_score=0.6" ! \ edgeimpulseoverlay ! 
\ autovideosink sync=false -# EIM mode (legacy) - Set multiple thresholds +# EIM mode - Set multiple thresholds gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer model-path= \ threshold="5.min_score=0.6" \ threshold="4.min_anomaly_score=0.35" ! \ @@ -564,17 +558,15 @@ Example pipeline: # FFI mode (default) gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer ! \ edgeimpulseoverlay stroke-width=3 text-scale-ratio=1.5 text-color=0x00FF00 background-color=0x000000 ! \ autovideosink sync=false -# EIM mode (legacy) +# EIM mode gst-launch-1.0 avfvideosrc ! \ videoconvert ! \ - videoscale ! \ - video/x-raw,format=RGB,width=384,height=384 ! \ + video/x-raw,format=RGB,width=1920,height=1080 ! \ edgeimpulsevideoinfer model-path= ! \ edgeimpulseoverlay stroke-width=3 text-scale-ratio=1.5 text-color=0x00FF00 background-color=0x000000 ! \ autovideosink sync=false @@ -847,16 +839,15 @@ The repository includes an `image_slideshow` example that demonstrates how to ru ```bash # FFI mode (default) -cargo run --example image_slideshow -- --folder -W -H [--framerate ] [--max-images ] +cargo run --example image_slideshow -- --folder [--framerate ] [--max-images ] + +# EIM mode +cargo run --example image_slideshow -- --model --folder [--framerate ] [--max-images ] -# EIM mode (legacy) -cargo run --example image_slideshow -- --model --folder -W -H [--framerate ] [--max-images ] ``` - `--model` (optional): Path to the Edge Impulse model file (.eim) - only needed for EIM mode - `--folder` (required): Path to the folder containing images (jpg, jpeg, png) -- `-W`, `--width` (required): Input width for inference -- `-H`, `--height` (required): Input height for inference - `--framerate` (optional): Slideshow speed in images per second (default: 1) - `--max-images` (optional): Maximum number of images to process (default: 100) @@ -864,7 +855,7 @@ cargo run --example image_slideshow -- --model --folder --folder [OPTIONS] //! //! Optional arguments: @@ -37,7 +37,7 @@ use std::path::Path; /// Command line parameters for the real-time audio classification example #[derive(Parser, Debug)] struct AudioClassifyParams { - /// Path to the Edge Impulse model file (.eim) - EIM mode only (legacy) + /// Path to the Edge Impulse model file (.eim) - EIM mode only #[clap(short, long)] model: Option, diff --git a/examples/image_inference.rs b/examples/image_inference.rs index ea16f75..78af2a1 100644 --- a/examples/image_inference.rs +++ b/examples/image_inference.rs @@ -5,6 +5,9 @@ //! This example demonstrates how to use the Edge Impulse GStreamer plugin to perform //! image classification using a trained model on a single image file. //! +//! The edgeimpulsevideoinfer element automatically handles frame resizing to match model +//! input requirements and scales detection results back to the original resolution. +//! //! Usage: //! # EIM mode (requires model path): //! cargo run --example image_inference -- --model --image @@ -12,6 +15,7 @@ //! # FFI mode (no model path needed): //! cargo run --example image_inference -- --image //! +//! //! Environment setup: //! 
export GST_PLUGIN_PATH="target/debug:$GST_PLUGIN_PATH" @@ -42,13 +46,6 @@ struct ImageClassifyParams { #[arg(short, long, default_value = "RGB")] format: String, - /// Input width - #[arg(short = 'W', long, default_value = "96")] - width: i32, - - /// Input height - #[arg(short = 'H', long, default_value = "96")] - height: i32, /// Enable debug output #[arg(short, long)] diff --git a/examples/image_slideshow.rs b/examples/image_slideshow.rs index d9e9927..3b53410 100644 --- a/examples/image_slideshow.rs +++ b/examples/image_slideshow.rs @@ -13,6 +13,7 @@ use image::io::Reader as ImageReader; use image::ImageFormat; /// A GStreamer-based image slideshow that runs inference on images from a folder. +/// The edgeimpulsevideoinfer element automatically handles frame resizing to match model requirements. #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] struct Args { @@ -28,13 +29,6 @@ struct Args { #[arg(short = 'n', long, default_value = "100")] max_images: usize, - /// Input width - #[arg(short = 'W', long)] - width: i32, - - /// Input height - #[arg(short = 'H', long)] - height: i32, /// Slideshow framerate (images per second, default: 1) #[arg(long, default_value = "1")] @@ -187,7 +181,7 @@ fn example_main() -> Result<()> { .property("max-size-buffers", 1u32) .property("max-size-time", 30000000000u64) .build()?; - let videoscale = ElementFactory::make("videoscale").build()?; + // videoscale removed - edgeimpulsevideoinfer now handles resizing let videorate = ElementFactory::make("videorate") .property("max-rate", args.framerate) .build()?; @@ -203,18 +197,15 @@ fn example_main() -> Result<()> { .build()?; // Set caps for GRAY8 before inference, including framerate to control slideshow speed + // The edgeimpulsevideoinfer element will automatically resize to match model requirements let caps_gray = gstreamer::Caps::builder("video/x-raw") .field("format", "GRAY8") - .field("width", args.width) - .field("height", args.height) .field("framerate", &gstreamer::Fraction::new(args.framerate, 1)) .build(); capsfilter_gray.set_property("caps", &caps_gray); let caps_rgb = gstreamer::Caps::builder("video/x-raw") .field("format", "RGB") - .field("width", args.width) - .field("height", args.height) .build(); capsfilter_rgb.set_property("caps", &caps_rgb); @@ -223,7 +214,6 @@ fn example_main() -> Result<()> { &decodebin, &videoconvert1, &queue, - &videoscale, &videorate, &capsfilter_gray, &edgeimpulse, @@ -259,7 +249,6 @@ fn example_main() -> Result<()> { Element::link_many([ &videoconvert1, &queue, - &videoscale, &videorate, &capsfilter_gray, &edgeimpulse, diff --git a/examples/video_inference.rs b/examples/video_inference.rs index a38c97a..372bbda 100644 --- a/examples/video_inference.rs +++ b/examples/video_inference.rs @@ -3,7 +3,10 @@ //! Video Classification Example using edgeimpulseinfer GStreamer plugin //! //! This example demonstrates how to use the Edge Impulse GStreamer plugin to perform -//! video classification using a trained model with performance optimizations. +//! video classification using a trained model with automatic frame resizing and performance optimizations. +//! +//! The edgeimpulsevideoinfer element automatically handles frame resizing to match model +//! input requirements and scales detection results back to the original resolution. //! //! Usage: //! # EIM mode (requires model path): @@ -12,6 +15,7 @@ //! # FFI mode (no model path needed): //! cargo run --example video_inference //! +//! //! Environment setup: //! 
export GST_PLUGIN_PATH="target/debug:$GST_PLUGIN_PATH" @@ -36,13 +40,6 @@ struct VideoClassifyParams { #[arg(short, long, default_value = "RGB")] format: String, - /// Input width - #[arg(short = 'W', long)] - width: i32, - - /// Input height - #[arg(short = 'H', long)] - height: i32, /// Enable debug output #[arg(short, long)] @@ -286,10 +283,6 @@ fn create_pipeline(args: &VideoClassifyParams) -> Result Result Result Result Result<(), Box> { println!("πŸ“Š Model type: {}", model_type); } if let Ok(input_width) = structure.get::("input-width") { - println!("πŸ“ Input width: {}", input_width); + println!("πŸ“ Model input width: {}", input_width); } if let Ok(input_height) = structure.get::("input-height") { - println!("πŸ“ Input height: {}", input_height); + println!("πŸ“ Model input height: {}", input_height); } if let Ok(channel_count) = structure.get::("channel-count") { println!("🎨 Channel count: {}", channel_count); @@ -468,6 +440,8 @@ fn example_main() -> Result<(), Box> { if let Ok(has_anomaly) = structure.get::("has-anomaly") { println!("πŸ” Has anomaly detection: {}", has_anomaly); } + println!("πŸ”„ Input frames will be resized to match model requirements"); + println!("πŸ“ Detection results will be scaled back to original resolution"); } // Print model info from inference results (for FFI mode) diff --git a/src/video/imp.rs b/src/video/imp.rs index dd0dbaf..995ed51 100644 --- a/src/video/imp.rs +++ b/src/video/imp.rs @@ -10,9 +10,10 @@ //! 2. Input frames are copied directly to output (always) //! 3. If a model is loaded, frames are also: //! - Mapped to raw RGB data +//! - Resized to match model input requirements if needed (internal resize) //! - Converted to features based on model requirements (RGB or grayscale) //! - Processed through the Edge Impulse model -//! - Results are emitted as GStreamer messages +//! - Results are scaled back to original resolution and emitted as GStreamer messages //! //! # Result Output Mechanisms //! For object detection models, the element provides two mechanisms to consume results: @@ -32,7 +33,7 @@ //! visualization tools to work with the detection results. //! //! # Properties -//! - `model-path`: Path to the Edge Impulse model file (.eim) - EIM mode only (legacy) +//! - `model-path`: Path to the Edge Impulse model file (.eim) - EIM mode only //! - When set, loads the model and begins inference //! - When unset, uses FFI mode (default) //! - `debug`: Enable debug mode for FFI inference (FFI mode only) @@ -76,28 +77,22 @@ //! avfvideosrc ! \ //! queue max-size-buffers=2 leaky=downstream ! \ //! videoconvert n-threads=4 ! \ -//! videoscale method=nearest-neighbour ! \ -//! video/x-raw,format=RGB,width=384,height=384 ! \ +//! video/x-raw,format=RGB,width=1920,height=1080 ! \ //! queue max-size-buffers=2 leaky=downstream ! \ //! edgeimpulsevideoinfer ! \ //! queue max-size-buffers=2 leaky=downstream ! \ -//! videoscale method=nearest-neighbour ! \ -//! video/x-raw,width=480,height=480 ! \ //! videoconvert n-threads=4 ! \ //! autovideosink sync=false //! -//! # EIM mode (legacy) +//! # EIM mode //! gst-launch-1.0 \ //! avfvideosrc ! \ //! queue max-size-buffers=2 leaky=downstream ! \ //! videoconvert n-threads=4 ! \ -//! videoscale method=nearest-neighbour ! \ -//! video/x-raw,format=RGB,width=384,height=384 ! \ +//! video/x-raw,format=RGB,width=1920,height=1080 ! \ //! queue max-size-buffers=2 leaky=downstream ! \ //! edgeimpulsevideoinfer model-path= ! \ //! queue max-size-buffers=2 leaky=downstream ! \ -//! 
videoscale method=nearest-neighbour ! \ -//! video/x-raw,width=480,height=480 ! \ //! videoconvert n-threads=4 ! \ //! autovideosink sync=false //! ``` @@ -138,10 +133,10 @@ //! - Message type: "object-detection" //! //! ## Video Format Requirements -//! - Format must be RGB (no other color formats supported) -//! - Width and height must match model input dimensions +//! - Format must be RGB or GRAY8 (no other color formats supported) +//! - Width and height can be any size - the element will automatically resize to model requirements //! - Frame rate is unrestricted -//! - Stride must be width * 3 (no padding supported) +//! - Stride must be width * 3 for RGB or width * 1 for GRAY8 (no padding supported) //! //! ## Error Handling //! The element handles errors gracefully: @@ -164,9 +159,9 @@ //! //! ## Performance Considerations //! - Single frame copy operation +//! - Automatic internal resizing when input size differs from model requirements //! - Feature conversion optimized for both RGB and grayscale -//! - No additional scaling or format conversion -//! - Pipeline should handle any necessary pre-processing +//! - Results are automatically scaled back to original resolution //! - Inference runs in transform thread //! //! ## Pad Templates @@ -191,7 +186,7 @@ //! ## Element Information //! - Name: "edgeimpulsevideoinfer" //! - Classification: Filter/Video/AI -//! - Description: "Runs video inference on Edge Impulse models (FFI default, EIM legacy)" +//! - Description: "Runs video inference on Edge Impulse models (FFI default, EIM mode)" //! //! ## Debug Categories //! The element uses the "edgeimpulsevideoinfer" debug category for logging. @@ -210,6 +205,7 @@ use gstreamer_base::subclass::prelude::*; use gstreamer_base::subclass::BaseTransformMode; use gstreamer_video as gst_video; use gstreamer_video::{VideoFormat, VideoInfo}; +use image::{ImageBuffer, Rgb, RgbImage}; use once_cell::sync::Lazy; use std::sync::Mutex; @@ -259,6 +255,66 @@ impl Default for VideoState { impl VideoState {} +/// Helper function to resize RGB image data +fn resize_rgb_image( + data: &[u8], + src_width: u32, + src_height: u32, + dst_width: u32, + dst_height: u32, +) -> Result, Box> { + // Create image buffer from input data + let img: RgbImage = ImageBuffer::from_raw(src_width, src_height, data.to_vec()) + .ok_or("Failed to create image buffer")?; + + // Resize the image + let resized = image::imageops::resize(&img, dst_width, dst_height, image::imageops::FilterType::Lanczos3); + + // Convert back to bytes + Ok(resized.into_raw()) +} + +/// Helper function to resize grayscale image data +fn resize_gray_image( + data: &[u8], + src_width: u32, + src_height: u32, + dst_width: u32, + dst_height: u32, +) -> Result, Box> { + // Create grayscale image buffer from input data + let img = ImageBuffer::, Vec>::from_raw(src_width, src_height, data.to_vec()) + .ok_or("Failed to create grayscale image buffer")?; + + // Resize the image + let resized = image::imageops::resize(&img, dst_width, dst_height, image::imageops::FilterType::Lanczos3); + + // Convert back to bytes + Ok(resized.into_raw()) +} + +/// Helper function to scale bounding box coordinates from model resolution to original resolution +fn scale_bounding_box( + x: u32, + y: u32, + width: u32, + height: u32, + model_width: u32, + model_height: u32, + original_width: u32, + original_height: u32, +) -> (u32, u32, u32, u32) { + let scale_x = original_width as f32 / model_width as f32; + let scale_y = original_height as f32 / model_height as f32; + + let scaled_x = 
(x as f32 * scale_x) as u32; + let scaled_y = (y as f32 * scale_y) as u32; + let scaled_width = (width as f32 * scale_x) as u32; + let scaled_height = (height as f32 * scale_y) as u32; + + (scaled_x, scaled_y, scaled_width, scaled_height) +} + impl crate::common::DebugState for VideoState { fn set_debug(&mut self, enabled: bool) { #[cfg(feature = "ffi")] @@ -357,7 +413,7 @@ impl ElementImpl for EdgeImpulseVideoInfer { gst::subclass::ElementMetadata::new( "Edge Impulse Video Inference", "Filter/Video/AI", - "Runs video inference on Edge Impulse models (FFI default, EIM legacy)", + "Runs video inference on Edge Impulse models (FFI default, EIM mode)", "Fernando JimΓ©nez Moreno ", ) }); @@ -561,8 +617,8 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { } }; - let _model_width = params.image_input_width; - let _model_height = params.image_input_height; + let model_width = params.image_input_width; + let model_height = params.image_input_height; let channels = params.image_channel_count; let is_object_detection = params.model_type == "constrained_object_detection" || params.model_type == "object_detection" @@ -595,18 +651,50 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { CAT, obj = self.obj(), "Model parameters: width={}, height={}, channels={}, type={}, has_anomaly={:?}", - _model_width, - _model_height, + model_width, + model_height, channels, params.model_type, params.has_anomaly ); - // Use the already mapped input buffer data directly for feature conversion - let frame_data = &in_map; + // Check if we need to resize the frame for the model + let (frame_data, inference_width, inference_height) = if width != model_width || height != model_height { + gst::debug!( + CAT, + obj = self.obj(), + "Frame size mismatch: input={}x{}, model requires={}x{}, resizing frame", + width, height, model_width, model_height + ); + + // Resize the frame data + let resized_data = if format == Some(VideoFormat::Gray8) { + resize_gray_image(&in_map, width, height, model_width, model_height) + .map_err(|e| { + gst::error!(CAT, obj = self.obj(), "Failed to resize grayscale frame: {}", e); + gst::FlowError::Error + })? + } else { + resize_rgb_image(&in_map, width, height, model_width, model_height) + .map_err(|e| { + gst::error!(CAT, obj = self.obj(), "Failed to resize RGB frame: {}", e); + gst::FlowError::Error + })? 
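+ // The resized buffer is tightly packed at the model's input resolution; detection results are mapped back to the source frame later via scale_bounding_box().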
+ }; + + (resized_data, model_width, model_height) + } else { + gst::debug!( + CAT, + obj = self.obj(), + "Frame size matches model requirements: {}x{}", + width, height + ); + (in_map.to_vec(), width, height) + }; // Pre-allocate features vector with exact capacity - let pixel_count = (width * height) as usize; + let pixel_count = (inference_width * inference_height) as usize; let mut features = Vec::with_capacity(pixel_count); // Optimized feature conversion - avoid redundant VideoFrameRef creation @@ -671,7 +759,7 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { // Standardize object detection: always array of objects for bounding_boxes if let Some(bboxes) = result_value.get_mut("bounding_boxes") { if bboxes.is_object() { - // Convert object to array if needed (legacy) + // Convert object to array if needed let mut arr = Vec::new(); for (_k, v) in bboxes.as_object().unwrap() { arr.push(v.clone()); @@ -724,18 +812,29 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { // Convert grid cells to VideoRegionOfInterestMeta format let mut grid_rois = Vec::new(); for cell in grid { - if let (Some(x), Some(y), Some(width), Some(height), Some(score)) = ( + if let (Some(x), Some(y), Some(cell_width), Some(cell_height), Some(score)) = ( cell["x"].as_u64(), cell["y"].as_u64(), cell["width"].as_u64(), cell["height"].as_u64(), cell["value"].as_f64(), ) { + // Scale coordinates back to original resolution if frame was resized + let (scaled_x, scaled_y, scaled_width, scaled_height) = if inference_width != width || inference_height != height { + scale_bounding_box( + x as u32, y as u32, cell_width as u32, cell_height as u32, + inference_width, inference_height, + width, height + ) + } else { + (x as u32, y as u32, cell_width as u32, cell_height as u32) + }; + grid_rois.push(VideoRegionOfInterestMeta { - x: x as u32, - y: y as u32, - width: width as u32, - height: height as u32, + x: scaled_x, + y: scaled_y, + width: scaled_width, + height: scaled_height, label: format!("{:.1}", score * 100.0), // Store score as label }); } @@ -778,8 +877,8 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { Some(value), Some(x), Some(y), - Some(width), - Some(height), + Some(bbox_width), + Some(bbox_height), ) = ( bbox["label"].as_str(), bbox["value"].as_f64(), @@ -788,10 +887,21 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { bbox["width"].as_u64(), bbox["height"].as_u64(), ) { + // Scale coordinates back to original resolution if frame was resized + let (scaled_x, scaled_y, scaled_width, scaled_height) = if inference_width != width || inference_height != height { + scale_bounding_box( + x as u32, y as u32, bbox_width as u32, bbox_height as u32, + inference_width, inference_height, + width, height + ) + } else { + (x as u32, y as u32, bbox_width as u32, bbox_height as u32) + }; + let mut roi_meta = gst_video::VideoRegionOfInterestMeta::add( outbuf, label, - (x as u32, y as u32, width as u32, height as u32), + (scaled_x, scaled_y, scaled_width, scaled_height), ); let s = gst::Structure::builder("detection") .field("label", label) From adb57d9c7a9224942a259c2f7f01ffd461c2b0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Jim=C3=A9nez=20Moreno?= Date: Thu, 4 Sep 2025 13:33:18 +0200 Subject: [PATCH 2/3] Fix build --- README.md | 215 ++++++++++++++++++++++++++++- build.rs | 21 +++ examples/image_inference.rs | 1 - examples/image_slideshow.rs | 1 - examples/video_inference.rs | 107 ++++++++++----- src/audio/imp.rs | 2 + src/common.rs | 2 + src/video/imp.rs | 266 +++++++++++++++++++++++++++++------- 8 
files changed, 522 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index acdb296..0cd67ac 100644 --- a/README.md +++ b/README.md @@ -106,12 +106,33 @@ The plugin exposes results and ingestion status through standardized mechanisms: > **Note:** Audio elements only emit bus messages; video elements emit both bus messages and metadata. ## Dependencies -This plugin depends on: -* GStreamer 1.20 or newer + +### System Dependencies +This plugin requires additional system libraries for overlay rendering: + +**On macOS (with Homebrew):** +```bash +brew install pango cairo xorgproto libx11 +``` + +**Note:** We recommend installing GStreamer from official binaries (see step 2 above) rather than via Homebrew to avoid potential version conflicts. + +**On Ubuntu/Debian:** +```bash +sudo apt-get update +sudo apt-get install libpango1.0-dev libcairo2-dev libx11-dev libxext-dev libxrender-dev \ + libxcb1-dev libxau-dev libxdmcp-dev libxorg-dev +``` + +**On CentOS/RHEL/Fedora:** +```bash +sudo dnf install pango-devel cairo-devel libX11-devel libXext-devel libXrender-devel \ + libxcb-devel libXau-devel libXdmcp-devel xorg-x11-proto-devel +``` + +### Edge Impulse Rust Dependencies * [edge-impulse-runner-rs](https://github.com/edgeimpulse/edge-impulse-runner-rs) - Rust bindings for Edge Impulse Linux SDK * [edge-impulse-ffi-rs](https://github.com/edgeimpulse/edge-impulse-ffi-rs) - FFI bindings for Edge Impulse C++ SDK (used by runner-rs) -* A trained Edge Impulse model file (.eim) or environment variables for FFI mode - **Note:** The plugin inherits all build flags and environment variables supported by the underlying FFI crate. See the [edge-impulse-ffi-rs documentation](https://github.com/edgeimpulse/edge-impulse-ffi-rs) for the complete list of supported platforms, accelerators, and build options. @@ -128,13 +149,14 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh Follow the prompts to complete the installation. After installation, restart your terminal to ensure the Rust tools are in your PATH. ### 2. Install GStreamer -Download and install GStreamer from the official binaries: #### macOS -Download and install both packages: +Download and install GStreamer from the official binaries: - [Runtime installer](https://gstreamer.freedesktop.org/data/pkg/osx/1.24.12/gstreamer-1.0-1.24.12-universal.pkg) - [Development installer](https://gstreamer.freedesktop.org/data/pkg/osx/1.24.12/gstreamer-1.0-devel-1.24.12-universal.pkg) +**Note:** Install both packages for complete GStreamer development support. + #### Linux Install from your distribution's package manager. 
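Most distributions split GStreamer into separate runtime and development packages, and the development variants are required to build this plugin. To check which version is already installed (assuming pkg-config is available):

```bash
pkg-config --modversion gstreamer-1.0
```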
For example: @@ -657,6 +679,7 @@ Got element message with name: edge-impulse-inference-result Message structure: edge-impulse-inference-result { timestamp: (guint64) 9498000000, type: "classification", + resize_timing_ms: (guint32) 2, result: { "classification": { "no": 0.015625, @@ -700,6 +723,7 @@ Got element message with name: edge-impulse-inference-result Message structure: edge-impulse-inference-result { timestamp: (guint64) 1234567890, type: "object-detection", + resize_timing_ms: (guint32) 3, result: { "bounding_boxes": [ { @@ -722,6 +746,7 @@ Got element message with name: edge-impulse-inference-result Message structure: edge-impulse-inference-result { timestamp: (guint64) 1234567890, type: "classification", + resize_timing_ms: (guint32) 1, result: { "classification": { "cat": 0.85, @@ -738,6 +763,7 @@ Got element message with name: edge-impulse-inference-result Message structure: edge-impulse-inference-result { timestamp: (guint64) 1234567890, type: "anomaly-detection", + resize_timing_ms: (guint32) 2, result: { "anomaly": 0.35, "classification": { @@ -875,6 +901,183 @@ cargo run --example image_slideshow -- --model model.eim --folder ./images --fra This will show a 2 FPS slideshow of all images in `./images`, running inference and overlaying results. --- +## Troubleshooting + +### Build Issues + +#### pkg-config Errors (cairo/pango not found) +If you encounter errors like: +``` +The system library `cairo` required by crate `cairo-sys-rs` was not found. +The system library `pango` required by crate `pango-sys` was not found. +``` + +**Solution:** +1. Ensure all system dependencies are installed (see Dependencies section above) +2. The build.rs script automatically sets the correct PKG_CONFIG_PATH for macOS. If you still encounter issues, manually set the PKG_CONFIG_PATH: + +**On macOS:** +```bash +export PKG_CONFIG_PATH="/opt/homebrew/opt/libxml2/lib/pkgconfig:/opt/homebrew/lib/pkgconfig:/opt/homebrew/share/pkgconfig" +``` + +**On Linux:** +```bash +export PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/share/pkgconfig:/usr/lib/x86_64-linux-gnu/pkgconfig" +``` + +3. Verify pkg-config can find the libraries: +```bash +pkg-config --exists cairo && echo "cairo found" || echo "cairo not found" +pkg-config --exists pango && echo "pango found" || echo "pango not found" +``` + +4. If the issue persists, clean and rebuild: +```bash +cargo clean +cargo build --release +``` + +#### Missing Model File +If you get errors about missing Edge Impulse models: +``` +FFI crate requires a valid Edge Impulse model, but none was found +``` + +**Solution:** +1. Set the EI_MODEL environment variable to point to your model: +```bash +export EI_MODEL=/path/to/your/model +``` + +2. Or set up Edge Impulse API credentials: +```bash +export EI_PROJECT_ID=your-project-id +export EI_API_KEY=your-api-key +``` + +#### TensorFlow Lite Model Issues +If you get errors like: +``` +This model cannot run under TensorFlow Lite Micro (EI_CLASSIFIER_TFLITE_LARGEST_ARENA_SIZE is 0) +``` + +**Solution:** +1. For TensorFlow Lite models, you need to set the correct environment variable: +```bash +export USE_FULL_TFLITE=1 +``` + +2. Use the complete build command: +```bash +PKG_CONFIG_PATH="/opt/homebrew/opt/libxml2/lib/pkgconfig:/opt/homebrew/lib/pkgconfig:/opt/homebrew/share/pkgconfig" \ +EI_MODEL=/path/to/your/model \ +EI_ENGINE=tflite \ +USE_FULL_TFLITE=1 \ +cargo build --release +``` + +3. 
If the issue persists, clean the cargo cache: +```bash +cargo clean +rm -rf ~/.cargo/git/checkouts/edge-impulse-ffi-rs-* +``` + +#### GStreamer Plugin Not Found +If GStreamer can't find the plugin: +``` +gst-inspect-1.0 edgeimpulsevideoinfer +# ERROR: No such element or plugin 'edgeimpulsevideoinfer' +``` + +**Solution:** +1. Ensure the plugin was built successfully +2. Set the GST_PLUGIN_PATH environment variable: +```bash +export GST_PLUGIN_PATH="$(pwd)/target/release" +``` + +3. Verify the plugin is available: +```bash +gst-inspect-1.0 edgeimpulsevideoinfer +``` + +### Runtime Issues + +#### Video Inference Not Working +If video inference fails or produces no results: + +1. **Check input format compatibility:** +```bash +gst-launch-1.0 videotestsrc ! video/x-raw,format=RGB,width=224,height=224 ! edgeimpulsevideoinfer ! fakesink +``` + +2. **Verify model requirements:** + - The `edgeimpulsevideoinfer` element automatically resizes frames to match the model's expected input size + - Ensure the input format is supported (RGB, GRAY8) + +3. **Enable debug output:** +```bash +GST_DEBUG=edgeimpulsevideoinfer:4 gst-launch-1.0 ... +``` + +#### Audio Inference Issues +If audio inference fails: + +1. **Check audio format:** +```bash +gst-launch-1.0 audiotestsrc ! audio/x-raw,format=S16LE,rate=16000,channels=1 ! edgeimpulseaudioinfer ! fakesink +``` + +2. **Verify sample rate and channels match model requirements** + +#### Overlay Not Displaying +If the overlay element doesn't show results: + +1. **Check that inference is working** (see above) +2. **Verify overlay element is in the pipeline:** +```bash +gst-launch-1.0 videotestsrc ! edgeimpulsevideoinfer ! edgeimpulseoverlay ! autovideosink +``` + +3. **Check for X11/display issues on Linux:** +```bash +export DISPLAY=:0 +``` + +### Performance Issues + +#### Slow Inference +If inference is slower than expected: + +1. **Check environment variables:** +```bash +# Ensure you're using the correct engine +export EI_ENGINE=tflite # or eim + +# Enable full TensorFlow Lite for better performance +export USE_FULL_TFLITE=1 +``` + +2. **For specific accelerators, use FFI crate advanced build flags:** +```bash +# Qualcomm QNN example +export USE_QUALCOMM_QNN=1 +export QNN_SDK_ROOT=/path/to/qnn/sdk + +# Other accelerators may have similar environment variables +# Refer to the [FFI crate documentation](https://github.com/edgeimpulse/edge-impulse-ffi-rs) for your specific hardware +``` + +3. **Optimize input resolution:** + - Use the minimum resolution required by your model + - The automatic resizing feature helps, but smaller inputs are faster + +4. 
**Check system resources:** +```bash +htop # Monitor CPU/memory usage +``` + ## Debugging Enable debug output with: ```bash diff --git a/build.rs b/build.rs index cda12e5..df1e8da 100644 --- a/build.rs +++ b/build.rs @@ -1,3 +1,24 @@ fn main() { + // Set PKG_CONFIG_PATH to include all necessary directories for macOS + if cfg!(target_os = "macos") { + let homebrew_prefix = "/opt/homebrew"; + let pkg_config_paths = vec![ + format!("{}/opt/libxml2/lib/pkgconfig", homebrew_prefix), + format!("{}/lib/pkgconfig", homebrew_prefix), + format!("{}/share/pkgconfig", homebrew_prefix), + ]; + + let existing_path = std::env::var("PKG_CONFIG_PATH").unwrap_or_default(); + let new_path = if existing_path.is_empty() { + pkg_config_paths.join(":") + } else { + format!("{}:{}", pkg_config_paths.join(":"), existing_path) + }; + + println!("cargo:warning=Setting PKG_CONFIG_PATH to: {}", new_path); + std::env::set_var("PKG_CONFIG_PATH", &new_path); + println!("cargo:rerun-if-env-changed=PKG_CONFIG_PATH"); + } + gst_plugin_version_helper::info() } diff --git a/examples/image_inference.rs b/examples/image_inference.rs index 78af2a1..c18efde 100644 --- a/examples/image_inference.rs +++ b/examples/image_inference.rs @@ -46,7 +46,6 @@ struct ImageClassifyParams { #[arg(short, long, default_value = "RGB")] format: String, - /// Enable debug output #[arg(short, long)] debug: bool, diff --git a/examples/image_slideshow.rs b/examples/image_slideshow.rs index 3b53410..ee53de0 100644 --- a/examples/image_slideshow.rs +++ b/examples/image_slideshow.rs @@ -29,7 +29,6 @@ struct Args { #[arg(short = 'n', long, default_value = "100")] max_images: usize, - /// Slideshow framerate (images per second, default: 1) #[arg(long, default_value = "1")] framerate: i32, diff --git a/examples/video_inference.rs b/examples/video_inference.rs index 372bbda..5c9aa8c 100644 --- a/examples/video_inference.rs +++ b/examples/video_inference.rs @@ -40,7 +40,6 @@ struct VideoClassifyParams { #[arg(short, long, default_value = "RGB")] format: String, - /// Enable debug output #[arg(short, long)] debug: bool, @@ -49,6 +48,18 @@ struct VideoClassifyParams { #[clap(long)] threshold: Vec, + /// Input resolution width (default: use camera default) + #[arg(long)] + width: Option, + + /// Input resolution height (default: use camera default) + #[arg(long)] + height: Option, + + /// Add videoscale element to downscale input for better performance + #[arg(long)] + downscale: bool, + /// Enable performance monitoring #[arg(long)] perf: bool, @@ -267,17 +278,29 @@ fn create_pipeline(args: &VideoClassifyParams) -> Result Result Result Result Result<(), Box> { if structure.name() == "edge-impulse-video-inference-result" { // Extract timing information for performance monitoring let timing_ms = structure.get::("timing_ms").unwrap_or(0); + let resize_timing_ms = structure.get::("resize_timing_ms").unwrap_or(0); perf_metrics.update(timing_ms); // Always print performance info @@ -499,10 +537,11 @@ fn example_main() -> Result<(), Box> { Duration::ZERO }; - println!("🎯 Frame #{:3} | FPS: {:4.1} | Inference: {:3}ms | Avg: {:4.1}ms | Confidence: {:.1}%", + println!("🎯 Frame #{:3} | FPS: {:4.1} | Inference: {:3}ms | Resize: {:3}ms | Avg: {:4.1}ms | Confidence: {:.1}%", perf_metrics.frame_count, current_fps, timing_ms, + resize_timing_ms, avg_inference.as_millis(), if let Ok(result) = structure.get::("result") { if let Ok(json) = serde_json::from_str::(&result) { diff --git a/src/audio/imp.rs b/src/audio/imp.rs index 13b0bca..74d56ca 100644 --- a/src/audio/imp.rs +++ 
b/src/audio/imp.rs @@ -481,6 +481,7 @@ impl BaseTransformImpl for EdgeImpulseAudioInfer { "classification", result_json, elapsed.as_millis() as u32, + 0u32, // No resizing for audio ); // Post the message @@ -578,6 +579,7 @@ impl BaseTransformImpl for EdgeImpulseAudioInfer { "classification", result_json, elapsed.as_millis() as u32, + 0u32, // No resizing for audio ); let _ = self.obj().post_message(gst::message::Element::new(s)); diff --git a/src/common.rs index b08d7c2..3a57cd5 100644 --- a/src/common.rs +++ b/src/common.rs @@ -408,12 +408,14 @@ pub fn create_inference_message( result_type: &str, result_json: String, timing_ms: u32, + resize_timing_ms: u32, ) -> gst::Structure { gst::Structure::builder(format!("edge-impulse-{element_type}-inference-result")) .field("timestamp", timestamp) .field("type", result_type) .field("result", result_json) .field("timing_ms", timing_ms) + .field("resize_timing_ms", resize_timing_ms) .build() } diff --git a/src/video/imp.rs index 995ed51..dd334da 100644 --- a/src/video/imp.rs +++ b/src/video/imp.rs @@ -205,9 +205,10 @@ use gstreamer_base::subclass::prelude::*; use gstreamer_base::subclass::BaseTransformMode; use gstreamer_video as gst_video; use gstreamer_video::{VideoFormat, VideoInfo}; -use image::{ImageBuffer, Rgb, RgbImage}; +use image::{ImageBuffer, RgbImage}; use once_cell::sync::Lazy; use std::sync::Mutex; +use std::time::Instant; use super::meta::VideoRegionOfInterestMeta; use super::VideoAnomalyMeta; @@ -255,7 +256,43 @@ impl Default for VideoState { impl VideoState {} -/// Helper function to resize RGB image data +/// Fast resize for simple cases (integer-factor downscaling) +fn fast_resize_rgb( + data: &[u8], + src_width: u32, + src_height: u32, + dst_width: u32, + dst_height: u32, +) -> Option<Vec<u8>> { + // Check if this is a simple integer-factor downscaling + let scale_x = src_width as f32 / dst_width as f32; + let scale_y = src_height as f32 / dst_height as f32; + + // Only use fast path for exact integer scaling (2x, 3x, 4x, etc.)
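+ // e.g. a 1920x1080 frame into a 384x384 model gives scale_x = 5.0 but scale_y = 2.8125, so this returns None and resize_rgb_image falls back to the filter-based path below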
+ if scale_x == scale_y && scale_x == scale_x.round() && scale_x >= 2.0 { + let scale = scale_x as u32; + if src_width % scale == 0 && src_height % scale == 0 { + let mut result = Vec::with_capacity((dst_width * dst_height * 3) as usize); + + for y in 0..dst_height { + for x in 0..dst_width { + let src_x = x * scale; + let src_y = y * scale; + let src_idx = ((src_y * src_width + src_x) * 3) as usize; + + // Copy RGB pixel + result.push(data[src_idx]); // R + result.push(data[src_idx + 1]); // G + result.push(data[src_idx + 2]); // B + } + } + return Some(result); + } + } + None +} + +/// Helper function to resize RGB image data with optimized performance fn resize_rgb_image( data: &[u8], src_width: u32, @@ -263,18 +300,71 @@ dst_width: u32, dst_height: u32, ) -> Result<Vec<u8>, Box<dyn std::error::Error>> { - // Create image buffer from input data + // Try fast resize first for simple cases + if let Some(result) = fast_resize_rgb(data, src_width, src_height, dst_width, dst_height) { + return Ok(result); + } + + // Choose filter type based on scaling ratio for optimal performance + let filter_type = if (src_width as f32 / dst_width as f32) > 2.0 + || (src_height as f32 / dst_height as f32) > 2.0 + { + // Large downscaling - use nearest neighbor for speed + image::imageops::FilterType::Nearest + } else if dst_width > src_width || dst_height > src_height { + // Upscaling - use linear for better quality + image::imageops::FilterType::Triangle + } else { + // Small downscaling - use linear for good balance + image::imageops::FilterType::Triangle + }; + + // Create image buffer from input data (this copies the frame once) let img: RgbImage = ImageBuffer::from_raw(src_width, src_height, data.to_vec()) .ok_or("Failed to create image buffer")?; - // Resize the image - let resized = image::imageops::resize(&img, dst_width, dst_height, image::imageops::FilterType::Lanczos3); + // Resize the image with optimized filter + let resized = image::imageops::resize(&img, dst_width, dst_height, filter_type); // Convert back to bytes Ok(resized.into_raw()) } -/// Helper function to resize grayscale image data +/// Fast resize for grayscale images (integer-factor downscaling) +fn fast_resize_gray( + data: &[u8], + src_width: u32, + src_height: u32, + dst_width: u32, + dst_height: u32, +) -> Option<Vec<u8>> { + // Check if this is a simple integer-factor downscaling + let scale_x = src_width as f32 / dst_width as f32; + let scale_y = src_height as f32 / dst_height as f32; + + // Only use fast path for exact integer scaling (2x, 3x, 4x, etc.)
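+ // e.g. a 1920x1080 GRAY8 frame down to 960x540 passes these checks with scale = 2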
+ if scale_x == scale_y && scale_x == scale_x.round() && scale_x >= 2.0 { + let scale = scale_x as u32; + if src_width % scale == 0 && src_height % scale == 0 { + let mut result = Vec::with_capacity((dst_width * dst_height) as usize); + + for y in 0..dst_height { + for x in 0..dst_width { + let src_x = x * scale; + let src_y = y * scale; + let src_idx = (src_y * src_width + src_x) as usize; + + // Copy grayscale pixel + result.push(data[src_idx]); + } + } + return Some(result); + } + } + None +} + +/// Helper function to resize grayscale image data with optimized performance fn resize_gray_image( data: &[u8], src_width: u32, @@ -282,12 +372,32 @@ fn resize_gray_image( dst_width: u32, dst_height: u32, ) -> Result, Box> { + // Try fast resize first for simple cases + if let Some(result) = fast_resize_gray(data, src_width, src_height, dst_width, dst_height) { + return Ok(result); + } + + // Choose filter type based on scaling ratio for optimal performance + let filter_type = if (src_width as f32 / dst_width as f32) > 2.0 + || (src_height as f32 / dst_height as f32) > 2.0 + { + // Large downscaling - use nearest neighbor for speed + image::imageops::FilterType::Nearest + } else if dst_width > src_width || dst_height > src_height { + // Upscaling - use linear for better quality + image::imageops::FilterType::Triangle + } else { + // Small downscaling - use linear for good balance + image::imageops::FilterType::Triangle + }; + // Create grayscale image buffer from input data - let img = ImageBuffer::, Vec>::from_raw(src_width, src_height, data.to_vec()) - .ok_or("Failed to create grayscale image buffer")?; + let img = + ImageBuffer::, Vec>::from_raw(src_width, src_height, data.to_vec()) + .ok_or("Failed to create grayscale image buffer")?; - // Resize the image - let resized = image::imageops::resize(&img, dst_width, dst_height, image::imageops::FilterType::Lanczos3); + // Resize the image with optimized filter + let resized = image::imageops::resize(&img, dst_width, dst_height, filter_type); // Convert back to bytes Ok(resized.into_raw()) @@ -390,18 +500,22 @@ impl ObjectImpl for EdgeImpulseVideoInfer { } fn set_property(&self, id: usize, value: &glib::Value, pspec: &glib::ParamSpec) { - crate::common::set_common_property::( - &self.state, - id, - value, - pspec, - &*self.obj(), - &CAT, - ); + { + crate::common::set_common_property::( + &self.state, + id, + value, + pspec, + &*self.obj(), + &CAT, + ); + } } fn property(&self, id: usize, pspec: &glib::ParamSpec) -> glib::Value { - crate::common::get_common_property::(&self.state, id, pspec) + { + crate::common::get_common_property::(&self.state, id, pspec) + } } } @@ -659,38 +773,66 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { ); // Check if we need to resize the frame for the model - let (frame_data, inference_width, inference_height) = if width != model_width || height != model_height { + let (frame_data, inference_width, inference_height, resize_time_ms) = if width + != model_width + || height != model_height + { gst::debug!( CAT, obj = self.obj(), "Frame size mismatch: input={}x{}, model requires={}x{}, resizing frame", - width, height, model_width, model_height + width, + height, + model_width, + model_height ); - // Resize the frame data + // Time the resizing operation + let resize_start = Instant::now(); let resized_data = if format == Some(VideoFormat::Gray8) { - resize_gray_image(&in_map, width, height, model_width, model_height) - .map_err(|e| { - gst::error!(CAT, obj = self.obj(), "Failed to resize grayscale frame: {}", e); 
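+ // GRAY8 input carries one byte per pixel, so resize_gray_image operates on the mapped buffer directly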
+ resize_gray_image(&in_map, width, height, model_width, model_height).map_err( + |e| { + gst::error!( + CAT, + obj = self.obj(), + "Failed to resize grayscale frame: {}", + e + ); gst::FlowError::Error - })? + }, + )? } else { - resize_rgb_image(&in_map, width, height, model_width, model_height) - .map_err(|e| { + resize_rgb_image(&in_map, width, height, model_width, model_height).map_err( + |e| { gst::error!(CAT, obj = self.obj(), "Failed to resize RGB frame: {}", e); gst::FlowError::Error - })? + }, + )? }; + let resize_duration = resize_start.elapsed(); + let resize_time_ms = resize_duration.as_millis() as u32; - (resized_data, model_width, model_height) + gst::debug!( + CAT, + obj = self.obj(), + "Frame resizing completed in {}ms ({}x{} -> {}x{})", + resize_time_ms, + width, + height, + model_width, + model_height + ); + + (resized_data, model_width, model_height, resize_time_ms) } else { gst::debug!( CAT, obj = self.obj(), "Frame size matches model requirements: {}x{}", - width, height + width, + height ); - (in_map.to_vec(), width, height) + (in_map.to_vec(), width, height, 0u32) }; // Pre-allocate features vector with exact capacity @@ -812,7 +954,13 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { // Convert grid cells to VideoRegionOfInterestMeta format let mut grid_rois = Vec::new(); for cell in grid { - if let (Some(x), Some(y), Some(cell_width), Some(cell_height), Some(score)) = ( + if let ( + Some(x), + Some(y), + Some(cell_width), + Some(cell_height), + Some(score), + ) = ( cell["x"].as_u64(), cell["y"].as_u64(), cell["width"].as_u64(), @@ -820,15 +968,21 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { cell["value"].as_f64(), ) { // Scale coordinates back to original resolution if frame was resized - let (scaled_x, scaled_y, scaled_width, scaled_height) = if inference_width != width || inference_height != height { - scale_bounding_box( - x as u32, y as u32, cell_width as u32, cell_height as u32, - inference_width, inference_height, - width, height - ) - } else { - (x as u32, y as u32, cell_width as u32, cell_height as u32) - }; + let (scaled_x, scaled_y, scaled_width, scaled_height) = + if inference_width != width || inference_height != height { + scale_bounding_box( + x as u32, + y as u32, + cell_width as u32, + cell_height as u32, + inference_width, + inference_height, + width, + height, + ) + } else { + (x as u32, y as u32, cell_width as u32, cell_height as u32) + }; grid_rois.push(VideoRegionOfInterestMeta { x: scaled_x, @@ -858,6 +1012,7 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { "visual-anomaly", result_json.clone(), elapsed.as_millis() as u32, + resize_time_ms, ); let _ = self.obj().post_message(gst::message::Element::new(s)); } @@ -888,15 +1043,21 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { bbox["height"].as_u64(), ) { // Scale coordinates back to original resolution if frame was resized - let (scaled_x, scaled_y, scaled_width, scaled_height) = if inference_width != width || inference_height != height { - scale_bounding_box( - x as u32, y as u32, bbox_width as u32, bbox_height as u32, - inference_width, inference_height, - width, height - ) - } else { - (x as u32, y as u32, bbox_width as u32, bbox_height as u32) - }; + let (scaled_x, scaled_y, scaled_width, scaled_height) = + if inference_width != width || inference_height != height { + scale_bounding_box( + x as u32, + y as u32, + bbox_width as u32, + bbox_height as u32, + inference_width, + inference_height, + width, + height, + ) + } else { + (x as u32, y as u32, bbox_width as u32, 
bbox_height as u32) + }; let mut roi_meta = gst_video::VideoRegionOfInterestMeta::add( outbuf, @@ -916,6 +1077,7 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { "object-detection", result_json, elapsed.as_millis() as u32, + resize_time_ms, ); let _ = self.obj().post_message(gst::message::Element::new(s)); } else { @@ -956,6 +1118,7 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { "classification", result_json, elapsed.as_millis() as u32, + resize_time_ms, ); let _ = self.obj().post_message(gst::message::Element::new(s)); } @@ -997,6 +1160,7 @@ impl BaseTransformImpl for EdgeImpulseVideoInfer { "classification", result_json, elapsed.as_millis() as u32, + resize_time_ms, ); let _ = self.obj().post_message(gst::message::Element::new(s)); } From ce1e63de16e5ac615e791c71af9afec01ca35d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Jim=C3=A9nez=20Moreno?= Date: Thu, 4 Sep 2025 13:46:45 +0200 Subject: [PATCH 3/3] Fix tests --- examples/image_inference.rs | 3 --- examples/video_inference.rs | 18 +----------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/examples/image_inference.rs b/examples/image_inference.rs index c18efde..e289b78 100644 --- a/examples/image_inference.rs +++ b/examples/image_inference.rs @@ -99,8 +99,6 @@ fn create_pipeline(args: &ImageClassifyParams) -> Result Result<(), Box> { if let Some(output) = &args.output { println!("πŸ’Ύ Output image: {}", output); } - println!("πŸ“ Image dimensions: {}x{}", args.width, args.height); println!("🎨 Format: {}", args.format); println!("πŸ”§ Debug mode: {}", args.debug); diff --git a/examples/video_inference.rs b/examples/video_inference.rs index 5c9aa8c..a1fbc55 100644 --- a/examples/video_inference.rs +++ b/examples/video_inference.rs @@ -48,14 +48,6 @@ struct VideoClassifyParams { #[clap(long)] threshold: Vec, - /// Input resolution width (default: use camera default) - #[arg(long)] - width: Option, - - /// Input resolution height (default: use camera default) - #[arg(long)] - height: Option, - /// Add videoscale element to downscale input for better performance #[arg(long)] downscale: bool, @@ -393,15 +385,7 @@ fn create_pipeline(args: &VideoClassifyParams) -> Result