google-ai-edge
diff --git a/‎runtime/core/BUILD‎
Lines changed: 0 additions & 9 deletions b/‎runtime/core/BUILD‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎runtime/core/engine_impl.cc‎
Lines changed: 4 additions & 27 deletions b/‎runtime/core/engine_impl.cc‎
Lines changed: 4 additions & 27 deletions
diff --git a/‎runtime/core/engine_legacy_impl.cc‎
Lines changed: 2 additions & 25 deletions b/‎runtime/core/engine_legacy_impl.cc‎
Lines changed: 2 additions & 25 deletions
diff --git a/‎runtime/core/session_basic.cc‎
Lines changed: 7 additions & 33 deletions b/‎runtime/core/session_basic.cc‎
Lines changed: 7 additions & 33 deletions
diff --git a/‎runtime/core/session_basic.h‎
Lines changed: 10 additions & 22 deletions b/‎runtime/core/session_basic.h‎
Lines changed: 10 additions & 22 deletions
@@ -41,10 +41,6 @@ cc_library(
         "@com_google_absl//absl/time",
         "@litert//litert/cc:litert_macros",
         "//runtime/components:model_resources",
-        "//runtime/components/preprocessor:audio_preprocessor",
-        "//runtime/components/preprocessor:audio_preprocessor_miniaudio",
-        "//runtime/components/preprocessor:image_preprocessor",
-        "//runtime/components/preprocessor:stb_image_preprocessor",
         "//runtime/engine:engine_interface",
         "//runtime/engine:engine_settings",
         "//runtime/engine:io_types",
@@ -191,8 +187,6 @@ cc_library(
         "//runtime/components:stop_token_detector",
         "//runtime/components:tokenizer",
         "//runtime/components/constrained_decoding:constraint",
-        "//runtime/components/preprocessor:audio_preprocessor",
-        "//runtime/components/preprocessor:image_preprocessor",
         "//runtime/engine:engine_interface",
         "//runtime/engine:engine_settings",
         "//runtime/engine:io_types",
@@ -243,7 +237,6 @@ cc_test(
         "//runtime/components:sentencepiece_tokenizer",
         "//runtime/components:tokenizer",
         "//runtime/components/constrained_decoding:fake_constraint",
-        "//runtime/components/preprocessor:by_pass_audio_preprocessor",
         "//runtime/engine:engine_settings",
         "//runtime/engine:io_types",
         "//runtime/executor:audio_executor_settings",
@@ -276,8 +269,6 @@ cc_library(
         "@com_google_absl//absl/base:nullability",
         "@com_google_absl//absl/status:statusor",
         "//runtime/components:tokenizer",
-        "//runtime/components/preprocessor:audio_preprocessor",
-        "//runtime/components/preprocessor:image_preprocessor",
         "//runtime/engine:engine_interface",
         "//runtime/engine:engine_settings",
         "//runtime/engine:io_types",
 
@@ -32,10 +32,6 @@
 #include "litert/cc/litert_environment.h"  // from @litert
 #include "litert/cc/litert_macros.h"  // from @litert
 #include "runtime/components/model_resources.h"
-#include "runtime/components/preprocessor/audio_preprocessor.h"
-#include "runtime/components/preprocessor/audio_preprocessor_miniaudio.h"
-#include "runtime/components/preprocessor/image_preprocessor.h"
-#include "runtime/components/preprocessor/stb_image_preprocessor.h"
 #include "runtime/core/session_factory.h"
 #include "runtime/engine/engine.h"
 #include "runtime/engine/engine_settings.h"
@@ -72,23 +68,19 @@ class EngineImpl : public Engine {
   explicit EngineImpl(EngineSettings engine_settings,
                       std::unique_ptr<ModelResources> litert_model_resources,
                       std::unique_ptr<Environment> lrt_env,
-                      std::unique_ptr<ImagePreprocessor> image_preprocessor,
                       std::unique_ptr<LlmExecutor> executor,
                       std::unique_ptr<VisionExecutor> vision_executor,
-                      std::unique_ptr<AudioPreprocessor> audio_preprocessor,
                       std::unique_ptr<AudioExecutor> audio_executor,
                       std::optional<BenchmarkInfo> benchmark_info,
                       std::unique_ptr<ThreadPool> worker_thread_pool)
       : engine_settings_(std::move(engine_settings)),
         litert_model_resources_(std::move(litert_model_resources)),
         lrt_env_(std::move(lrt_env)),
-        image_preprocessor_(std::move(image_preprocessor)),
         executor_(std::move(executor)),
         vision_executor_(std::move(vision_executor)),
+        audio_executor_(std::move(audio_executor)),
         stop_token_ids_(),
         sampler_params_(),
-        audio_preprocessor_(std::move(audio_preprocessor)),
-        audio_executor_(std::move(audio_executor)),
         benchmark_info_(std::move(benchmark_info)),
         worker_thread_pool_(std::move(worker_thread_pool)) {}
 
@@ -103,9 +95,7 @@ class EngineImpl : public Engine {
     ABSL_CHECK(litert_model_resources_ != nullptr);
     ASSIGN_OR_RETURN(auto* tokenizer, litert_model_resources_->GetTokenizer());
     return InitializeSession(executor_.get(), tokenizer,
-                             /*image_preprocessor=*/image_preprocessor_.get(),
                              /*vision_executor=*/vision_executor_.get(),
-                             /*audio_preprocessor=*/audio_preprocessor_.get(),
                              /*audio_executor=*/audio_executor_.get(), config,
                              benchmark_info_, worker_thread_pool_.get());
   }
@@ -124,20 +114,16 @@ class EngineImpl : public Engine {
   std::unique_ptr<ModelResources> litert_model_resources_;
   // LiteRT environment.
   std::unique_ptr<Environment> lrt_env_;
-  // Image preprocessor for the vision model.
-  std::unique_ptr<ImagePreprocessor> image_preprocessor_;
   // Shared executor for all sessions.
   std::unique_ptr<LlmExecutor> executor_;
   // Shared vision executor for all sessions.
   std::unique_ptr<VisionExecutor> vision_executor_;
+  // Shared audio executor for all sessions.
+  std::unique_ptr<AudioExecutor> audio_executor_;
   // Default stop token ids for all sessions loaded from the model file.
   std::vector<std::vector<int>> stop_token_ids_;
   proto::SamplerParameters sampler_params_;
 
-  // Shared audio preprocessor and executor for all sessions.
-  std::unique_ptr<AudioPreprocessor> audio_preprocessor_;
-  std::unique_ptr<AudioExecutor> audio_executor_;
-
   // Benchmark info for the engine.
   std::optional<BenchmarkInfo> benchmark_info_;
 
@@ -247,7 +233,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
   // TODO - b/436674053: Modularize the executor creation logic into a
   // separate executor class, and have unit test for it.
   std::unique_ptr<VisionExecutor> vision_executor;
-  std::unique_ptr<ImagePreprocessor> image_preprocessor;
   if (engine_settings.GetVisionExecutorSettings().has_value()) {
     ASSIGN_OR_RETURN(
         auto vision_executor_settings,
@@ -258,13 +243,9 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
             /*adapter_backend=*/Backend::CPU));
     ASSIGN_OR_RETURN(vision_executor, VisionLiteRtCompiledModelExecutor::Create(
                                           vision_executor_settings, *lrt_env));
-    // Create the image preprocessor for processing the image input only if
-    // vision executor is enabled.
-    image_preprocessor = std::make_unique<StbImagePreprocessor>();
   }
 
   std::unique_ptr<AudioExecutor> audio_executor;
-  std::unique_ptr<AudioPreprocessor> audio_preprocessor;
   if (engine_settings.GetAudioExecutorSettings().has_value()) {
     ASSIGN_OR_RETURN(
         auto audio_executor_settings,
@@ -274,9 +255,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
             engine_settings.GetAudioExecutorSettings()->GetBackend()));
     ASSIGN_OR_RETURN(audio_executor, AudioLiteRtCompiledModelExecutor::Create(
                                          audio_executor_settings, *lrt_env));
-    ASSIGN_OR_RETURN(audio_preprocessor,
-                     AudioPreprocessorMiniAudio::Create(
-                         AudioPreprocessorConfig::CreateDefaultUsmConfig()));
   }
 
   if (benchmark_info.has_value()) {
@@ -290,8 +268,7 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
                                    /*max_num_threads=*/1);
   auto llm_impl = std::make_unique<EngineImpl>(
       std::move(engine_settings), std::move(model_resources),
-      std::move(lrt_env), std::move(image_preprocessor), std::move(executor),
-      std::move(vision_executor), std::move(audio_preprocessor),
+      std::move(lrt_env), std::move(executor), std::move(vision_executor),
       std::move(audio_executor), std::move(benchmark_info),
       std::move(worker_thread_pool));
 
 
@@ -33,10 +33,6 @@
 #include "third_party/odml/infra/genai/inference/executor/llm_litert_xnnpack_executor.h"
 #include "litert/cc/litert_environment.h"  // from @litert
 #include "litert/cc/litert_macros.h"  // from @litert
-#include "runtime/components/preprocessor/audio_preprocessor.h"
-#include "runtime/components/preprocessor/audio_preprocessor_miniaudio.h"
-#include "runtime/components/preprocessor/image_preprocessor.h"
-#include "runtime/components/preprocessor/stb_image_preprocessor.h"
 #include "runtime/components/sentencepiece_tokenizer.h"
 #include "runtime/components/tokenizer.h"
 #include "runtime/core/session_factory.h"
@@ -121,9 +117,7 @@ class EngineImpl : public Engine {
       std::unique_ptr<oi::ExecutorModelResources> model_resources,
       std::unique_ptr<LlmExecutor> executor,
       std::unique_ptr<Tokenizer> task_tokenizer, Tokenizer* tokenizer,
-      std::unique_ptr<ImagePreprocessor> image_preprocessor,
       std::unique_ptr<VisionExecutor> vision_executor,
-      std::unique_ptr<AudioPreprocessor> audio_preprocessor,
       std::unique_ptr<AudioExecutor> audio_executor,
       std::optional<BenchmarkInfo> benchmark_info,
       std::unique_ptr<ThreadPool> worker_thread_pool)
@@ -133,9 +127,7 @@ class EngineImpl : public Engine {
         executor_(std::move(executor)),
         task_tokenizer_(std::move(task_tokenizer)),
         tokenizer_(tokenizer),
-        image_preprocessor_(std::move(image_preprocessor)),
         vision_executor_(std::move(vision_executor)),
-        audio_preprocessor_(std::move(audio_preprocessor)),
         audio_executor_(std::move(audio_executor)),
         stop_token_ids_(),
         benchmark_info_(std::move(benchmark_info)),
@@ -152,8 +144,7 @@ class EngineImpl : public Engine {
     config.GetMutableSamplerParams().set_type(
         proto::SamplerParameters::TYPE_UNSPECIFIED);
     return InitializeSession(executor_.get(), tokenizer_,
-                             image_preprocessor_.get(), vision_executor_.get(),
-                             audio_preprocessor_.get(), audio_executor_.get(),
+                             vision_executor_.get(), audio_executor_.get(),
                              config, benchmark_info_,
                              worker_thread_pool_.get());
   }
@@ -188,15 +179,9 @@ class EngineImpl : public Engine {
   // used in CreateSession().
   Tokenizer* tokenizer_ = nullptr;
 
-  // Image preprocessor for the vision model.
-  std::unique_ptr<ImagePreprocessor> image_preprocessor_;
-
   // Vision executor for all sessions.
   std::unique_ptr<VisionExecutor> vision_executor_;
 
-  // Audio executor for all sessions.
-  std::unique_ptr<AudioPreprocessor> audio_preprocessor_;
-
   // Audio executor for all sessions.
   std::unique_ptr<AudioExecutor> audio_executor_;
 
@@ -272,7 +257,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
       auto lrt_env, Environment::Create(std::vector<Environment::Option>()));
 
   std::unique_ptr<VisionExecutor> vision_executor;
-  std::unique_ptr<ImagePreprocessor> image_preprocessor;
   if (engine_settings.GetVisionExecutorSettings().has_value()) {
     ASSIGN_OR_RETURN(
         auto vision_executor_settings,
@@ -283,12 +267,9 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
             /*adapter_backend=*/Backend::CPU));
     ASSIGN_OR_RETURN(vision_executor, VisionLiteRtCompiledModelExecutor::Create(
                                           vision_executor_settings, lrt_env));
-    // Create the image preprocessor for processing the image input.
-    image_preprocessor = std::make_unique<StbImagePreprocessor>();
   }
 
   std::unique_ptr<AudioExecutor> audio_executor;
-  std::unique_ptr<AudioPreprocessor> audio_preprocessor;
   if (engine_settings.GetAudioExecutorSettings().has_value()) {
     ASSIGN_OR_RETURN(
         auto audio_executor_settings,
@@ -299,9 +280,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
 
     ASSIGN_OR_RETURN(audio_executor, AudioLiteRtCompiledModelExecutor::Create(
                                          audio_executor_settings, lrt_env));
-    ASSIGN_OR_RETURN(audio_preprocessor,
-                     AudioPreprocessorMiniAudio::Create(
-                         AudioPreprocessorConfig::CreateDefaultUsmConfig()));
   }
 
   if (benchmark_info.has_value()) {
@@ -327,8 +305,7 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
       std::move(engine_settings),
       std::make_unique<Environment>(std::move(lrt_env)),
       std::move(model_resources), std::move(executor),
-      std::move(task_tokenizer), tokenizer, std::move(image_preprocessor),
-      std::move(vision_executor), std::move(audio_preprocessor),
+      std::move(task_tokenizer), tokenizer, std::move(vision_executor),
       std::move(audio_executor), std::move(benchmark_info),
       std::move(worker_thread_pool));
   return llm_impl;
 
@@ -35,8 +35,6 @@
 #include "litert/cc/litert_layout.h"  // from @litert
 #include "litert/cc/litert_model.h"  // from @litert
 #include "litert/cc/litert_tensor_buffer.h"  // from @litert
-#include "runtime/components/preprocessor/audio_preprocessor.h"
-#include "runtime/components/preprocessor/image_preprocessor.h"
 #include "runtime/components/sampler.h"
 #include "runtime/components/sampler_factory.h"
 #include "runtime/components/stop_token_detector.h"
@@ -155,8 +153,7 @@ absl::StatusOr<T> CombineExecutorDataImpl(std::vector<T>& executor_data) {
 // static
 absl::StatusOr<std::unique_ptr<SessionBasic>> SessionBasic::Create(
     LlmExecutor* executor, Tokenizer* tokenizer,
-    ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
-    AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
+    VisionExecutor* vision_executor, AudioExecutor* audio_executor,
     const SessionConfig& session_config,
     std::optional<BenchmarkInfo> benchmark_info,
     ThreadPool* worker_thread_pool) {
@@ -185,9 +182,8 @@ absl::StatusOr<std::unique_ptr<SessionBasic>> SessionBasic::Create(
         stop_token_detector.AddStopTokenSequence(stop_token_sequence));
   }
   return absl::WrapUnique(new SessionBasic(
-      executor, tokenizer, image_preprocessor, vision_executor,
-      audio_preprocessor, audio_executor, std::move(sampler), session_config,
-      benchmark_info, worker_thread_pool, stop_token_detector));
+      executor, tokenizer, vision_executor, audio_executor, std::move(sampler),
+      session_config, benchmark_info, worker_thread_pool, stop_token_detector));
 }
 
 SessionBasic::~SessionBasic() {
@@ -404,38 +400,16 @@ absl::StatusOr<std::vector<InputData>> SessionBasic::PreprocessContents(
         ASSIGN_OR_RETURN(auto input_image_copy, input_image->CreateCopy());
         preprocessed_contents.emplace_back(std::move(input_image_copy));
       } else {
-        if (image_preprocessor_ == nullptr) {
-          return absl::InternalError("Image preprocessor is not available.");
-        }
-        ASSIGN_OR_RETURN(const auto& target_dims_vector,
-                         vision_executor_->GetExpectedInputDimension());
-
-        Dimensions target_dims(target_dims_vector.begin(),
-                               target_dims_vector.end());
-
-        ImagePreprocessParameter input_preprocess_parameters;
-        input_preprocess_parameters.SetTargetDimensions(target_dims);
-
-        ASSIGN_OR_RETURN(auto preprocessed_image,
-                         image_preprocessor_->Preprocess(
-                             *input_image, input_preprocess_parameters));
-
-        preprocessed_contents.emplace_back(
-            InputImage(std::move(preprocessed_image)));
+        return absl::InternalError(
+            "Image must be preprocessed before being used in SessionBasic.");
       }
     } else if (const auto* input_audio = std::get_if<InputAudio>(&content)) {
       if (input_audio->IsTensorBuffer()) {
         ASSIGN_OR_RETURN(auto input_audio_copy, input_audio->CreateCopy());
         preprocessed_contents.emplace_back(std::move(input_audio_copy));
       } else {
-        if (audio_preprocessor_ == nullptr) {
-          return absl::InternalError("Audio preprocessor is not available.");
-        }
-        ASSIGN_OR_RETURN(auto preprocessed_audio,
-                         audio_preprocessor_->Preprocess(*input_audio));
-        audio_preprocessor_->Reset();
-        preprocessed_contents.emplace_back(
-            InputAudio(std::move(preprocessed_audio)));
+        return absl::InternalError(
+            "Audio must be preprocessed before being used in SessionBasic.");
       }
     }
   }
 
@@ -25,8 +25,6 @@
 #include "absl/status/status.h"  // from @com_google_absl
 #include "absl/status/statusor.h"  // from @com_google_absl
 #include "absl/strings/string_view.h"  // from @com_google_absl
-#include "runtime/components/preprocessor/audio_preprocessor.h"
-#include "runtime/components/preprocessor/image_preprocessor.h"
 #include "runtime/components/sampler.h"
 #include "runtime/components/stop_token_detector.h"
 #include "runtime/components/tokenizer.h"
@@ -50,18 +48,15 @@ class SessionBasic : public Engine::Session {
   // Creates a SessionBasic object.
   // - executor: The initialized LLM Executor to call.
   // - tokenizer: The tokenizer to encode/decode the text into token ids.
-  // - image_preprocessor: The image preprocessor to preprocess the image input.
   // - vision_executor: The vision executor to encode the image input.
-  // - audio_preprocessor: The audio preprocessor to preprocess the audio input.
   // - audio_executor: The audio executor to encode the audio input.
   // - stop_token_ids: The token ids to stop the decoding process.
   // - sampler_params: The sampler parameters used for decoding. Note that if
   //   the sampler_params.type is TYPE_UNSPECIFIED, the sampling logic will be
   //   handled by the LLM Executor.
   static absl::StatusOr<std::unique_ptr<SessionBasic>> Create(
       LlmExecutor* absl_nonnull executor, Tokenizer* absl_nonnull tokenizer,
-      ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
-      AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
+      VisionExecutor* vision_executor, AudioExecutor* audio_executor,
       const SessionConfig& session_config,
       std::optional<BenchmarkInfo> benchmark_info,
       ThreadPool* absl_nonnull worker_thread_pool);
@@ -188,19 +183,18 @@ class SessionBasic : public Engine::Session {
       std::vector<ExecutorVisionData>& executor_data);
 
  private:
-  explicit SessionBasic(
-      LlmExecutor* absl_nonnull executor, Tokenizer* absl_nonnull tokenizer,
-      ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
-      AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
-      std::unique_ptr<Sampler> sampler, const SessionConfig& session_config,
-      std::optional<BenchmarkInfo> benchmark_info,
-      ThreadPool* absl_nonnull worker_thread_pool,
-      const StopTokenDetector& stop_token_detector)
+  explicit SessionBasic(LlmExecutor* absl_nonnull executor,
+                        Tokenizer* absl_nonnull tokenizer,
+                        VisionExecutor* vision_executor,
+                        AudioExecutor* audio_executor,
+                        std::unique_ptr<Sampler> sampler,
+                        const SessionConfig& session_config,
+                        std::optional<BenchmarkInfo> benchmark_info,
+                        ThreadPool* absl_nonnull worker_thread_pool,
+                        const StopTokenDetector& stop_token_detector)
       : executor_(*executor),
         tokenizer_(*tokenizer),
-        image_preprocessor_(image_preprocessor),
         vision_executor_(vision_executor),
-        audio_preprocessor_(audio_preprocessor),
         audio_executor_(audio_executor),
         sampler_(std::move(sampler)),
         session_config_(session_config),
@@ -230,15 +224,9 @@ class SessionBasic : public Engine::Session {
   // The tokenizer used for converting between text to token ids.
   Tokenizer& tokenizer_;
 
-  // The image preprocessor used for preprocessing the image input.
-  ImagePreprocessor* image_preprocessor_;
-
   // The vision executor used for encoding the image input.
   VisionExecutor* vision_executor_;
 
-  // The audio preprocessor used for preprocessing the audio input.
-  AudioPreprocessor* audio_preprocessor_;
-
   // The audio executor used for encoding the audio input.
   AudioExecutor* audio_executor_;