Skip to content

Commit 81940ef

Browse files
ai-edge-bot authored and copybara-github committed
Remove preprocessors from SessionBasic.
LiteRT-LM-PiperOrigin-RevId: 820874637
1 parent f9a9bec commit 81940ef

File tree

9 files changed

+69
-239
lines changed

9 files changed

+69
-239
lines changed

runtime/core/BUILD

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,6 @@ cc_library(
4141
"@com_google_absl//absl/time",
4242
"@litert//litert/cc:litert_macros",
4343
"//runtime/components:model_resources",
44-
"//runtime/components/preprocessor:audio_preprocessor",
45-
"//runtime/components/preprocessor:audio_preprocessor_miniaudio",
46-
"//runtime/components/preprocessor:image_preprocessor",
47-
"//runtime/components/preprocessor:stb_image_preprocessor",
4844
"//runtime/engine:engine_interface",
4945
"//runtime/engine:engine_settings",
5046
"//runtime/engine:io_types",
@@ -191,8 +187,6 @@ cc_library(
191187
"//runtime/components:stop_token_detector",
192188
"//runtime/components:tokenizer",
193189
"//runtime/components/constrained_decoding:constraint",
194-
"//runtime/components/preprocessor:audio_preprocessor",
195-
"//runtime/components/preprocessor:image_preprocessor",
196190
"//runtime/engine:engine_interface",
197191
"//runtime/engine:engine_settings",
198192
"//runtime/engine:io_types",
@@ -243,7 +237,6 @@ cc_test(
243237
"//runtime/components:sentencepiece_tokenizer",
244238
"//runtime/components:tokenizer",
245239
"//runtime/components/constrained_decoding:fake_constraint",
246-
"//runtime/components/preprocessor:by_pass_audio_preprocessor",
247240
"//runtime/engine:engine_settings",
248241
"//runtime/engine:io_types",
249242
"//runtime/executor:audio_executor_settings",
@@ -276,8 +269,6 @@ cc_library(
276269
"@com_google_absl//absl/base:nullability",
277270
"@com_google_absl//absl/status:statusor",
278271
"//runtime/components:tokenizer",
279-
"//runtime/components/preprocessor:audio_preprocessor",
280-
"//runtime/components/preprocessor:image_preprocessor",
281272
"//runtime/engine:engine_interface",
282273
"//runtime/engine:engine_settings",
283274
"//runtime/engine:io_types",

runtime/core/engine_impl.cc

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@
3232
#include "litert/cc/litert_environment.h" // from @litert
3333
#include "litert/cc/litert_macros.h" // from @litert
3434
#include "runtime/components/model_resources.h"
35-
#include "runtime/components/preprocessor/audio_preprocessor.h"
36-
#include "runtime/components/preprocessor/audio_preprocessor_miniaudio.h"
37-
#include "runtime/components/preprocessor/image_preprocessor.h"
38-
#include "runtime/components/preprocessor/stb_image_preprocessor.h"
3935
#include "runtime/core/session_factory.h"
4036
#include "runtime/engine/engine.h"
4137
#include "runtime/engine/engine_settings.h"
@@ -72,23 +68,19 @@ class EngineImpl : public Engine {
7268
explicit EngineImpl(EngineSettings engine_settings,
7369
std::unique_ptr<ModelResources> litert_model_resources,
7470
std::unique_ptr<Environment> lrt_env,
75-
std::unique_ptr<ImagePreprocessor> image_preprocessor,
7671
std::unique_ptr<LlmExecutor> executor,
7772
std::unique_ptr<VisionExecutor> vision_executor,
78-
std::unique_ptr<AudioPreprocessor> audio_preprocessor,
7973
std::unique_ptr<AudioExecutor> audio_executor,
8074
std::optional<BenchmarkInfo> benchmark_info,
8175
std::unique_ptr<ThreadPool> worker_thread_pool)
8276
: engine_settings_(std::move(engine_settings)),
8377
litert_model_resources_(std::move(litert_model_resources)),
8478
lrt_env_(std::move(lrt_env)),
85-
image_preprocessor_(std::move(image_preprocessor)),
8679
executor_(std::move(executor)),
8780
vision_executor_(std::move(vision_executor)),
81+
audio_executor_(std::move(audio_executor)),
8882
stop_token_ids_(),
8983
sampler_params_(),
90-
audio_preprocessor_(std::move(audio_preprocessor)),
91-
audio_executor_(std::move(audio_executor)),
9284
benchmark_info_(std::move(benchmark_info)),
9385
worker_thread_pool_(std::move(worker_thread_pool)) {}
9486

@@ -103,9 +95,7 @@ class EngineImpl : public Engine {
10395
ABSL_CHECK(litert_model_resources_ != nullptr);
10496
ASSIGN_OR_RETURN(auto* tokenizer, litert_model_resources_->GetTokenizer());
10597
return InitializeSession(executor_.get(), tokenizer,
106-
/*image_preprocessor=*/image_preprocessor_.get(),
10798
/*vision_executor=*/vision_executor_.get(),
108-
/*audio_preprocessor=*/audio_preprocessor_.get(),
10999
/*audio_executor=*/audio_executor_.get(), config,
110100
benchmark_info_, worker_thread_pool_.get());
111101
}
@@ -124,20 +114,16 @@ class EngineImpl : public Engine {
124114
std::unique_ptr<ModelResources> litert_model_resources_;
125115
// LiteRT environment.
126116
std::unique_ptr<Environment> lrt_env_;
127-
// Image preprocessor for the vision model.
128-
std::unique_ptr<ImagePreprocessor> image_preprocessor_;
129117
// Shared executor for all sessions.
130118
std::unique_ptr<LlmExecutor> executor_;
131119
// Shared vision executor for all sessions.
132120
std::unique_ptr<VisionExecutor> vision_executor_;
121+
// Shared audio executor for all sessions.
122+
std::unique_ptr<AudioExecutor> audio_executor_;
133123
// Default stop token ids for all sessions loaded from the model file.
134124
std::vector<std::vector<int>> stop_token_ids_;
135125
proto::SamplerParameters sampler_params_;
136126

137-
// Shared audio preprocessor and executor for all sessions.
138-
std::unique_ptr<AudioPreprocessor> audio_preprocessor_;
139-
std::unique_ptr<AudioExecutor> audio_executor_;
140-
141127
// Benchmark info for the engine.
142128
std::optional<BenchmarkInfo> benchmark_info_;
143129

@@ -247,7 +233,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
247233
// TODO - b/436674053: Modularize the executor creation logic into a
248234
// separate executor class, and have unit test for it.
249235
std::unique_ptr<VisionExecutor> vision_executor;
250-
std::unique_ptr<ImagePreprocessor> image_preprocessor;
251236
if (engine_settings.GetVisionExecutorSettings().has_value()) {
252237
ASSIGN_OR_RETURN(
253238
auto vision_executor_settings,
@@ -258,13 +243,9 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
258243
/*adapter_backend=*/Backend::CPU));
259244
ASSIGN_OR_RETURN(vision_executor, VisionLiteRtCompiledModelExecutor::Create(
260245
vision_executor_settings, *lrt_env));
261-
// Create the image preprocessor for processing the image input only if
262-
// vision executor is enabled.
263-
image_preprocessor = std::make_unique<StbImagePreprocessor>();
264246
}
265247

266248
std::unique_ptr<AudioExecutor> audio_executor;
267-
std::unique_ptr<AudioPreprocessor> audio_preprocessor;
268249
if (engine_settings.GetAudioExecutorSettings().has_value()) {
269250
ASSIGN_OR_RETURN(
270251
auto audio_executor_settings,
@@ -274,9 +255,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
274255
engine_settings.GetAudioExecutorSettings()->GetBackend()));
275256
ASSIGN_OR_RETURN(audio_executor, AudioLiteRtCompiledModelExecutor::Create(
276257
audio_executor_settings, *lrt_env));
277-
ASSIGN_OR_RETURN(audio_preprocessor,
278-
AudioPreprocessorMiniAudio::Create(
279-
AudioPreprocessorConfig::CreateDefaultUsmConfig()));
280258
}
281259

282260
if (benchmark_info.has_value()) {
@@ -290,8 +268,7 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
290268
/*max_num_threads=*/1);
291269
auto llm_impl = std::make_unique<EngineImpl>(
292270
std::move(engine_settings), std::move(model_resources),
293-
std::move(lrt_env), std::move(image_preprocessor), std::move(executor),
294-
std::move(vision_executor), std::move(audio_preprocessor),
271+
std::move(lrt_env), std::move(executor), std::move(vision_executor),
295272
std::move(audio_executor), std::move(benchmark_info),
296273
std::move(worker_thread_pool));
297274

runtime/core/engine_legacy_impl.cc

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@
3333
#include "third_party/odml/infra/genai/inference/executor/llm_litert_xnnpack_executor.h"
3434
#include "litert/cc/litert_environment.h" // from @litert
3535
#include "litert/cc/litert_macros.h" // from @litert
36-
#include "runtime/components/preprocessor/audio_preprocessor.h"
37-
#include "runtime/components/preprocessor/audio_preprocessor_miniaudio.h"
38-
#include "runtime/components/preprocessor/image_preprocessor.h"
39-
#include "runtime/components/preprocessor/stb_image_preprocessor.h"
4036
#include "runtime/components/sentencepiece_tokenizer.h"
4137
#include "runtime/components/tokenizer.h"
4238
#include "runtime/core/session_factory.h"
@@ -121,9 +117,7 @@ class EngineImpl : public Engine {
121117
std::unique_ptr<oi::ExecutorModelResources> model_resources,
122118
std::unique_ptr<LlmExecutor> executor,
123119
std::unique_ptr<Tokenizer> task_tokenizer, Tokenizer* tokenizer,
124-
std::unique_ptr<ImagePreprocessor> image_preprocessor,
125120
std::unique_ptr<VisionExecutor> vision_executor,
126-
std::unique_ptr<AudioPreprocessor> audio_preprocessor,
127121
std::unique_ptr<AudioExecutor> audio_executor,
128122
std::optional<BenchmarkInfo> benchmark_info,
129123
std::unique_ptr<ThreadPool> worker_thread_pool)
@@ -133,9 +127,7 @@ class EngineImpl : public Engine {
133127
executor_(std::move(executor)),
134128
task_tokenizer_(std::move(task_tokenizer)),
135129
tokenizer_(tokenizer),
136-
image_preprocessor_(std::move(image_preprocessor)),
137130
vision_executor_(std::move(vision_executor)),
138-
audio_preprocessor_(std::move(audio_preprocessor)),
139131
audio_executor_(std::move(audio_executor)),
140132
stop_token_ids_(),
141133
benchmark_info_(std::move(benchmark_info)),
@@ -152,8 +144,7 @@ class EngineImpl : public Engine {
152144
config.GetMutableSamplerParams().set_type(
153145
proto::SamplerParameters::TYPE_UNSPECIFIED);
154146
return InitializeSession(executor_.get(), tokenizer_,
155-
image_preprocessor_.get(), vision_executor_.get(),
156-
audio_preprocessor_.get(), audio_executor_.get(),
147+
vision_executor_.get(), audio_executor_.get(),
157148
config, benchmark_info_,
158149
worker_thread_pool_.get());
159150
}
@@ -188,15 +179,9 @@ class EngineImpl : public Engine {
188179
// used in CreateSession().
189180
Tokenizer* tokenizer_ = nullptr;
190181

191-
// Image preprocessor for the vision model.
192-
std::unique_ptr<ImagePreprocessor> image_preprocessor_;
193-
194182
// Vision executor for all sessions.
195183
std::unique_ptr<VisionExecutor> vision_executor_;
196184

197-
// Audio executor for all sessions.
198-
std::unique_ptr<AudioPreprocessor> audio_preprocessor_;
199-
200185
// Audio executor for all sessions.
201186
std::unique_ptr<AudioExecutor> audio_executor_;
202187

@@ -272,7 +257,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
272257
auto lrt_env, Environment::Create(std::vector<Environment::Option>()));
273258

274259
std::unique_ptr<VisionExecutor> vision_executor;
275-
std::unique_ptr<ImagePreprocessor> image_preprocessor;
276260
if (engine_settings.GetVisionExecutorSettings().has_value()) {
277261
ASSIGN_OR_RETURN(
278262
auto vision_executor_settings,
@@ -283,12 +267,9 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
283267
/*adapter_backend=*/Backend::CPU));
284268
ASSIGN_OR_RETURN(vision_executor, VisionLiteRtCompiledModelExecutor::Create(
285269
vision_executor_settings, lrt_env));
286-
// Create the image preprocessor for processing the image input.
287-
image_preprocessor = std::make_unique<StbImagePreprocessor>();
288270
}
289271

290272
std::unique_ptr<AudioExecutor> audio_executor;
291-
std::unique_ptr<AudioPreprocessor> audio_preprocessor;
292273
if (engine_settings.GetAudioExecutorSettings().has_value()) {
293274
ASSIGN_OR_RETURN(
294275
auto audio_executor_settings,
@@ -299,9 +280,6 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
299280

300281
ASSIGN_OR_RETURN(audio_executor, AudioLiteRtCompiledModelExecutor::Create(
301282
audio_executor_settings, lrt_env));
302-
ASSIGN_OR_RETURN(audio_preprocessor,
303-
AudioPreprocessorMiniAudio::Create(
304-
AudioPreprocessorConfig::CreateDefaultUsmConfig()));
305283
}
306284

307285
if (benchmark_info.has_value()) {
@@ -327,8 +305,7 @@ absl::StatusOr<std::unique_ptr<Engine>> Engine::CreateEngine(
327305
std::move(engine_settings),
328306
std::make_unique<Environment>(std::move(lrt_env)),
329307
std::move(model_resources), std::move(executor),
330-
std::move(task_tokenizer), tokenizer, std::move(image_preprocessor),
331-
std::move(vision_executor), std::move(audio_preprocessor),
308+
std::move(task_tokenizer), tokenizer, std::move(vision_executor),
332309
std::move(audio_executor), std::move(benchmark_info),
333310
std::move(worker_thread_pool));
334311
return llm_impl;

runtime/core/session_basic.cc

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
#include "litert/cc/litert_layout.h" // from @litert
3636
#include "litert/cc/litert_model.h" // from @litert
3737
#include "litert/cc/litert_tensor_buffer.h" // from @litert
38-
#include "runtime/components/preprocessor/audio_preprocessor.h"
39-
#include "runtime/components/preprocessor/image_preprocessor.h"
4038
#include "runtime/components/sampler.h"
4139
#include "runtime/components/sampler_factory.h"
4240
#include "runtime/components/stop_token_detector.h"
@@ -155,8 +153,7 @@ absl::StatusOr<T> CombineExecutorDataImpl(std::vector<T>& executor_data) {
155153
// static
156154
absl::StatusOr<std::unique_ptr<SessionBasic>> SessionBasic::Create(
157155
LlmExecutor* executor, Tokenizer* tokenizer,
158-
ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
159-
AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
156+
VisionExecutor* vision_executor, AudioExecutor* audio_executor,
160157
const SessionConfig& session_config,
161158
std::optional<BenchmarkInfo> benchmark_info,
162159
ThreadPool* worker_thread_pool) {
@@ -185,9 +182,8 @@ absl::StatusOr<std::unique_ptr<SessionBasic>> SessionBasic::Create(
185182
stop_token_detector.AddStopTokenSequence(stop_token_sequence));
186183
}
187184
return absl::WrapUnique(new SessionBasic(
188-
executor, tokenizer, image_preprocessor, vision_executor,
189-
audio_preprocessor, audio_executor, std::move(sampler), session_config,
190-
benchmark_info, worker_thread_pool, stop_token_detector));
185+
executor, tokenizer, vision_executor, audio_executor, std::move(sampler),
186+
session_config, benchmark_info, worker_thread_pool, stop_token_detector));
191187
}
192188

193189
SessionBasic::~SessionBasic() {
@@ -404,38 +400,16 @@ absl::StatusOr<std::vector<InputData>> SessionBasic::PreprocessContents(
404400
ASSIGN_OR_RETURN(auto input_image_copy, input_image->CreateCopy());
405401
preprocessed_contents.emplace_back(std::move(input_image_copy));
406402
} else {
407-
if (image_preprocessor_ == nullptr) {
408-
return absl::InternalError("Image preprocessor is not available.");
409-
}
410-
ASSIGN_OR_RETURN(const auto& target_dims_vector,
411-
vision_executor_->GetExpectedInputDimension());
412-
413-
Dimensions target_dims(target_dims_vector.begin(),
414-
target_dims_vector.end());
415-
416-
ImagePreprocessParameter input_preprocess_parameters;
417-
input_preprocess_parameters.SetTargetDimensions(target_dims);
418-
419-
ASSIGN_OR_RETURN(auto preprocessed_image,
420-
image_preprocessor_->Preprocess(
421-
*input_image, input_preprocess_parameters));
422-
423-
preprocessed_contents.emplace_back(
424-
InputImage(std::move(preprocessed_image)));
403+
return absl::InternalError(
404+
"Image must be preprocessed before being used in SessionBasic.");
425405
}
426406
} else if (const auto* input_audio = std::get_if<InputAudio>(&content)) {
427407
if (input_audio->IsTensorBuffer()) {
428408
ASSIGN_OR_RETURN(auto input_audio_copy, input_audio->CreateCopy());
429409
preprocessed_contents.emplace_back(std::move(input_audio_copy));
430410
} else {
431-
if (audio_preprocessor_ == nullptr) {
432-
return absl::InternalError("Audio preprocessor is not available.");
433-
}
434-
ASSIGN_OR_RETURN(auto preprocessed_audio,
435-
audio_preprocessor_->Preprocess(*input_audio));
436-
audio_preprocessor_->Reset();
437-
preprocessed_contents.emplace_back(
438-
InputAudio(std::move(preprocessed_audio)));
411+
return absl::InternalError(
412+
"Audio must be preprocessed before being used in SessionBasic.");
439413
}
440414
}
441415
}

runtime/core/session_basic.h

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
#include "absl/status/status.h" // from @com_google_absl
2626
#include "absl/status/statusor.h" // from @com_google_absl
2727
#include "absl/strings/string_view.h" // from @com_google_absl
28-
#include "runtime/components/preprocessor/audio_preprocessor.h"
29-
#include "runtime/components/preprocessor/image_preprocessor.h"
3028
#include "runtime/components/sampler.h"
3129
#include "runtime/components/stop_token_detector.h"
3230
#include "runtime/components/tokenizer.h"
@@ -50,18 +48,15 @@ class SessionBasic : public Engine::Session {
5048
// Creates a SessionBasic object.
5149
// - executor: The initialized LLM Executor to call.
5250
// - tokenizer: The tokenizer to encode/decode the text into token ids.
53-
// - image_preprocessor: The image preprocessor to preprocess the image input.
5451
// - vision_executor: The vision executor to encode the image input.
55-
// - audio_preprocessor: The audio preprocessor to preprocess the audio input.
5652
// - audio_executor: The audio executor to encode the audio input.
5753
// - stop_token_ids: The token ids to stop the decoding process.
5854
// - sampler_params: The sampler parameters used for decoding. Note that if
5955
// the sampler_params.type is TYPE_UNSPECIFIED, the sampling logic will be
6056
// handled by the LLM Executor.
6157
static absl::StatusOr<std::unique_ptr<SessionBasic>> Create(
6258
LlmExecutor* absl_nonnull executor, Tokenizer* absl_nonnull tokenizer,
63-
ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
64-
AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
59+
VisionExecutor* vision_executor, AudioExecutor* audio_executor,
6560
const SessionConfig& session_config,
6661
std::optional<BenchmarkInfo> benchmark_info,
6762
ThreadPool* absl_nonnull worker_thread_pool);
@@ -188,19 +183,18 @@ class SessionBasic : public Engine::Session {
188183
std::vector<ExecutorVisionData>& executor_data);
189184

190185
private:
191-
explicit SessionBasic(
192-
LlmExecutor* absl_nonnull executor, Tokenizer* absl_nonnull tokenizer,
193-
ImagePreprocessor* image_preprocessor, VisionExecutor* vision_executor,
194-
AudioPreprocessor* audio_preprocessor, AudioExecutor* audio_executor,
195-
std::unique_ptr<Sampler> sampler, const SessionConfig& session_config,
196-
std::optional<BenchmarkInfo> benchmark_info,
197-
ThreadPool* absl_nonnull worker_thread_pool,
198-
const StopTokenDetector& stop_token_detector)
186+
explicit SessionBasic(LlmExecutor* absl_nonnull executor,
187+
Tokenizer* absl_nonnull tokenizer,
188+
VisionExecutor* vision_executor,
189+
AudioExecutor* audio_executor,
190+
std::unique_ptr<Sampler> sampler,
191+
const SessionConfig& session_config,
192+
std::optional<BenchmarkInfo> benchmark_info,
193+
ThreadPool* absl_nonnull worker_thread_pool,
194+
const StopTokenDetector& stop_token_detector)
199195
: executor_(*executor),
200196
tokenizer_(*tokenizer),
201-
image_preprocessor_(image_preprocessor),
202197
vision_executor_(vision_executor),
203-
audio_preprocessor_(audio_preprocessor),
204198
audio_executor_(audio_executor),
205199
sampler_(std::move(sampler)),
206200
session_config_(session_config),
@@ -230,15 +224,9 @@ class SessionBasic : public Engine::Session {
230224
// The tokenizer used for converting between text to token ids.
231225
Tokenizer& tokenizer_;
232226

233-
// The image preprocessor used for preprocessing the image input.
234-
ImagePreprocessor* image_preprocessor_;
235-
236227
// The vision executor used to encode the image input.
237228
VisionExecutor* vision_executor_;
238229

239-
// The audio preprocessor used for preprocessing the audio input.
240-
AudioPreprocessor* audio_preprocessor_;
241-
242230
// The audio executor used to encode the audio input.
243231
AudioExecutor* audio_executor_;
244232

0 commit comments

Comments
 (0)