diff --git a/extension/llm/runner/text_llm_runner.cpp b/extension/llm/runner/text_llm_runner.cpp index e3ec4501a6b..e0b9d434632 100644 --- a/extension/llm/runner/text_llm_runner.cpp +++ b/extension/llm/runner/text_llm_runner.cpp @@ -174,6 +174,7 @@ Error TextLLMRunner::generate( "RSS after prompt prefill: %f MiB (0 if unsupported)", get_rss_bytes() / 1024.0 / 1024.0); + printf("\033[32m"); // print the first token from prefill. No prev_token so use cur_token for it. auto decode_result = tokenizer_->decode(cur_token, cur_token); if (!decode_result.ok()) { @@ -195,6 +196,8 @@ Error TextLLMRunner::generate( max_new_tokens - 1, temperature_ == -1.0f ? config.temperature : temperature_, wrapped_callback); + + printf("\033[0m"); if (!generate_result.ok()) { return generate_result.error(); }