2323from pipecat .audio .turn .smart_turn .base_smart_turn import SmartTurnParams
2424from pipecat .audio .vad .vad_analyzer import VADParams
2525from pipecat .frames .frames import (
26+ AggregatedLLMTextFrame ,
2627 BotStartedSpeakingFrame ,
2728 BotStoppedSpeakingFrame ,
2829 CancelFrame ,
4647 LLMRunFrame ,
4748 LLMSetToolChoiceFrame ,
4849 LLMSetToolsFrame ,
50+ LLMTextFrame ,
4951 SpeechControlParamsFrame ,
5052 StartFrame ,
5153 TextFrame ,
6567 LLMUserAggregatorParams ,
6668)
6769from pipecat .processors .frame_processor import FrameDirection , FrameProcessor
70+ from pipecat .utils .string import match_endofsentence
6871from pipecat .utils .time import time_now_iso8601
6972
7073
@@ -565,6 +568,9 @@ def __init__(
565568 self ._function_calls_in_progress : Dict [str , Optional [FunctionCallInProgressFrame ]] = {}
566569 self ._context_updated_tasks : Set [asyncio .Task ] = set ()
567570
571+ self ._llm_aggregation : str = ""
572+ self ._skip_tts : Optional [bool ] = None
573+
568574 @property
569575 def has_function_calls_in_progress (self ) -> bool :
570576 """Check if there are any function calls currently in progress.
@@ -588,6 +594,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
588594 await self .push_frame (frame , direction )
589595 elif isinstance (frame , LLMFullResponseStartFrame ):
590596 await self ._handle_llm_start (frame )
597+ elif isinstance (frame , LLMTextFrame ):
598+ await self ._handle_llm_text (frame )
591599 elif isinstance (frame , LLMFullResponseEndFrame ):
592600 await self ._handle_llm_end (frame )
593601 elif isinstance (frame , TextFrame ):
@@ -787,12 +795,50 @@ async def _handle_user_image_frame(self, frame: UserImageRawFrame):
787795 await self .push_aggregation ()
788796 await self .push_context_frame (FrameDirection .UPSTREAM )
789797
async def _handle_llm_start(self, frame: LLMFullResponseStartFrame):
    """Handle the start-of-response frame coming from the LLM.

    Increments ``self._started``, seeds ``self._skip_tts`` from the frame
    the first time (while it is still ``None``), and then lets
    ``_maybe_push_llm_aggregation`` decide whether buffered text must be
    flushed.

    Args:
        frame: The ``LLMFullResponseStartFrame`` that was received.
    """
    self._started = self._started + 1

    # Only the very first observed frame initialises the tracked flag;
    # afterwards the flag is maintained by _maybe_push_llm_aggregation.
    flag_uninitialised = self._skip_tts is None
    if flag_uninitialised:
        self._skip_tts = frame.skip_tts

    await self._maybe_push_llm_aggregation(frame)
803+
async def _handle_llm_text(self, frame: LLMTextFrame):
    """Handle a chunk of text produced by the LLM.

    The frame is first passed to ``_handle_text``. If either the tracked
    ``self._skip_tts`` flag or the frame's own ``skip_tts`` flag is set,
    the frame's text is appended to ``self._llm_aggregation``. Finally
    ``_maybe_push_llm_aggregation`` is given the chance to flush the
    buffer.

    Args:
        frame: The ``LLMTextFrame`` that was received.
    """
    await self._handle_text(frame)

    # NOTE(review): when the tracked flag is set but this frame clears it,
    # the frame's text is still buffered and then flushed by the flag-switch
    # rule below -- confirm this is intended.
    buffer_text = self._skip_tts or frame.skip_tts
    if buffer_text:
        self._llm_aggregation = self._llm_aggregation + frame.text

    await self._maybe_push_llm_aggregation(frame)
792809
async def _handle_llm_end(self, frame: LLMFullResponseEndFrame):
    """Handle the end-of-response frame coming from the LLM.

    Decrements ``self._started``, pushes the current aggregation via
    ``push_aggregation``, and then calls ``_maybe_push_llm_aggregation``
    with the end frame so any buffered LLM text can be flushed.

    Args:
        frame: The ``LLMFullResponseEndFrame`` that was received.
    """
    self._started = self._started - 1

    # Order matters: the regular aggregation goes out first, the buffered
    # LLM text (if any) second.
    await self.push_aggregation()
    await self._maybe_push_llm_aggregation(frame)
814+
async def _maybe_push_llm_aggregation(
    self, frame: LLMFullResponseStartFrame | LLMTextFrame | LLMFullResponseEndFrame
):
    """Flush the buffered LLM text when one of the flush conditions holds.

    The buffer (``self._llm_aggregation``) is pushed as a single
    ``AggregatedLLMTextFrame`` (``aggregated_by="sentence"``) when any of
    the following is true:

    - the tracked ``self._skip_tts`` flag was set and ``frame`` clears it;
    - ``skip_tts`` is set and ``frame`` is an ``LLMFullResponseEndFrame``;
    - ``skip_tts`` is set and the non-empty buffer satisfies
      ``match_endofsentence``.

    ``self._skip_tts`` is always updated to ``frame.skip_tts``. Leading
    newlines and spaces are stripped from the pushed text, and text that is
    only whitespace is never pushed.

    Args:
        frame: The LLM start, text or end frame currently being handled.
    """
    # Detect the set -> cleared transition before the tracked flag is
    # overwritten with the frame's value.
    should_push = bool(self._skip_tts and not frame.skip_tts)
    self._skip_tts = frame.skip_tts

    if self._skip_tts:
        if isinstance(frame, LLMFullResponseEndFrame):
            # On the end frame, always flush whatever is buffered.
            should_push = True
        elif self._llm_aggregation and match_endofsentence(self._llm_aggregation):
            # Flush as soon as the buffer ends a sentence.
            should_push = True

    if not should_push:
        return

    text = self._llm_aggregation.lstrip("\n ")
    if not text.strip():
        # Nothing worth pushing. Drop the whitespace-only residue so it
        # cannot leak into the text of a later flush.
        self._llm_aggregation = ""
        return

    await self.push_frame(AggregatedLLMTextFrame(text=text, aggregated_by="sentence"))
    self._llm_aggregation = ""
796842
797843 async def _handle_text (self , frame : TextFrame ):
798844 if not self ._started or not frame .append_to_context :
0 commit comments