Description
If I shut down while speaking into the microphone for a long time (about 10 seconds), I get a lot of warnings like the ones below. Judging by these, why can't the recorder quit cleanly?
2025-08-27 18:53:22,506 [WARNING] Transcript process did not terminate in time. Terminating forcefully.
RealTimeSTT: 2025-08-27 18:53:22,506 - root - WARNING - Transcript process did not terminate in time. Terminating forcefully.
RealTimeSTT: 2025-08-27 18:53:22,506 - root - WARNING - Transcript process did not terminate in time. Terminating forcefully.
RealTimeSTT: 2025-08-27 18:53:22,506 - root - WARNING - Transcript process did not terminate in time. Terminating forcefully.
RealTimeSTT: 2025-08-27 18:53:22,506 - root - WARNING - Transcript process did not terminate in time. Terminating forcefully.
RealTimeSTT: 2025-08-27 18:53:22,506 - root - WARNING - Transcript process did not terminate in time. Terminating forcefully.
Is it because the worker is blocked inside model.transcribe and therefore cannot terminate safely?
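For context, this warning usually comes from a shutdown path that joins the worker process with a timeout and kills it when the join expires. The snippet below is only a minimal sketch of that pattern, not RealTimeSTT's actual shutdown code; the names transcript_process and _shutdown are assumptions. While the worker is blocked inside model.transcribe, it never gets back to checking shutdown_event, so the join times out and the forceful terminate fires.

import logging
import multiprocessing as mp

def _shutdown(transcript_process: mp.Process, shutdown_event, timeout: float = 10.0):
    # Hypothetical sketch of a join-with-timeout shutdown (assumed pattern).
    shutdown_event.set()  # ask the worker to exit on its next loop iteration

    # If the worker is stuck inside a long model.transcribe() call it cannot
    # check shutdown_event, so this join expires.
    transcript_process.join(timeout=timeout)

    if transcript_process.is_alive():
        # This is the situation the repeated warning above describes.
        logging.warning("Transcript process did not terminate in time. "
                        "Terminating forcefully.")
        transcript_process.terminate()
        transcript_process.join()

For reference, here is the worker that ends up being terminated: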
@staticmethod
def _transcription_worker(conn,
                          model_path,
                          compute_type,
                          gpu_device_index,
                          device,
                          ready_event,
                          shutdown_event,
                          interrupt_stop_event,
                          beam_size,
                          initial_prompt,
                          suppress_tokens
                          ):
"""
Worker method that handles the continuous
process of transcribing audio data.
This method runs in a separate process and is responsible for:
- Initializing the `faster_whisper` model used for transcription.
- Receiving audio data sent through a pipe and using the model
to transcribe it.
- Sending transcription results back through the pipe.
- Continuously checking for a shutdown event to gracefully
terminate the transcription process.
Args:
conn (multiprocessing.Connection): The connection endpoint used
for receiving audio data and sending transcription results.
model_path (str): The path to the pre-trained faster_whisper model
for transcription.
compute_type (str): Specifies the type of computation to be used
for transcription.
gpu_device_index (int): Device ID to use.
device (str): Device for model to use.
ready_event (threading.Event): An event that is set when the
transcription model is successfully initialized and ready.
shutdown_event (threading.Event): An event that, when set,
signals this worker method to terminate.
interrupt_stop_event (threading.Event): An event that, when set,
signals this worker method to stop processing audio data.
beam_size (int): The beam size to use for beam search decoding.
initial_prompt (str or iterable of int): Initial prompt to be fed
to the transcription model.
suppress_tokens (list of int): Tokens to be suppressed from the
transcription output.
Raises:
Exception: If there is an error while initializing the
transcription model.
"""
    logging.info("Initializing faster_whisper "
                 f"main transcription model {model_path}"
                 )
    try:
        model = faster_whisper.WhisperModel(
            model_size_or_path=model_path,
            device=device,
            compute_type=compute_type,
            device_index=gpu_device_index,
        )
    except Exception as e:
        logging.exception("Error initializing main "
                          f"faster_whisper transcription model: {e}"
                          )
        raise

    # Signal readiness; the parent waits on this event
    # (self.main_transcription_ready_event).
    ready_event.set()

    logging.debug("Faster_whisper main speech to text "
                  "transcription model initialized successfully"
                  )
    while not shutdown_event.is_set():
        try:
            # With poll(0.5) the worker would wait up to 0.5 s for data and,
            # if none arrives, fall through to handle the idle case; here we
            # poll briefly and sleep below instead.
            if conn.poll(0.01):
                try:
                    msg = conn.recv()
                except EOFError:
                    # EOFError is raised when the other end of the pipe is
                    # closed, i.e. no more data can be sent.
                    break

                # 1) Shutdown sentinel
                if isinstance(msg, tuple) and len(msg) >= 1 and isinstance(msg[0], str) and msg[0] == 'shutdown':
                    break

                try:
                    # msg is expected to be an (audio, language) tuple
                    audio, language = msg
                except ValueError:
                    logging.error("Received invalid message format. "
                                  "Expected tuple with audio and language."
                                  )
                    continue

                # audio, language = conn.recv()  # returns the values sent by
                # self.parent_transcription_pipe.send((self.audio, self.language))
                try:
                    segments = model.transcribe(
                        audio,
                        language=language if language else None,
                        beam_size=beam_size,
                        initial_prompt=initial_prompt,
                        suppress_tokens=suppress_tokens,
                        # The hotwords length is bounded by max_length=448:
                        # allowed tokens = max_length // 2 - 1, so roughly up
                        # to ~100 words should fit.
                        hotwords="환자분,음성 인식 시작,음성 인식 종료,리얼 네비 내려줘,리얼 네비 올려줘,덴탈라이트 켜줘, 덴탈라이트 꺼줘"
                    )
                    segments = segments[0]
                    # for seg in segments:
                    #     print(f"seg: {seg}\n")
                    # seg: Segment(id=1, seek=208, start=0.0, end=2.0, text=' 반갑습니다.', tokens=[50364, 16396, 27358, 3115, 13, 50464], temperature=0.0, avg_logprob=-0.4255022406578064, compression_ratio=0.64, no_speech_prob=0.210693359375, words=None)
                    transcription = " ".join(seg.text for seg in segments)
                    transcription = transcription.strip()
                    # conn.send(('success', transcription)) delivers the value
                    # the parent receives via
                    # status, result = self.parent_transcription_pipe.recv()
                    conn.send(('success', transcription))
                except Exception as e:
                    logging.error(f"General transcription error: {e}")
                    conn.send(('error', str(e)))
            else:
                # If there's no data, sleep briefly to prevent busy waiting
                time.sleep(0.02)
        except KeyboardInterrupt:
            # Raised when the user manually interrupts execution,
            # typically with Ctrl+C.
            interrupt_stop_event.set()  # sets self.interrupt_stop_event to True
            logging.debug("Transcription worker process "
                          "finished due to KeyboardInterrupt"
                          )
            # Exit the while loop, which ends the transcription worker.
            break
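Since the worker already breaks out of its loop on a ('shutdown', ...) sentinel, one workaround is to have the parent send that sentinel (and set shutdown_event) before joining with a timeout, so the worker can exit on its own as soon as the current transcribe call returns. The following is only a hypothetical parent-side sketch; it assumes the parent holds parent_transcription_pipe, transcript_process and shutdown_event as the comments in the worker suggest, and is not the library's current shutdown code.

def shutdown_transcription(parent_transcription_pipe, transcript_process,
                           shutdown_event, timeout=10.0):
    # Hypothetical parent-side shutdown sketch (assumed names, not actual code).
    shutdown_event.set()  # stop the worker's main loop
    try:
        # Sentinel the worker checks for before unpacking (audio, language)
        parent_transcription_pipe.send(('shutdown',))
    except (BrokenPipeError, OSError):
        pass  # the pipe may already be closed

    # Give the worker time to finish the transcribe call it may be inside
    transcript_process.join(timeout=timeout)
    if transcript_process.is_alive():
        # Still blocked (e.g. decoding a long utterance), so fall back to a
        # forceful terminate, which is exactly what triggers the warning above.
        transcript_process.terminate()
        transcript_process.join()
    parent_transcription_pipe.close()

Even with the sentinel, a transcribe call that is still decoding a long utterance cannot be interrupted from outside, so a 10-second recording can keep the worker busy past the join timeout; the repeated warning is that forceful fallback, not a hang.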