Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2025-04-11 - Vectorize audio processing loops
**Learning:** Python `for` loops iterating over array slices are slow for audio framing tasks.
**Action:** Use NumPy vectorization by reshaping the 1D audio array to 2D and computing aggregations like `np.mean(..., axis=1)`.
Comment on lines +1 to +3
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix markdown heading lint violations (MD041, MD022).

Line 1 should be an H1, and add a blank line after the heading to satisfy markdownlint defaults.

Suggested patch
-## 2025-04-11 - Vectorize audio processing loops
+# 2025-04-11 - Vectorize audio processing loops
+
 **Learning:** Python `for` loops iterating over array slices are slow for audio framing tasks.
 **Action:** Use NumPy vectorization by reshaping the 1D audio array to 2D and computing aggregations like `np.mean(..., axis=1)`.
πŸ“ Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
## 2025-04-11 - Vectorize audio processing loops
**Learning:** Python `for` loops iterating over array slices are slow for audio framing tasks.
**Action:** Use NumPy vectorization by reshaping the 1D audio array to 2D and computing aggregations like `np.mean(..., axis=1)`.
# 2025-04-11 - Vectorize audio processing loops

**Learning:** Python `for` loops iterating over array slices are slow for audio framing tasks.
**Action:** Use NumPy vectorization by reshaping the 1D audio array to 2D and computing aggregations like `np.mean(..., axis=1)`.
🧰 Tools
🪛 LanguageTool

[typographical] ~1-~1: To join two clauses or introduce examples, consider using an em dash.
Context: ## 2025-04-11 - Vectorize audio processing loops **Learn...

(DASH_RULE)


[style] ~3-~3: Consider using the typographical ellipsis character here instead.
Context: ...y to 2D and computing aggregations like np.mean(..., axis=1).

(ELLIPSIS)

🪛 markdownlint-cli2 (0.22.0)

[warning] 1-1: Headings should be surrounded by blank lines
Expected: 1; Actual: 0; Below

(MD022, blanks-around-headings)


[warning] 1-1: First line in a file should be a top-level heading

(MD041, first-line-heading, first-line-h1)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In @.jules/bolt.md around lines 1 - 3, Change the heading "## 2025-04-11 -
Vectorize audio processing loops" to an H1 (prefix with a single "#") and insert
a single blank line immediately after that heading to satisfy markdownlint rules
MD041 and MD022; ensure the rest of the content (the Learning and Action lines)
remains unchanged and directly follows the added blank line so the document
renders correctly.

3 changes: 3 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ recursive-include config *
recursive-include k8s *
recursive-include .github *.yml *.yaml *.md

# Slower Whisper package explicitly included
recursive-include slower_whisper *.py

# Exclude compiled/temporary artifacts
global-exclude __pycache__
global-exclude *.py[cod]
Expand Down
2 changes: 2 additions & 0 deletions config/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ COPY pyproject.toml uv.lock README.md ./
COPY transcription/ ./transcription/
COPY scripts/ ./scripts/
COPY integrations/ ./integrations/
COPY slower_whisper/ ./slower_whisper/

# Install Python dependencies using uv
# ARG INSTALL_MODE controls which dependencies to install
Expand Down Expand Up @@ -128,6 +129,7 @@ COPY --from=builder /usr/local/bin /usr/local/bin
COPY --chown=appuser:appuser transcription/ /app/transcription/
COPY --chown=appuser:appuser scripts/ /app/scripts/
COPY --chown=appuser:appuser integrations/ /app/integrations/
COPY --chown=appuser:appuser slower_whisper/ /app/slower_whisper/
COPY --chown=appuser:appuser pyproject.toml /app/

# Create data directories
Expand Down
1 change: 1 addition & 0 deletions config/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ COPY pyproject.toml uv.lock README.md ./
COPY transcription/ ./transcription/
COPY scripts/ ./scripts/
COPY integrations/ ./integrations/
COPY slower_whisper/ ./slower_whisper/

# Install Python dependencies using uv
# ARG INSTALL_MODE controls which dependencies to install
Expand Down
33 changes: 0 additions & 33 deletions tests/test_streaming_asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,39 +139,6 @@ def test_pcm_bytes_to_float32_normalization(self):
# Should be close to 1.0 (32767/32768)
assert float32[0] == pytest.approx(32767 / 32768, rel=1e-4)

def test_calculate_energy_silence(self):
"""Test energy calculation for silence."""
model = MockWhisperModel()
adapter = StreamingASRAdapter(model)

silence = np.zeros(1600, dtype=np.float32)
energy = adapter._calculate_energy(silence)

assert energy == 0.0

def test_calculate_energy_signal(self):
"""Test energy calculation for non-zero signal."""
model = MockWhisperModel()
adapter = StreamingASRAdapter(model)

# Sine wave should have non-zero energy
t = np.linspace(0, 1, 16000, dtype=np.float32)
signal = 0.5 * np.sin(2 * np.pi * 440 * t) # 440 Hz tone
energy = adapter._calculate_energy(signal)

assert energy > 0.0
assert energy < 1.0

def test_calculate_energy_empty(self):
"""Test energy calculation for empty array."""
model = MockWhisperModel()
adapter = StreamingASRAdapter(model)

empty = np.array([], dtype=np.float32)
energy = adapter._calculate_energy(empty)

assert energy == 0.0

@pytest.mark.asyncio
async def test_ingest_audio_empty(self):
"""Test ingesting empty audio."""
Expand Down
32 changes: 13 additions & 19 deletions transcription/streaming_asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,19 +173,6 @@ def _pcm_bytes_to_float32(self, pcm_bytes: bytes) -> np.ndarray:

return float32_array

def _calculate_energy(self, audio: np.ndarray) -> float:
"""Calculate RMS energy of audio segment.

Args:
audio: Float32 audio array.

Returns:
RMS energy value (0.0-1.0 for normalized audio).
"""
if len(audio) == 0:
return 0.0
return float(np.sqrt(np.mean(audio**2)))

def _detect_speech_frames(self, audio: np.ndarray, frame_size_ms: int = 30) -> list[bool]:
"""Detect speech in audio using frame-wise energy analysis.

Expand All @@ -199,13 +186,20 @@ def _detect_speech_frames(self, audio: np.ndarray, frame_size_ms: int = 30) -> l
frame_size = int(self.config.sample_rate * frame_size_ms / 1000)
num_frames = len(audio) // frame_size

speech_frames = []
for i in range(num_frames):
frame = audio[i * frame_size : (i + 1) * frame_size]
energy = self._calculate_energy(frame)
speech_frames.append(energy > self.config.vad_energy_threshold)
if num_frames == 0:
return []

# Truncate to exact multiple of frame_size
truncated_audio = audio[: num_frames * frame_size]

# Reshape into (num_frames, frame_size)
frames = truncated_audio.reshape((num_frames, frame_size))

# Calculate energy across frames: sqrt(mean(audio^2, axis=1))
energies = np.sqrt(np.mean(frames**2, axis=1))

return speech_frames
# Boolean array to list of bools
return [bool(e > self.config.vad_energy_threshold) for e in energies]

def _process_vad(
self,
Expand Down
Loading