From 66e4238b81a25e60152b3290c3a1d8943f94c295 Mon Sep 17 00:00:00 2001
From: Daniel Joaquin Trujillo
<54636507+danieljtrujillo@users.noreply.github.com>
Date: Sat, 13 Jun 2026 19:43:41 -0700
Subject: [PATCH 1/3] feat(library): make stems & MIDI first-class
Stems and MIDI in the library can now be played, favorited, deleted, and
routed anywhere audio goes, instead of only offering a right-click send menu.
- DB schema v5: favorite column on stems + midis; get/set-favorite/delete
methods for both.
- Endpoints: PATCH/DELETE /api/library/stems/{id} and
PATCH/DELETE /api/midi/file/{id} (delete removes the file + row, leaves
the parent track and siblings intact).
- Library STEMS/MIDI rows get inline play/pause, favorite star, and delete;
MIDI right-click now routes to editor/init/inpaint/chimera.
- Shared lib/midiSynth.ts extracts the offline synth voice + WAV encoder
(renderNotesToBlob / renderStepNotesToBlob / renderMidiBufferToBlob);
PianoRoll delegates to it so previews, the editor bounce, and library MIDI
all sound identical. midiIdToSendable() makes any library MIDI a lazy
SendableAudio.
---
backend/modules/library/db.py | 69 +++++-
backend/modules/library/router.py | 37 ++++
backend/modules/midi/engine.py | 32 ++-
backend/modules/midi/router.py | 40 +++-
backend/modules/stems/sidecar.py | 132 ++++++++++--
docs/reports/feature-doc-coverage-report.md | 2 +-
docs/reports/feature-doc-coverage.json | 4 +-
docs/screenshots/manifest.json | 2 +-
frontend/src/components/audio/PianoRoll.tsx | 102 +--------
frontend/src/lib/midiSynth.ts | 160 ++++++++++++++
frontend/src/lib/sendToTargets.ts | 21 ++
frontend/src/views/LibraryView.tsx | 219 ++++++++++++++++++--
12 files changed, 675 insertions(+), 145 deletions(-)
create mode 100644 frontend/src/lib/midiSynth.ts
diff --git a/backend/modules/library/db.py b/backend/modules/library/db.py
index 8e68800..f5c4d3e 100644
--- a/backend/modules/library/db.py
+++ b/backend/modules/library/db.py
@@ -36,7 +36,7 @@
log = logging.getLogger(__name__)
-SCHEMA_VERSION = 4
+SCHEMA_VERSION = 5
# Each tuple is (schema_version_after_running, statements list).
@@ -234,6 +234,16 @@
"CREATE INDEX IF NOT EXISTS idx_entries_play_count ON entries(play_count DESC)",
],
),
+ (
+ 5,
+ [
+ # Stems and MIDI become first-class library items: they can be
+ # favorited just like parent tracks. Default 0 keeps existing
+ # rows unflagged.
+ "ALTER TABLE stems ADD COLUMN favorite INTEGER NOT NULL DEFAULT 0",
+ "ALTER TABLE midis ADD COLUMN favorite INTEGER NOT NULL DEFAULT 0",
+ ],
+ ),
]
@@ -708,6 +718,35 @@ def list_stems(self, entry_id: str) -> list[dict[str, Any]]:
cur.close()
return [dict(r) for r in rows]
+    def get_stem(self, stem_id: str) -> Optional[dict[str, Any]]:
+        """Return the stem row whose id is ``stem_id`` as a dict, or ``None``.
+
+        The lookup runs while holding ``self._writelock``. The cursor is
+        closed in a ``finally`` so it is released even when the query raises.
+        """
+        with self._writelock:
+            cur = self._conn.cursor()
+            try:
+                row = cur.execute(
+                    "SELECT * FROM stems WHERE id = ?", (stem_id,)
+                ).fetchone()
+            finally:
+                cur.close()
+        return dict(row) if row else None
+
+    def set_stem_favorite(self, stem_id: str, favorite: bool) -> bool:
+        """Store the favorite flag (as 1/0) on one stem row.
+
+        Returns ``True`` when the UPDATE matched a row, ``False`` otherwise.
+        """
+        flag = int(bool(favorite))
+        with self._txn() as cur:
+            cur.execute(
+                "UPDATE stems SET favorite = ? WHERE id = ?",
+                (flag, stem_id),
+            )
+            matched = cur.rowcount > 0
+        return matched
+
+    def delete_stem(self, stem_id: str) -> bool:
+        """Remove one stem row and every relation edge that mentions its id.
+
+        Only database rows are touched here; removing the file referenced by
+        ``audio_path`` is left to the caller. Returns ``True`` when a stem
+        row was deleted.
+        """
+        edge_params = (stem_id, stem_id)
+        with self._txn() as cur:
+            cur.execute("DELETE FROM stems WHERE id = ?", (stem_id,))
+            # Read rowcount now: the next statement overwrites it.
+            removed = cur.rowcount > 0
+            # Relations are polymorphic (stems-of / midi-of), so this id can
+            # sit on either end of an edge.
+            cur.execute(
+                "DELETE FROM relations WHERE from_id = ? OR to_id = ?",
+                edge_params,
+            )
+        return removed
+
def add_midi(
self,
*,
@@ -751,6 +790,34 @@ def list_midis(self, entry_id: str) -> list[dict[str, Any]]:
cur.close()
return [dict(r) for r in rows]
+    def get_midi(self, midi_id: str) -> Optional[dict[str, Any]]:
+        """Return the MIDI row whose id is ``midi_id`` as a dict, or ``None``.
+
+        The lookup runs while holding ``self._writelock``. The cursor is
+        closed in a ``finally`` so it is released even when the query raises.
+        """
+        with self._writelock:
+            cur = self._conn.cursor()
+            try:
+                row = cur.execute(
+                    "SELECT * FROM midis WHERE id = ?", (midi_id,)
+                ).fetchone()
+            finally:
+                cur.close()
+        return dict(row) if row else None
+
+    def set_midi_favorite(self, midi_id: str, favorite: bool) -> bool:
+        """Store the favorite flag (as 1/0) on one MIDI row.
+
+        Returns ``True`` when the UPDATE matched a row, ``False`` otherwise.
+        """
+        flag = int(bool(favorite))
+        with self._txn() as cur:
+            cur.execute(
+                "UPDATE midis SET favorite = ? WHERE id = ?",
+                (flag, midi_id),
+            )
+            matched = cur.rowcount > 0
+        return matched
+
+    def delete_midi(self, midi_id: str) -> bool:
+        """Remove one MIDI row and every relation edge that mentions its id.
+
+        Only database rows are touched here; removing the .mid file
+        referenced by ``midi_path`` is left to the caller. Returns ``True``
+        when a MIDI row was deleted.
+        """
+        edge_params = (midi_id, midi_id)
+        with self._txn() as cur:
+            cur.execute("DELETE FROM midis WHERE id = ?", (midi_id,))
+            # Read rowcount now: the next statement overwrites it.
+            removed = cur.rowcount > 0
+            cur.execute(
+                "DELETE FROM relations WHERE from_id = ? OR to_id = ?",
+                edge_params,
+            )
+        return removed
+
def add_notation_artifact(
self,
*,
diff --git a/backend/modules/library/router.py b/backend/modules/library/router.py
index 7f41bee..376b4b9 100644
--- a/backend/modules/library/router.py
+++ b/backend/modules/library/router.py
@@ -177,6 +177,43 @@ def stream_stem_audio(stem_id: str) -> FileResponse:
raise HTTPException(404, f"stem {stem_id!r} not found")
+@router.patch("/stems/{stem_id}")
+def update_stem(stem_id: str, patch: dict[str, Any] = Body(...)) -> dict[str, Any]:
+    """Mutate a stem row and return its current state.
+
+    Only ``favorite`` is user-mutable; other keys in ``patch`` are ignored.
+
+    Raises:
+        HTTPException: 503 when the library DB is unavailable, 422 when
+            ``favorite`` is not a boolean (or 0/1), 404 when no stem has
+            this id.
+    """
+    store = get_store()
+    if store.db is None:
+        raise HTTPException(503, "library DB not available")
+    if "favorite" in patch:
+        favorite = patch["favorite"]
+        # bool() would turn any non-empty string, including "false", into
+        # True, so reject values that are not an explicit boolean or 0/1.
+        if not isinstance(favorite, (bool, int)) or favorite not in (0, 1):
+            raise HTTPException(422, "favorite must be a boolean")
+        if not store.db.set_stem_favorite(stem_id, bool(favorite)):
+            raise HTTPException(404, f"stem {stem_id!r} not found")
+    row = store.db.get_stem(stem_id)
+    if row is None:
+        raise HTTPException(404, f"stem {stem_id!r} not found")
+    return dict(row)
+
+
+@router.delete("/stems/{stem_id}")
+def delete_stem(stem_id: str) -> dict[str, Any]:
+    """Remove a single separated stem: its audio file, then its DB row.
+
+    A failed file removal is logged as a warning and does not stop the row
+    from being deleted. Responds 503 without a library DB and 404 for an
+    unknown stem id.
+    """
+    store = get_store()
+    db = store.db
+    if db is None:
+        raise HTTPException(503, "library DB not available")
+    stem_row = db.get_stem(stem_id)
+    if stem_row is None:
+        raise HTTPException(404, f"stem {stem_id!r} not found")
+    audio_path = Path(stem_row.get("audio_path") or "")
+    # An empty/missing path resolves to a non-file and is simply skipped.
+    if audio_path.is_file():
+        try:
+            audio_path.unlink()
+        except OSError as e:
+            log.warning("library: failed to delete stem file %s: %s", audio_path, e)
+    db.delete_stem(stem_id)
+    return {"deleted": stem_id}
+
+
@router.get("/media/{entry_id}")
def stream_media(entry_id: str) -> FileResponse:
"""Stream a video/image library entry. FileResponse honors Range
diff --git a/backend/modules/midi/engine.py b/backend/modules/midi/engine.py
index 5120fd6..e3900d1 100644
--- a/backend/modules/midi/engine.py
+++ b/backend/modules/midi/engine.py
@@ -17,7 +17,9 @@
from __future__ import annotations
+import contextlib
import importlib
+import io
import logging
import shutil
import subprocess
@@ -226,15 +228,27 @@ def _run_basic_pitch(audio_path: Path, output_path: Path) -> dict:
output_path.parent.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(dir=str(output_path.parent)) as td:
td_path = Path(td)
- predict_and_save(
- audio_path_list=[str(audio_path)],
- output_directory=str(td_path),
- save_midi=True,
- sonify_midi=False,
- save_model_outputs=False,
- save_notes=False,
- model_or_model_path=ICASSP_2022_MODEL_PATH,
- )
+ # basic-pitch prints status with emoji (🚨, etc.). On Windows the
+ # console/log stream is often a legacy code page (cp1252), so the
+ # library's own print() raises UnicodeEncodeError ('charmap' codec
+ # can't encode '\U0001f6a8') and kills a conversion that would
+ # otherwise succeed. Capture its stdout/stderr into a str buffer —
+ # StringIO holds text, never encodes, so it cannot crash — then log
+ # the (now harmless) chatter at debug level.
+ chatter = io.StringIO()
+ with contextlib.redirect_stdout(chatter), contextlib.redirect_stderr(chatter):
+ predict_and_save(
+ audio_path_list=[str(audio_path)],
+ output_directory=str(td_path),
+ save_midi=True,
+ sonify_midi=False,
+ save_model_outputs=False,
+ save_notes=False,
+ model_or_model_path=ICASSP_2022_MODEL_PATH,
+ )
+ captured = chatter.getvalue().strip()
+ if captured:
+ log.debug("basic_pitch output: %s", captured)
# basic-pitch names: _basic_pitch.mid
produced = next(td_path.glob("*_basic_pitch.mid"), None)
if produced is None:
diff --git a/backend/modules/midi/router.py b/backend/modules/midi/router.py
index 00c4ddb..90cff3f 100644
--- a/backend/modules/midi/router.py
+++ b/backend/modules/midi/router.py
@@ -11,8 +11,9 @@
import logging
from pathlib import Path
+from typing import Any
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Body, HTTPException
from fastapi.responses import FileResponse
from backend.modules.library.router import get_store as get_library_store
@@ -89,6 +90,43 @@ def get_midi_file(midi_id: str) -> FileResponse:
raise HTTPException(404, f"midi row {midi_id!r} not found")
+@router.patch("/file/{midi_id}")
+def update_midi(midi_id: str, patch: dict[str, Any] = Body(...)) -> dict:
+    """Mutate a MIDI row and return its current state.
+
+    Only ``favorite`` is user-mutable; other keys in ``patch`` are ignored.
+
+    Raises:
+        HTTPException: 503 when the library DB is unavailable, 422 when
+            ``favorite`` is not a boolean (or 0/1), 404 when no MIDI row
+            has this id.
+    """
+    store = get_library_store()
+    if store.db is None:
+        raise HTTPException(503, "library DB not available")
+    if "favorite" in patch:
+        favorite = patch["favorite"]
+        # bool() would turn any non-empty string, including "false", into
+        # True, so reject values that are not an explicit boolean or 0/1.
+        if not isinstance(favorite, (bool, int)) or favorite not in (0, 1):
+            raise HTTPException(422, "favorite must be a boolean")
+        if not store.db.set_midi_favorite(midi_id, bool(favorite)):
+            raise HTTPException(404, f"midi row {midi_id!r} not found")
+    row = store.db.get_midi(midi_id)
+    if row is None:
+        raise HTTPException(404, f"midi row {midi_id!r} not found")
+    return dict(row)
+
+
+@router.delete("/file/{midi_id}")
+def delete_midi_file(midi_id: str) -> dict:
+    """Remove a single MIDI conversion: its .mid file, then its DB row.
+
+    A failed file removal is logged as a warning and does not stop the row
+    from being deleted. Responds 503 without a library DB and 404 for an
+    unknown MIDI id.
+    """
+    store = get_library_store()
+    db = store.db
+    if db is None:
+        raise HTTPException(503, "library DB not available")
+    midi_row = db.get_midi(midi_id)
+    if midi_row is None:
+        raise HTTPException(404, f"midi row {midi_id!r} not found")
+    midi_path = Path(midi_row.get("midi_path") or "")
+    # An empty/missing path resolves to a non-file and is simply skipped.
+    if midi_path.is_file():
+        try:
+            midi_path.unlink()
+        except OSError as e:
+            log.warning("midi: failed to delete file %s: %s", midi_path, e)
+    db.delete_midi(midi_id)
+    return {"deleted": midi_id}
+
+
@router.get("/{entry_id}")
def list_entry_midis(entry_id: str) -> dict:
store = get_library_store()
diff --git a/backend/modules/stems/sidecar.py b/backend/modules/stems/sidecar.py
index d6adcd9..e49295a 100644
--- a/backend/modules/stems/sidecar.py
+++ b/backend/modules/stems/sidecar.py
@@ -27,6 +27,7 @@
from __future__ import annotations
+import json
import logging
import os
import socket
@@ -42,6 +43,50 @@
log = logging.getLogger(__name__)
+# Packages the sidecar genuinely needs to separate stems. demucs imports but
+# is useless without torch/torchaudio; torchcrepe drives the crepe pitch path.
+# The historical probe only checked demucs, so a venv with demucs present but
+# torch/torchcrepe missing spawned anyway — then run_backend.py tried to self-
+# install them and blew the entire 300s readiness window. We now gate on ALL of
+# these being importable before spawning.
+_CRITICAL_PACKAGES: tuple[str, ...] = ("demucs", "torch", "torchaudio", "torchcrepe")
+
+
+def _probe_packages(python_exe: Path) -> dict:
+    """Import every critical package in the sidecar Python in ONE subprocess.
+
+    Args:
+        python_exe: Interpreter of the sidecar environment to probe.
+
+    Returns:
+        ``{pkg: {"ok": True, "version": str | None}}`` for packages that
+        import and ``{pkg: {"ok": False, "error": str}}`` for those that do
+        not, or ``{"_error": ...}`` when the probe itself could not run or
+        its output could not be parsed. Always a dict.
+    """
+    script = (
+        "import json, importlib\n"
+        f"pkgs = {list(_CRITICAL_PACKAGES)!r}\n"
+        "out = {}\n"
+        "for p in pkgs:\n"
+        "    try:\n"
+        "        m = importlib.import_module(p)\n"
+        "        out[p] = {'ok': True, 'version': getattr(m, '__version__', None)}\n"
+        "    except Exception as e:\n"
+        "        out[p] = {'ok': False, 'error': repr(e)[:300]}\n"
+        "print(json.dumps(out))\n"
+    )
+    try:
+        result = subprocess.run(
+            [str(python_exe), "-c", script],
+            capture_output=True,
+            text=True,
+            # Imported packages may print bytes the parent's locale codec
+            # cannot decode; replace them instead of letting
+            # UnicodeDecodeError escape from a probe that must not raise.
+            errors="replace",
+            timeout=30,
+        )
+    except (subprocess.TimeoutExpired, OSError, ValueError) as e:
+        return {"_error": repr(e)}
+    if result.returncode != 0:
+        return {"_error": result.stderr.strip()[:300] or "probe subprocess failed"}
+    # The report is normally the last stdout line, but an imported package
+    # can print after it (e.g. at interpreter exit), so scan backwards for
+    # the last line that parses as a JSON object. Callers test
+    # `"_error" in result`, so anything that is not a dict is rejected.
+    for line in reversed(result.stdout.splitlines()):
+        candidate = line.strip()
+        if not candidate.startswith("{"):
+            continue
+        try:
+            parsed = json.loads(candidate)
+        except ValueError:
+            continue
+        if isinstance(parsed, dict):
+            return parsed
+    return {"_error": "probe parse failed: no JSON object in probe output"}
+
+
DEFAULT_PACKAGE_PATH = Path(r"D:/StableAudio/JoshOG/integration-package/backend")
PORT_FILENAME = "backend_port.txt"
# run_backend.py does a dependency check + possible pip install on first
@@ -173,23 +218,38 @@ def probe(cfg: Optional[SidecarConfig] = None) -> dict:
)
return out
+ # Per-package import check (demucs + torch + torchaudio + torchcrepe), not
+ # demucs alone — a venv can import demucs while torch/torchcrepe are missing
+ # or broken, which is exactly what stalled the sidecar before.
+ out["packages"] = {}
+ out["missing_critical"] = []
+ out["critical_ok"] = False
if cfg.python_exe.is_file():
- try:
- result = subprocess.run(
- [str(cfg.python_exe), "-c", "import demucs; print(demucs.__version__)"],
- capture_output=True,
- text=True,
- timeout=15,
- )
- if result.returncode == 0:
- out["demucs_importable"] = True
- out["demucs_version"] = result.stdout.strip()
+ pkgs = _probe_packages(cfg.python_exe)
+ if "_error" in pkgs:
+ out["demucs_error"] = pkgs["_error"]
+ out["missing_critical"] = list(_CRITICAL_PACKAGES)
+ else:
+ out["packages"] = pkgs
+ out["missing_critical"] = [
+ p for p in _CRITICAL_PACKAGES if not pkgs.get(p, {}).get("ok")
+ ]
+ out["critical_ok"] = len(out["missing_critical"]) == 0
+ demucs_info = pkgs.get("demucs", {})
+ out["demucs_importable"] = bool(demucs_info.get("ok"))
+ if demucs_info.get("ok"):
+ out["demucs_version"] = demucs_info.get("version")
else:
- out["demucs_error"] = result.stderr.strip()[:300]
- except (subprocess.TimeoutExpired, OSError) as e:
- out["demucs_error"] = repr(e)
+ out["demucs_error"] = demucs_info.get("error")
+ # Surface the first broken critical so logs/UI name a real cause.
+ if out["missing_critical"]:
+ first = out["missing_critical"][0]
+ first_err = pkgs.get(first, {}).get("error")
+ if first_err and not out.get("demucs_error"):
+ out["demucs_error"] = f"{first}: {first_err}"
else:
out["demucs_error"] = f"python_exe not found: {cfg.python_exe}"
+ out["missing_critical"] = list(_CRITICAL_PACKAGES)
port_file = _port_file(cfg)
if port_file.is_file():
@@ -201,7 +261,7 @@ def probe(cfg: Optional[SidecarConfig] = None) -> dict:
pass
out["ok"] = (
- out["package_exists"] and out["run_backend_exists"] and out["demucs_importable"]
+ out["package_exists"] and out["run_backend_exists"] and out["critical_ok"]
)
return out
@@ -261,12 +321,19 @@ def ensure_running(self) -> int:
if not run_backend.is_file():
raise RuntimeError(f"stems sidecar launcher missing: {run_backend}")
- # If demucs isn't importable in the configured Python, install
- # deps ourselves rather than letting run_backend.py try (it uses
- # plain `python -m pip` which fails in uv-managed venvs that
- # ship without pip). We use ensurepip / uv-pip fallback.
- if not probe(self.cfg).get("demucs_importable"):
- log.info("stems.sidecar: demucs not importable — installing deps first")
+ # If ANY critical package (demucs/torch/torchaudio/torchcrepe) is
+ # missing or broken, install deps ourselves BEFORE spawning rather
+ # than letting run_backend.py try (it uses plain `python -m pip`,
+ # which fails in uv-managed venvs without pip AND can spend the whole
+ # readiness window resolving torch conflicts, the original 300s-stall
+ # bug). We use ensurepip / uv-pip fallback.
+ pr = probe(self.cfg)
+ if not pr.get("critical_ok"):
+ missing = pr.get("missing_critical") or ["demucs"]
+ log.info(
+ "stems.sidecar: critical deps not ready (%s) — installing first",
+ ", ".join(missing),
+ )
install_result = install_dependencies(self.cfg)
if not install_result.get("ok"):
err_blob = (
@@ -274,9 +341,20 @@ def ensure_running(self) -> int:
)
raise RuntimeError(
"stems sidecar dep install failed "
- f"({install_result.get('install_mode', 'unknown')}): "
+ f"({install_result.get('install_mode', 'unknown')}); "
+ f"missing before install: {', '.join(missing)}. "
f"{err_blob[:600]}"
)
+ # Re-probe so a post-install gap surfaces here with a clear list
+ # instead of as an opaque 300s port-file timeout downstream.
+ pr2 = probe(self.cfg)
+ if not pr2.get("critical_ok"):
+ still = pr2.get("missing_critical") or []
+ raise RuntimeError(
+ "stems sidecar deps still missing after install: "
+ f"{', '.join(still)}. See install logs / sidecar venv "
+ f"({self.cfg.python_exe})."
+ )
# Clear any stale port file.
port_file = _port_file(self.cfg)
@@ -315,10 +393,20 @@ def ensure_running(self) -> int:
if port is None:
stdout_tail = _tail_log(self._stdout_log)
stderr_tail = _tail_log(self._stderr_log)
+ # Snapshot dep state so the failure names a concrete cause rather
+ # than just "timed out" (deps were already installed above, so a
+ # gap here points at a different boot problem).
+ post = probe(self.cfg)
+ missing = post.get("missing_critical") or []
self.stop()
+ dep_note = (
+ f" Critical deps still missing: {', '.join(missing)}."
+ if missing
+ else " All critical deps import OK — check the log tails for a boot error."
+ )
raise RuntimeError(
f"stems sidecar didn't write {PORT_FILENAME} within "
- f"{HEALTH_TIMEOUT_SEC}s.\n"
+ f"{HEALTH_TIMEOUT_SEC}s.{dep_note}\n"
f"stdout tail: {stdout_tail[:500]}\n"
f"stderr tail: {stderr_tail[:500]}"
)
diff --git a/docs/reports/feature-doc-coverage-report.md b/docs/reports/feature-doc-coverage-report.md
index ecd6c85..9a7aa9a 100644
--- a/docs/reports/feature-doc-coverage-report.md
+++ b/docs/reports/feature-doc-coverage-report.md
@@ -1,7 +1,7 @@
# Feature Documentation Coverage Report
> [!NOTE]
-> Generated: 2026-06-13T13:00:04.808Z · Git revision: `bc466fda402d` · Repomix tracked: **no**
+> Generated: 2026-06-14T02:43:45.356Z · Git revision: `9cdeed22cf1f` · Repomix tracked: **no**
## Audit Dashboard
diff --git a/docs/reports/feature-doc-coverage.json b/docs/reports/feature-doc-coverage.json
index c079e13..a67a350 100644
--- a/docs/reports/feature-doc-coverage.json
+++ b/docs/reports/feature-doc-coverage.json
@@ -1,6 +1,6 @@
{
- "generatedAt": "2026-06-13T13:00:04.808Z",
- "repoRevision": "bc466fda402d",
+ "generatedAt": "2026-06-14T02:43:45.356Z",
+ "repoRevision": "9cdeed22cf1f",
"repomixContext": {
"path": "repomix-output.md",
"present": false,
diff --git a/docs/screenshots/manifest.json b/docs/screenshots/manifest.json
index bca98e7..7ed7253 100644
--- a/docs/screenshots/manifest.json
+++ b/docs/screenshots/manifest.json
@@ -1,5 +1,5 @@
{
- "generatedAt": "2026-06-13T13:02:09.249Z",
+ "generatedAt": "2026-06-14T02:46:37.642Z",
"entries": [
{
"file": "01-shell-make.png",
diff --git a/frontend/src/components/audio/PianoRoll.tsx b/frontend/src/components/audio/PianoRoll.tsx
index 9411279..5b580a4 100644
--- a/frontend/src/components/audio/PianoRoll.tsx
+++ b/frontend/src/components/audio/PianoRoll.tsx
@@ -8,6 +8,7 @@ import { downloadMidi, parseMidi } from '../../utils/midi';
import { logError, logInfo } from '../../state/logStore';
import { MidiMapper } from './MidiMapper';
import { ContextMenu, useContextMenu, type ContextMenuItem } from '../ui/ContextMenu';
+import { triggerSynthVoice, renderStepNotesToBlob } from '../../lib/midiSynth';
const NOTE_HEIGHT = 12;
const HEADER_HEIGHT = 22;
@@ -17,43 +18,13 @@ const NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#',
const isBlackKey = (midi: number) => [1, 3, 6, 8, 10].includes(midi % 12);
const noteLabel = (midi: number) => `${NOTE_NAMES[midi % 12]}${Math.floor(midi / 12) - 1}`;
-/**
- * Schedule a single sawtooth+lowpass+env voice on the given context. Used both
- * for live playback (engine ctx + master gain) and offline rendering
- * (OfflineAudioContext + its destination).
- */
-const triggerPianoNoteOn = (
- ctx: BaseAudioContext,
- dest: AudioNode,
- midi: number,
- velocity: number,
- when: number,
- duration: number,
- master: number,
-): void => {
- const freq = 440 * Math.pow(2, (midi - 69) / 12);
- const osc = ctx.createOscillator();
- osc.type = 'sawtooth';
- osc.frequency.setValueAtTime(freq, when);
- const lp = ctx.createBiquadFilter();
- lp.type = 'lowpass';
- lp.frequency.setValueAtTime(Math.min(8000, freq * 6), when);
- const env = ctx.createGain();
- const peak = (velocity / 127) * 0.7 * master;
- env.gain.setValueAtTime(0.001, when);
- env.gain.exponentialRampToValueAtTime(peak, when + 0.008);
- env.gain.setTargetAtTime(peak * 0.5, when + 0.05, 0.08);
- env.gain.setTargetAtTime(0.001, when + duration, 0.05);
- osc.connect(lp).connect(env).connect(dest);
- osc.start(when);
- osc.stop(when + duration + 0.2);
-};
-
-/** Live preview convenience: route through the shared engine master/analyser. */
+/** Live preview convenience: route the shared synth voice through the engine
+ * master/analyser. The voice itself lives in `lib/midiSynth` so previews,
+ * bounces, and library MIDI renders all sound identical. */
const triggerPianoNote = (midi: number, velocity: number, when: number, duration: number, master: number) => {
const ctx = getEngineCtx();
if (ctx.state === 'suspended') void ctx.resume();
- triggerPianoNoteOn(ctx, getMasterGain(), midi, velocity, when, duration, master);
+ triggerSynthVoice(ctx, getMasterGain(), midi, velocity, when, duration, master);
};
/**
@@ -75,67 +46,14 @@ export const triggerPianoNoteFromMidi = (midi: number, velocity = 100, duration
triggerPianoNote(midi, velocity, ctx.currentTime + 0.02, duration, 0.8);
};
-// --- WAV encoder (16-bit PCM, mirrors WaveformEditor.encodeWav) ---
-const encodeWavBlob = (audioBuf: AudioBuffer): Blob => {
- const numCh = audioBuf.numberOfChannels;
- const sr = audioBuf.sampleRate;
- const len = audioBuf.length;
- const buffer = new ArrayBuffer(44 + len * numCh * 2);
- const view = new DataView(buffer);
- const writeStr = (off: number, s: string) => {
- for (let i = 0; i < s.length; i += 1) view.setUint8(off + i, s.charCodeAt(i));
- };
- writeStr(0, 'RIFF');
- view.setUint32(4, 36 + len * numCh * 2, true);
- writeStr(8, 'WAVE');
- writeStr(12, 'fmt ');
- view.setUint32(16, 16, true);
- view.setUint16(20, 1, true);
- view.setUint16(22, numCh, true);
- view.setUint32(24, sr, true);
- view.setUint32(28, sr * numCh * 2, true);
- view.setUint16(32, numCh * 2, true);
- view.setUint16(34, 16, true);
- writeStr(36, 'data');
- view.setUint32(40, len * numCh * 2, true);
- const channels: Float32Array[] = [];
- for (let c = 0; c < numCh; c += 1) channels.push(audioBuf.getChannelData(c));
- let offset = 44;
- for (let i = 0; i < len; i += 1) {
- for (let c = 0; c < numCh; c += 1) {
- const sample = Math.max(-1, Math.min(1, channels[c][i]));
- view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true);
- offset += 2;
- }
- }
- return new Blob([buffer], { type: 'audio/wav' });
-};
-
-/** Render the current pattern offline to a WAV Blob. Used by SEND TO EDITOR. */
-const renderPianoRollToBlob = async (
+/** Render the current pattern offline to a WAV Blob. Used by SEND TO EDITOR.
+ * Delegates to the shared step renderer in `lib/midiSynth`. */
+const renderPianoRollToBlob = (
notes: PianoNote[],
bpm: number,
totalSteps: number,
-): Promise<{ blob: Blob; duration: number }> => {
- const sr = 44100;
- const stepSec = 60 / Math.max(40, bpm) / 4; // 16th note seconds
- // Total length = (last note end + 0.5s tail).
- let maxEnd = 0;
- for (const n of notes) {
- const end = (n.step + n.length) * stepSec;
- if (end > maxEnd) maxEnd = end;
- }
- const padTail = 0.6;
- const totalSec = Math.max(maxEnd, totalSteps * stepSec) + padTail;
- const offline = new OfflineAudioContext(2, Math.ceil(totalSec * sr), sr);
- for (const n of notes) {
- const when = n.step * stepSec;
- const dur = n.length * stepSec;
- triggerPianoNoteOn(offline, offline.destination, n.note, n.velocity, when, dur, 1);
- }
- const rendered = await offline.startRendering();
- return { blob: encodeWavBlob(rendered), duration: rendered.duration };
-};
+): Promise<{ blob: Blob; duration: number }> =>
+ renderStepNotesToBlob(notes, bpm, totalSteps);
// Re-declared after the imports section so it picks up the imported
// MidiMapper symbol without circular-import gymnastics.
diff --git a/frontend/src/lib/midiSynth.ts b/frontend/src/lib/midiSynth.ts
new file mode 100644
index 0000000..5f3b74d
--- /dev/null
+++ b/frontend/src/lib/midiSynth.ts
@@ -0,0 +1,160 @@
+/**
+ * Shared MIDI → audio synthesis.
+ *
+ * Centralizes the offline render path so MIDI is usable everywhere audio is:
+ * preview playback, init audio, chimera fodder, and the piano roll's SEND TO
+ * EDITOR bounce. Today the only engine is a built-in subtractive sawtooth
+ * voice (no soundfont dependency), but the public surface is engine-shaped so
+ * a sample/soundfont engine can be dropped in later without touching callers.
+ *
+ * The voice is byte-for-byte the same one the piano roll used inline before
+ * this module existed, so previews and bounces stay consistent.
+ */
+import { parseMidi } from './midi';
+
+/** One note in absolute seconds — the engine-neutral render unit. */
+export interface RenderNote {
+ /** MIDI note number 0-127 (60 = middle C). */
+ midi: number;
+ /** Start time in seconds from the render origin. */
+ startSec: number;
+ /** Sounding length in seconds. */
+ durationSec: number;
+ /** Velocity 1-127. */
+ velocity: number;
+}
+
+export interface RenderOptions {
+ /** Output sample rate. Defaults to 44.1kHz to match the rest of the app. */
+ sampleRate?: number;
+ /** Silence appended after the last note so tails aren't clipped. */
+ tailSec?: number;
+}
+
+/**
+ * Schedule a single sawtooth + lowpass + envelope voice on any audio context.
+ * Works on both a live `AudioContext` (preview) and an `OfflineAudioContext`
+ * (render), since it only touches the standard `BaseAudioContext` surface.
+ *
+ * A voice whose peak gain would not be positive (velocity or master of 0, or
+ * a NaN input) is skipped: the envelope's exponential ramp cannot target 0
+ * and would throw instead of playing silence.
+ *
+ * @param ctx      Context the nodes are created on.
+ * @param dest     Node the voice's output is connected to.
+ * @param midi     MIDI note number; converted to Hz relative to A4 = 69.
+ * @param velocity MIDI velocity, scaled against 127.
+ * @param when     Start time on `ctx`'s clock, in seconds.
+ * @param duration Held length in seconds; release starts at `when + duration`.
+ * @param master   Linear gain multiplier applied to the voice's peak.
+ */
+export const triggerSynthVoice = (
+  ctx: BaseAudioContext,
+  dest: AudioNode,
+  midi: number,
+  velocity: number,
+  when: number,
+  duration: number,
+  master: number,
+): void => {
+  const peak = (velocity / 127) * 0.7 * master;
+  // exponentialRampToValueAtTime() throws a RangeError for a target of 0, so
+  // a muted voice is dropped before any node is created.
+  if (!(peak > 0)) return;
+  const freq = 440 * Math.pow(2, (midi - 69) / 12);
+  const osc = ctx.createOscillator();
+  osc.type = 'sawtooth';
+  osc.frequency.setValueAtTime(freq, when);
+  const lp = ctx.createBiquadFilter();
+  lp.type = 'lowpass';
+  // Cutoff follows the pitch (6x the fundamental), capped at 8 kHz.
+  lp.frequency.setValueAtTime(Math.min(8000, freq * 6), when);
+  const env = ctx.createGain();
+  // Envelope: 8ms attack to peak, decay toward half, release at note end.
+  env.gain.setValueAtTime(0.001, when);
+  env.gain.exponentialRampToValueAtTime(peak, when + 0.008);
+  env.gain.setTargetAtTime(peak * 0.5, when + 0.05, 0.08);
+  env.gain.setTargetAtTime(0.001, when + duration, 0.05);
+  osc.connect(lp).connect(env).connect(dest);
+  osc.start(when);
+  // Keep the oscillator alive 0.2s past the note so the release can decay.
+  osc.stop(when + duration + 0.2);
+};
+
+/** Serialize an AudioBuffer as an interleaved 16-bit PCM WAV Blob. */
+export const encodeWavBlob = (audioBuf: AudioBuffer): Blob => {
+  const HEADER_BYTES = 44;
+  const BYTES_PER_SAMPLE = 2;
+  const channelCount = audioBuf.numberOfChannels;
+  const sampleRate = audioBuf.sampleRate;
+  const frameCount = audioBuf.length;
+  const blockAlign = channelCount * BYTES_PER_SAMPLE;
+  const dataBytes = frameCount * blockAlign;
+  const buffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
+  const wav = new DataView(buffer);
+  const putTag = (at: number, tag: string): void => {
+    for (let k = 0; k < tag.length; k += 1) wav.setUint8(at + k, tag.charCodeAt(k));
+  };
+  // RIFF container header.
+  putTag(0, 'RIFF');
+  wav.setUint32(4, 36 + dataBytes, true);
+  putTag(8, 'WAVE');
+  // "fmt " chunk: 16-byte body, format tag 1, 16 bits per sample.
+  putTag(12, 'fmt ');
+  wav.setUint32(16, 16, true);
+  wav.setUint16(20, 1, true);
+  wav.setUint16(22, channelCount, true);
+  wav.setUint32(24, sampleRate, true);
+  wav.setUint32(28, sampleRate * blockAlign, true);
+  wav.setUint16(32, blockAlign, true);
+  wav.setUint16(34, 16, true);
+  // "data" chunk: samples interleaved frame by frame.
+  putTag(36, 'data');
+  wav.setUint32(40, dataBytes, true);
+  const planes: Float32Array[] = [];
+  for (let c = 0; c < channelCount; c += 1) planes.push(audioBuf.getChannelData(c));
+  let cursor = HEADER_BYTES;
+  for (let frame = 0; frame < frameCount; frame += 1) {
+    for (let c = 0; c < channelCount; c += 1) {
+      // Clamp to [-1, 1], then scale asymmetrically to the int16 range.
+      const clamped = Math.min(1, Math.max(-1, planes[c][frame]));
+      const scale = clamped < 0 ? 0x8000 : 0x7fff;
+      wav.setInt16(cursor, clamped * scale, true);
+      cursor += BYTES_PER_SAMPLE;
+    }
+  }
+  return new Blob([buffer], { type: 'audio/wav' });
+};
+
+/** Synthesize notes given in absolute seconds and return them as a WAV Blob. */
+export const renderNotesToBlob = async (
+  notes: RenderNote[],
+  opts: RenderOptions = {},
+): Promise<{ blob: Blob; duration: number }> => {
+  const sampleRate = opts.sampleRate ?? 44100;
+  const tailSec = opts.tailSec ?? 0.6;
+  // End time of the latest-finishing note (0 when there are no notes).
+  const lastNoteEnd = notes.reduce((latest, n) => {
+    const noteEnd = n.startSec + n.durationSec;
+    return noteEnd > latest ? noteEnd : latest;
+  }, 0);
+  // Never render less than 0.1s of audio.
+  const renderSec = Math.max(0.1, lastNoteEnd + tailSec);
+  const frameCount = Math.ceil(renderSec * sampleRate);
+  const offline = new OfflineAudioContext(2, frameCount, sampleRate);
+  notes.forEach((n) => {
+    triggerSynthVoice(offline, offline.destination, n.midi, n.velocity, n.startSec, n.durationSec, 1);
+  });
+  const rendered = await offline.startRendering();
+  return { blob: encodeWavBlob(rendered), duration: rendered.duration };
+};
+
+/**
+ * Render step-grid notes (piano roll / step sequencer) to a WAV Blob.
+ *
+ * The rendered audio always spans at least the pattern's nominal length
+ * (`totalSteps` sixteenth-note steps) plus a 0.6s tail, so trailing rests are
+ * present in the audio itself and the returned `duration` matches the blob.
+ *
+ * @param notes      Notes positioned on the step grid (`step`/`length` in steps).
+ * @param bpm        Tempo; values below 40 are treated as 40.
+ * @param totalSteps Nominal pattern length in steps.
+ */
+export const renderStepNotesToBlob = async (
+  notes: Array<{ note: number; velocity: number; step: number; length: number }>,
+  bpm: number,
+  totalSteps: number,
+): Promise<{ blob: Blob; duration: number }> => {
+  const TAIL_SEC = 0.6;
+  const stepSec = 60 / Math.max(40, bpm) / 4; // 16th-note seconds
+  const renderNotes: RenderNote[] = notes.map((n) => ({
+    midi: n.note,
+    velocity: n.velocity,
+    startSec: n.step * stepSec,
+    durationSec: n.length * stepSec,
+  }));
+  let lastNoteEnd = 0;
+  for (const n of renderNotes) {
+    const end = n.startSec + n.durationSec;
+    if (end > lastNoteEnd) lastNoteEnd = end;
+  }
+  // renderNotesToBlob sizes its buffer as (last note end + tailSec). Stretch
+  // the tail so the total equals max(last note end, nominal length) + 0.6s;
+  // otherwise a pattern ending in rests renders shorter than it plays and
+  // the reported duration would not match the blob.
+  const nominal = totalSteps * stepSec;
+  const tailSec = Math.max(lastNoteEnd, nominal) - lastNoteEnd + TAIL_SEC;
+  return renderNotesToBlob(renderNotes, { tailSec });
+};
+
+/** Decode a Standard MIDI File buffer and synthesize its notes to a WAV Blob. */
+export const renderMidiBufferToBlob = async (
+  buf: ArrayBuffer | Uint8Array,
+): Promise<{ blob: Blob; duration: number }> => {
+  const parsed = parseMidi(buf);
+  // Fall back to 480 PPQ / 120 BPM when the file reports none; tempo is
+  // floored at 20 BPM. A single tempo is applied to every tick.
+  const ticksPerQuarter = parsed.ppq || 480;
+  const tempo = Math.max(20, parsed.bpm || 120);
+  const secPerTick = 60 / tempo / ticksPerQuarter;
+  const notes: RenderNote[] = [];
+  for (const track of parsed.tracks) {
+    for (const n of track.notes) {
+      notes.push({
+        midi: n.note,
+        velocity: n.velocity,
+        startSec: n.tick * secPerTick,
+        // Give every note at least 20ms so it is long enough to sound.
+        durationSec: Math.max(0.02, n.durationTicks * secPerTick),
+      });
+    }
+  }
+  if (notes.length === 0) throw new Error('MIDI has no playable notes');
+  return renderNotesToBlob(notes);
+};
diff --git a/frontend/src/lib/sendToTargets.ts b/frontend/src/lib/sendToTargets.ts
index 0823efb..35c2cca 100644
--- a/frontend/src/lib/sendToTargets.ts
+++ b/frontend/src/lib/sendToTargets.ts
@@ -21,6 +21,7 @@ import { useBottomPanelStore } from '../state/bottomPanelStore';
import { usePianoRollStore } from '../state/pianoRollStore';
import { addBlobsToChimera } from './chimeraClient';
import { parseMidi } from './midi';
+import { renderMidiBufferToBlob } from './midiSynth';
import { logError, logInfo } from '../state/logStore';
/** Default mime for stems / mic recordings when none provided. */
@@ -205,6 +206,26 @@ export async function sendMidiIdToTarget(midiId: string, target: MidiSendTarget)
}
}
+/**
+ * Wrap a library MIDI row as a SendableAudio whose audio is synthesized on
+ * demand. The returned object can be handed to any audio destination
+ * (editor / init / inpaint / chimera) like a stem or track; nothing is
+ * fetched or rendered until a consumer calls `fetcher`.
+ */
+export function midiIdToSendable(midiId: string, label = 'midi'): SendableAudio {
+  const renderToWav = async (): Promise<Blob> => {
+    const res = await fetch(`/api/midi/file/${midiId}`);
+    if (!res.ok) throw new Error(`midi ${midiId} fetch HTTP ${res.status}`);
+    const rendered = await renderMidiBufferToBlob(await res.arrayBuffer());
+    return rendered.blob;
+  };
+  return { label, mimeType: 'audio/wav', fetcher: renderToWav };
+}
+
/** Build a SendableAudio from a stem row pulled from /api/library/_all/stems. */
export function stemRowToSendable(row: Record): SendableAudio {
const stemId = String(row.id ?? '');
diff --git a/frontend/src/views/LibraryView.tsx b/frontend/src/views/LibraryView.tsx
index 7a71c94..052da24 100644
--- a/frontend/src/views/LibraryView.tsx
+++ b/frontend/src/views/LibraryView.tsx
@@ -24,8 +24,10 @@ import { logError, logInfo } from '../state/logStore';
import { addBlobsToChimera } from '../lib/chimeraClient';
import { listMedia, importMedia, deleteMedia, MEDIA_ACCEPT } from '../lib/mediaLibrary';
import { setAudioDragData } from '../lib/audioDnD';
+import { renderMidiBufferToBlob } from '../lib/midiSynth';
import {
loadMidiIntoPianoRoll,
+ midiIdToSendable,
sendAudioToChimera,
sendAudioToEditor,
sendAudioToInit,
@@ -308,6 +310,27 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa
}
}, []);
+ // In-place refresh of the stems / midi indexes (no null-flicker, unlike the
+ // lazy first-load). Passed to SubTabList so favorite / delete never reset the
+ // sub-tab to "Loading…". A failed or non-OK fetch is logged and the list kept.
+ const refreshStems = React.useCallback(async () => {
+   try {
+     const j = await fetch('/api/library/_all/stems').then((r) => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))));
+     setAllStems(j.stems || []);
+   } catch (e) {
+     logError('library', `Failed to refresh stems: ${e instanceof Error ? e.message : String(e)}`);
+   }
+ }, []);
+
+ const refreshMidi = React.useCallback(async () => {
+   try {
+     const j = await fetch('/api/library/_all/midi').then((r) => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))));
+     setAllMidis(j.midis || []);
+   } catch (e) {
+     logError('library', `Failed to refresh MIDI: ${e instanceof Error ? e.message : String(e)}`);
+   }
+ }, []);
+
const stemsByParent = useMemo(() => {
const map: Record>> = {};
(allStems || []).forEach((s) => {
@@ -1026,6 +1049,7 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa
parentTitles={Object.fromEntries(entries.map((e) => [e.id, e.title]))}
kind="stem"
placeholder={allStems === null ? 'Loading stems…' : 'No stems yet. Enable auto-stems in Settings or right-click a track → Separate stems.'}
+ onMutated={refreshStems}
/>
)}
{subTab === 'midi' && (
@@ -1034,6 +1058,7 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa
parentTitles={Object.fromEntries(entries.map((e) => [e.id, e.title]))}
kind="midi"
placeholder={allMidis === null ? 'Loading MIDI…' : 'No MIDI yet. Enable auto-MIDI in Settings or right-click a track → Convert to MIDI.'}
+ onMutated={refreshMidi}
/>
)}
{subTab === 'video' && (
@@ -1589,6 +1614,8 @@ interface SubTabListProps {
parentTitles: Record;
kind: 'stem' | 'midi';
placeholder: string;
+ /** Re-fetch the index in place after a favorite toggle or delete. */
+ onMutated: () => void | Promise;
}
type SubTabRowPayload =
@@ -1596,13 +1623,77 @@ type SubTabRowPayload =
| { kind: 'stem'; row: Record };
-const SubTabList: React.FC = ({ byParent, parentTitles, kind, placeholder }) => {
+const SubTabList: React.FC = ({ byParent, parentTitles, kind, placeholder, onMutated }) => {
const parentIds = Object.keys(byParent);
// Shared ContextMenu primitive — fixes drift under .dense-layout
// zoom and gives consistent close-on-outside behavior across the
// app (plan step 3d migration).
const rowMenu = useContextMenu();
+ // Stems and MIDI are first-class library items: they play through the
+ // global engine, can be favorited, and can be deleted independently of
+ // their parent track. MIDI playback synthesizes via the shared sawtooth
+ // engine in lib/midiSynth (no soundfont needed).
+ const engineLoad = usePlayerStore((s) => s.load);
+ const enginePlay = usePlayerStore((s) => s.play);
+ const enginePause = usePlayerStore((s) => s.pause);
+ const engineIsPlaying = usePlayerStore((s) => s.isPlaying);
+ const engineEntryId = usePlayerStore((s) => s.currentEntryId);
+ const [playingRowKey, setPlayingRowKey] = useState<string | null>(null); // `${kind}:${id}` last started here
+ const [busyRowKey, setBusyRowKey] = useState<string | null>(null); // row whose blob is being fetched / rendered
+
+ // Stems / MIDI load with no entryId, so currentEntryId is null while one is
+ // playing. If a real track takes over the engine, currentEntryId goes
+ // non-null and our rows stop showing the pause state.
+ const rowIsPlaying = (rowKey: string) =>
+   playingRowKey === rowKey && engineIsPlaying && engineEntryId === null;
+
+ // Toggle playback for one row: pause if it is the row currently playing,
+ // otherwise fetch (or synthesize) its blob, load it into the engine and play.
+ // Failures are logged, not thrown. Only this row's own busy flag is released.
+ const playRow = async (rowKey: string, label: string, fetchBlob: () => Promise<Blob>) => {
+   if (rowIsPlaying(rowKey)) { enginePause(); return; }
+   setBusyRowKey(rowKey);
+   try {
+     const blob = await fetchBlob();
+     await engineLoad(blob, { label });
+     enginePlay();
+     setPlayingRowKey(rowKey);
+   } catch (e) {
+     logError('library', `Could not play ${label}: ${e instanceof Error ? e.message : String(e)}`);
+   } finally {
+     setBusyRowKey((key) => (key === rowKey ? null : key)); // keep a newer row's spinner alive
+   }
+ };
+
+ // Flip the favorite flag of a stem or MIDI row via PATCH, then re-pull the
+ // index through onMutated. `current` is the value shown now; its inverse is
+ // sent. Any failure (network or non-OK status) is logged and swallowed.
+ const toggleFavorite = async (isMidi: boolean, rowId: string, current: boolean) => {
+   const endpoint = isMidi ? `/api/midi/file/${rowId}` : `/api/library/stems/${rowId}`;
+   const init = { method: 'PATCH', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ favorite: !current }) };
+   try {
+     const response = await fetch(endpoint, init);
+     if (!response.ok) throw new Error(`HTTP ${response.status}`);
+     await onMutated();
+   } catch (err) {
+     logError('library', `Could not update favorite: ${err instanceof Error ? err.message : String(err)}`);
+   }
+ };
+
+ const deleteRow = async (isMidi: boolean, rowId: string, label: string) => { // confirm → DELETE → log → onMutated
+   if (!window.confirm(`Delete "${label}"? This removes the file from disk and cannot be undone.`)) return;
+   const [endpoint, noun] = isMidi ? [`/api/midi/file/${rowId}`, 'MIDI'] : [`/api/library/stems/${rowId}`, 'stem'];
+   try {
+     const response = await fetch(endpoint, { method: 'DELETE' });
+     if (!response.ok) throw new Error(`HTTP ${response.status}`);
+     logInfo('library', `Deleted ${noun} "${label}".`);
+     await onMutated();
+   } catch (err) {
+     logError('library', `Could not delete: ${err instanceof Error ? err.message : String(err)}`);
+   }
+ };
+
if (parentIds.length === 0) {
return {placeholder}
;
}
@@ -1612,6 +1703,7 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
let menuTitle = '';
if (payload?.kind === 'midi') {
+ const sendable = midiIdToSendable(payload.midiId, payload.label);
menuTitle = `MIDI · ${payload.label}`;
menuItems = [
{
@@ -1626,6 +1718,33 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
icon: ,
onSelect: () => { void sendMidiIdToTarget(payload.midiId, 'step-seq'); },
},
+ { type: 'separator' },
+ {
+ type: 'item',
+ label: 'Send to editor (synth)',
+ icon: ,
+ hint: 'new track',
+ onSelect: () => { void sendAudioToEditor(sendable, 'editor-new-track'); },
+ },
+ {
+ type: 'item',
+ label: 'Send to Init audio (synth)',
+ icon: ,
+ onSelect: () => { void sendAudioToInit(sendable); },
+ },
+ {
+ type: 'item',
+ label: 'Send to Inpaint (synth)',
+ icon: ,
+ onSelect: () => { void sendAudioToInpaint(sendable); },
+ },
+ {
+ type: 'item',
+ label: 'Add to Chimera (synth)',
+ icon: ,
+ onSelect: () => { void sendAudioToChimera([sendable]); },
+ },
+ { type: 'separator' },
{
type: 'item',
label: 'Download .mid',
@@ -1639,6 +1758,13 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
document.body.removeChild(a);
},
},
+ {
+ type: 'item',
+ label: 'Delete MIDI',
+ icon: ,
+ danger: true,
+ onSelect: () => { void deleteRow(true, payload.midiId, payload.label); },
+ },
];
} else if (payload?.kind === 'stem') {
const stemId = String(payload.row.id ?? '');
@@ -1671,6 +1797,7 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
icon: ,
onSelect: () => { void sendAudioToChimera([sendable]); },
},
+ { type: 'separator' },
{
type: 'item',
label: 'Download .wav',
@@ -1684,6 +1811,13 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
document.body.removeChild(a);
},
},
+ {
+ type: 'item',
+ label: 'Delete stem',
+ icon: ,
+ danger: true,
+ onSelect: () => { void deleteRow(false, stemId, stemName); },
+ },
];
}
@@ -1697,30 +1831,83 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
{byParent[pid].map((row, idx) => {
const isMidi = kind === 'midi';
- const midiId = String(row.id ?? '');
- const label = String((isMidi ? row.source : row.stem_name) ?? 'item');
+ const rowId = String(row.id ?? '');
+ const name = isMidi ? String(row.source ?? 'midi') : String(row.stem_name ?? 'stem');
+ const label = parentTitles[pid] ? `${parentTitles[pid]} · ${name}` : name;
+ const favorite = !!row.favorite;
+ const rowKey = `${kind}:${rowId}`;
+ const busy = busyRowKey === rowKey;
+ const playing = rowIsPlaying(rowKey);
+ const meta = isMidi
+ ? `${row.engine ?? ''}`
+ : `${row.model ?? ''} ${row.model_variant ?? ''}`.trim();
return (
{
if (isMidi) {
- if (!midiId) return;
- rowMenu.open(e, { kind: 'midi', midiId, label });
+ if (!rowId) return;
+ rowMenu.open(e, { kind: 'midi', midiId: rowId, label });
} else {
rowMenu.open(e, { kind: 'stem', row });
}
}}
- title="Right-click to send this anywhere"
+ title="Right-click for more — send to editor / init / inpaint / chimera"
>
-
- {kind === 'stem' ? String(row.stem_name ?? 'stem') : String(row.source ?? 'midi')}
-
-
- {kind === 'stem'
- ? `${row.model ?? ''} ${row.model_variant ?? ''}`.trim()
- : `${row.engine ?? ''}`}
-
+
{ void toggleFavorite(isMidi, rowId, favorite); }}
+ title={favorite ? 'Unfavorite' : 'Favorite'}
+ aria-label={favorite ? `Unfavorite ${name}` : `Favorite ${name}`}
+ >
+
+
+
{
+ void playRow(
+ rowKey,
+ label,
+ isMidi
+ ? async () => {
+ const res = await fetch(`/api/midi/file/${rowId}`);
+ if (!res.ok) throw new Error(`HTTP ${res.status}`);
+ const { blob } = await renderMidiBufferToBlob(await res.arrayBuffer());
+ return blob;
+ }
+ : async () => {
+ const res = await fetch(`/api/library/stems/${rowId}/audio`);
+ if (!res.ok) throw new Error(`HTTP ${res.status}`);
+ return res.blob();
+ },
+ );
+ }}
+ title={playing ? 'Pause' : isMidi ? 'Play (synth)' : 'Play'}
+ aria-label={playing ? `Pause ${name}` : `Play ${name}`}
+ >
+ {busy ? (
+
+ ) : playing ? (
+
+ ) : (
+
+ )}
+
+
{name}
+
{meta}
+
{ void deleteRow(isMidi, rowId, name); }}
+ title="Delete"
+ aria-label={`Delete ${name}`}
+ >
+
+
);
})}
From 53c1d280eefa15abcf802dbca4349bdf323da3c7 Mon Sep 17 00:00:00 2001
From: Daniel Joaquin Trujillo
<54636507+danieljtrujillo@users.noreply.github.com>
Date: Sat, 13 Jun 2026 19:47:29 -0700
Subject: [PATCH 2/3] fix: module-list reliability, Windows MIDI/stems
incidents, Suno a11y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Settings 'Backend Modules' could show a false 'No modules found' when opened
during a backend (re)start, because the modal fetched on open and silently
blanked on any error. Now the catalog loads once on backend-ready into a
shared moduleStore (bounded retry with backoff, cached) and Settings just reads it,
so there is nothing to fail on open; a real error shows an error + Retry.
Also:
- MIDI (Windows): basic-pitch's emoji status output no longer crashes MIDI
conversion on cp1252 consoles — its stdout/stderr is captured into a text
buffer (engine.py), so transcription failures report the real cause.
- Stems sidecar: probe now checks all critical packages (demucs/torch/
torchaudio/torchcrepe), and ensure_running installs deps BEFORE spawning
when any are missing instead of letting run_backend.py self-install and burn
the 300s readiness window; the timeout error now names the dep state.
- Suno API key field is wrapped in a
+ ) : moduleError ? (
+
+
+
Couldn't reach the backend ({moduleError}).
+
Modules are loaded from the server — this is a connection issue, not missing modules.
+
void loadModules()}
+ className="mt-1 px-3 py-1 rounded border border-purple-500/40 bg-purple-500/15 hover:bg-purple-500/25 text-purple-200 text-[9px] font-black uppercase tracking-widest"
+ >
+ Retry
+
+
) : modules.length === 0 ? (
No modules found in backend/modules/
) : (
diff --git a/frontend/src/state/moduleStore.ts b/frontend/src/state/moduleStore.ts
new file mode 100644
index 0000000..4f625ed
--- /dev/null
+++ b/frontend/src/state/moduleStore.ts
@@ -0,0 +1,87 @@
+/**
+ * Backend module catalog store.
+ *
+ * The Settings modal used to fetch `/api/modules/all` every time it opened —
+ * which meant opening it during a backend (re)start hit a transient failure
+ * and rendered a misleading "No modules found", as if every module had
+ * vanished. This store fixes the root cause: the catalog loads ONCE when the
+ * backend becomes ready (preloaded from App), retries with backoff (up to six
+ * attempts) and is cached. By the time the user opens Settings the list is
+ * normally already there; if every attempt fails, `error` is set instead.
+ */
+import { create } from 'zustand';
+
+export interface ModuleConfig { // one element of the `/api/modules/all` array
+ name: string;
+ label?: string;
+ description?: string;
+ version?: string;
+ enabled: boolean; // flipped locally by setEnabled after a successful PATCH
+ api_prefix?: string;
+ _dir?: string; // key that setEnabled matches against its `dirName` argument
+ _loaded?: boolean;
+ [key: string]: unknown; // tolerate any additional keys in the payload
+}
+
+interface ModuleStore {
+  modules: ModuleConfig[]; // last catalog fetched successfully ([] before that)
+  loaded: boolean; // true once a fetch has succeeded
+  loading: boolean; // true while load() is fetching / retrying
+  error: string | null; // message of the final failed attempt, else null
+  /** Load (or reload with `force`) the catalog; transient failures are retried with backoff, then `error` is set. */
+  load: (opts?: { force?: boolean }) => Promise<void>;
+  /** Toggle a module enabled flag (persists to its module.json on the backend); resolves false on failure. */
+  setEnabled: (dirName: string, enabled: boolean) => Promise<boolean>;
+}
+
+const sleep = (ms: number) => new Promise((resolve) => { setTimeout(resolve, ms); }); // settles after `ms` milliseconds
+
+export const useModuleStore = create<ModuleStore>((set, get) => ({
+  modules: [],
+  loaded: false,
+  loading: false,
+  error: null,
+  // load: no-op while a load is in flight, or once loaded unless `force` is set.
+  load: async (opts = {}) => {
+    const s = get();
+    if (s.loading) return;
+    if (s.loaded && !opts.force) return;
+    set({ loading: true, error: null });
+    // Retry with backoff: the backend may still be binding right after launch.
+    const maxAttempts = 6; // one constant drives both the loop bound and the give-up check
+    let delay = 500;
+    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+      try {
+        const res = await fetch('/api/modules/all');
+        if (!res.ok) throw new Error(`HTTP ${res.status}`);
+        const data = (await res.json()) as ModuleConfig[];
+        if (!Array.isArray(data)) throw new Error('unexpected response shape');
+        set({ modules: data, loaded: true, loading: false, error: null });
+        return;
+      } catch (e) {
+        if (attempt === maxAttempts) {
+          set({ loading: false, error: e instanceof Error ? e.message : String(e) });
+          return;
+        }
+        await sleep(delay);
+        delay = Math.min(delay * 2, 4000); // waits: 500, 1000, 2000, 4000, 4000 ms
+      }
+    }
+  },
+  setEnabled: async (dirName, enabled) => { // PATCH, then mirror the flag locally; false on any failure
+    try {
+      const res = await fetch(`/api/modules/${dirName}/enabled`, {
+        method: 'PATCH',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ enabled }),
+      });
+      if (!res.ok) return false;
+      set((state) => ({
+        modules: state.modules.map((m) => (m._dir === dirName ? { ...m, enabled } : m)),
+      }));
+      return true;
+    } catch {
+      return false;
+    }
+  },
+}));
diff --git a/frontend/src/suno/SunoKeySettings.tsx b/frontend/src/suno/SunoKeySettings.tsx
index 14e3516..3c8add5 100644
--- a/frontend/src/suno/SunoKeySettings.tsx
+++ b/frontend/src/suno/SunoKeySettings.tsx
@@ -99,24 +99,36 @@ export const SunoKeySettings: React.FC = () => {
platform console. It's stored on the backend — never in the browser — and used for cloud generation.
-
+
{err && {err} }
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index d2d3dea..4af29a4 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -58,14 +58,15 @@ export default defineConfig(({mode}) => {
timeout: 0,
proxyTimeout: 0,
configure: (proxy) => {
- proxy.on('error', (err, _req, res) => {
- // Return a proper JSON error instead of silently swallowing.
- // Without this, failed proxy requests hang indefinitely or
- // fall through to Vite's SPA handler producing misleading
- // "Not Found" or HTML responses instead of clear error JSON.
- if (res && !res.headersSent) {
- res.writeHead(502, { 'Content-Type': 'application/json' });
- res.end(JSON.stringify({
+ proxy.on('error', (_err, _req, res) => {
+ // For HTTP errors res is ServerResponse; for WebSocket errors it
+ // is a net.Socket (no writeHead). Guard before writing headers.
+ const r = res as Record;
+ if (typeof r['writeHead'] === 'function' && !r['headersSent']) {
+ (r['writeHead'] as (s: number, h: Record) => void)(
+ 502, { 'Content-Type': 'application/json' }
+ );
+ (r['end'] as (b: string) => void)(JSON.stringify({
detail: 'Backend unreachable — is the server running on port 8600?',
}));
}
From a301d43291f7ad95e73cdd59e87c50d811fb99a0 Mon Sep 17 00:00:00 2001
From: Daniel Joaquin Trujillo
<54636507+danieljtrujillo@users.noreply.github.com>
Date: Sat, 13 Jun 2026 20:19:11 -0700
Subject: [PATCH 3/3] fix(generate): reliably surface freshly-generated tracks
in the library (G1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After a generation completes the backend writes artifacts synchronously, but
the library list index can lag that write by a beat — when it did, the entry
wasn't in the just-refreshed list and the track silently failed to appear
('manual reload'). Now the post-generation reconciliation retries the refresh
a few times until the expected `<jobId>_00` id shows up, and on genuine
failure surfaces a visible status message instead of only logging.
Verified the rest of the add-to-library chain end-to-end: the /api/library/
import endpoint (200 + full entry), the frontend contract, importEntry's
in-place store update, and the backend's `<jobId>_00`-style id scheme all match.
docs: record G15 research findings (Quest video-in without MQDH — scrcpy+OBS
virtual cam Tier 1 needs only a VJ device picker; raw passthrough is Tier 2
via the Unity Passthrough Camera API).
---
...al-layout-vj-library-optimizations-plan.md | 7 +++++
docs/reports/feature-doc-coverage-report.md | 2 +-
docs/reports/feature-doc-coverage.json | 4 +--
docs/screenshots/manifest.json | 2 +-
frontend/src/state/generateStore.ts | 28 +++++++++++++++----
5 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md
index 37633de..24c74c7 100644
--- a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md
+++ b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md
@@ -425,6 +425,13 @@ Two related problems, one task:
Guard rail: the VJ app lives in a separate repo working tree — the error-message fix is a separate commit there (standing constraint §7).
+**Research findings (2026-06-13).** Quest video-in without MQDH is feasible; two tiers:
+
+- **Tier 1 — rendered headset view, zero app changes to the source (RECOMMENDED first).** The Quest is Android, so `scrcpy` (FOSS, ADB over USB or Wi-Fi, no MQDH) mirrors the in-headset rendered view to a desktop window. Pipe that window through OBS → **OBS Virtual Camera**, which then appears as a normal `videoinput` device. The VJ app already takes a camera via `getUserMedia`, so the only code gap is the camera source: `GANTASMO-LIVE-VJ/src/useMedia.ts:71` hardcodes `{ facingMode: 'environment' }` with NO device picker, so it grabs the default camera and the user can't choose the OBS virtual cam. **Concrete task: add a device picker** — `navigator.mediaDevices.enumerateDevices()` → list `videoinput`s → request `{ deviceId: { exact } }`. That single change unlocks scrcpy→OBS→VJ (and any other capture device) with no Quest-side dependency beyond scrcpy+OBS.
+- **Tier 2 — raw passthrough, needs Unity work.** The Quest browser cannot access the passthrough/headset cameras (no web API). Raw passthrough requires the **Quest Passthrough Camera API** (Quest 3, recent Meta SDK) inside a native/Unity app, which then streams out over WebRTC/RTSP to the VJ app as a source. This ties into the existing `GANTASMO-MIDI` Unity app (memory `project_gantasmo_midi_unity`) — reuse its transport. Bigger lift; only pursue if Tier 1's rendered view isn't enough.
+
+Dependency footprint: Tier 1 = scrcpy + OBS (both FOSS, user-installed, no app deps) + a ~30-line device-picker change in the VJ app. Tier 2 = Unity Passthrough Camera API + a WebRTC/RTSP path. Recommend shipping Tier 1's device picker first.
+
## 6.8. Phase H — active incident fix: stems sidecar timeout + MIDI charmap error
User ask (2026-06-13): resolve the backend failure where library import returned 200, stems sidecar failed to write `backend_port.txt` within 300 seconds after missing `torch`/`torchcrepe`, `torchvision 0.27.0` required `torch==2.12.0` but `torch 2.11.0+cu128` was installed, and `basic_pitch` MIDI conversion failed on Windows with a `charmap` emoji encoding error.
diff --git a/docs/reports/feature-doc-coverage-report.md b/docs/reports/feature-doc-coverage-report.md
index bb18e62..872cc5e 100644
--- a/docs/reports/feature-doc-coverage-report.md
+++ b/docs/reports/feature-doc-coverage-report.md
@@ -1,7 +1,7 @@
# Feature Documentation Coverage Report
> [!NOTE]
-> Generated: 2026-06-14T02:47:32.303Z · Git revision: `66e4238b81a2` · Repomix tracked: **no**
+> Generated: 2026-06-14T03:19:14.053Z · Git revision: `53c1d280eefa` · Repomix tracked: **no**
## Audit Dashboard
diff --git a/docs/reports/feature-doc-coverage.json b/docs/reports/feature-doc-coverage.json
index 573f98c..43e89b7 100644
--- a/docs/reports/feature-doc-coverage.json
+++ b/docs/reports/feature-doc-coverage.json
@@ -1,6 +1,6 @@
{
- "generatedAt": "2026-06-14T02:47:32.303Z",
- "repoRevision": "66e4238b81a2",
+ "generatedAt": "2026-06-14T03:19:14.053Z",
+ "repoRevision": "53c1d280eefa",
"repomixContext": {
"path": "repomix-output.md",
"present": false,
diff --git a/docs/screenshots/manifest.json b/docs/screenshots/manifest.json
index 05f96c7..f5ff1af 100644
--- a/docs/screenshots/manifest.json
+++ b/docs/screenshots/manifest.json
@@ -1,5 +1,5 @@
{
- "generatedAt": "2026-06-14T02:52:18.096Z",
+ "generatedAt": "2026-06-14T03:21:55.908Z",
"entries": [
{
"file": "01-shell-make.png",
diff --git a/frontend/src/state/generateStore.ts b/frontend/src/state/generateStore.ts
index 55bad02..75af6d0 100644
--- a/frontend/src/state/generateStore.ts
+++ b/frontend/src/state/generateStore.ts
@@ -712,10 +712,25 @@ export const useGenerateStore = create()((set, get) => ({
// Load the first new entry into the player so playback works
// immediately. The blob comes from the backend streaming URL.
- const after = useLibraryStore.getState().entries;
- const firstEntry = items[0]?.audio_base64
- ? after.find((e) => e.id === `${jobId}_00`) ?? after.find((e) => e.id === jobId)
- : null;
+ //
+ // 'completed' is only reported after the backend has written the artifacts,
+ // yet the library list index can trail that write briefly. When it does, the
+ // freshly refreshed list lacks the entry and the track seems to be missing
+ // until a manual reload, so poll refresh() a few times for the expected id.
+ const findFirst = () => {
+   if (!items[0]?.audio_base64) return null;
+   const entries = useLibraryStore.getState().entries;
+   const byIndexedId = entries.find((e) => e.id === `${jobId}_00`);
+   return byIndexedId ?? entries.find((e) => e.id === jobId) ?? null;
+ };
+ let firstEntry = findFirst();
+ let attempt = 0;
+ while (!firstEntry && items[0]?.audio_base64 && attempt < 5) {
+   await wait(400);
+   await useLibraryStore.getState().refresh();
+   firstEntry = findFirst();
+   attempt += 1;
+ }
if (firstEntry) {
try {
const loadT0 = performance.now();
@@ -735,7 +750,10 @@ export const useGenerateStore = create()((set, get) => ({
logError('generate', `Player load failed: ${msg}`);
}
} else {
- logError('generate', `Could not find freshly-saved entry for job ${jobId}; library may need a manual reload.`);
+ // Retries exhausted: log it and put a hint in the status bar. NOTE(review): the
+ // setText('GENERATION COMPLETE') right after this branch looks like it overwrites the hint — confirm.
+ logError('generate', `Could not find freshly-saved entry for job ${jobId} after retries — try reloading the library panel.`);
+ useStatusBarStore.getState().setText('Saved to disk, but the library list did not refresh — reload the Library panel.');
}
useStatusBarStore.getState().setText('GENERATION COMPLETE');