From 66e4238b81a25e60152b3290c3a1d8943f94c295 Mon Sep 17 00:00:00 2001 From: Daniel Joaquin Trujillo <54636507+danieljtrujillo@users.noreply.github.com> Date: Sat, 13 Jun 2026 19:43:41 -0700 Subject: [PATCH 1/3] feat(library): make stems & MIDI first-class Stems and MIDI in the library can now be played, favorited, deleted, and routed anywhere audio goes, instead of only offering a right-click send menu. - DB schema v5: favorite column on stems + midis; get/set-favorite/delete methods for both. - Endpoints: PATCH/DELETE /api/library/stems/{id} and PATCH/DELETE /api/midi/file/{id} (delete removes the file + row, leaves the parent track and siblings intact). - Library STEMS/MIDI rows get inline play/pause, favorite star, and delete; MIDI right-click now routes to editor/init/inpaint/chimera. - Shared lib/midiSynth.ts extracts the offline synth voice + WAV encoder (renderNotesToBlob / renderStepNotesToBlob / renderMidiBufferToBlob); PianoRoll delegates to it so previews, the editor bounce, and library MIDI all sound identical. midiIdToSendable() makes any library MIDI a lazy SendableAudio. 
--- backend/modules/library/db.py | 69 +++++- backend/modules/library/router.py | 37 ++++ backend/modules/midi/engine.py | 32 ++- backend/modules/midi/router.py | 40 +++- backend/modules/stems/sidecar.py | 132 ++++++++++-- docs/reports/feature-doc-coverage-report.md | 2 +- docs/reports/feature-doc-coverage.json | 4 +- docs/screenshots/manifest.json | 2 +- frontend/src/components/audio/PianoRoll.tsx | 102 +-------- frontend/src/lib/midiSynth.ts | 160 ++++++++++++++ frontend/src/lib/sendToTargets.ts | 21 ++ frontend/src/views/LibraryView.tsx | 219 ++++++++++++++++++-- 12 files changed, 675 insertions(+), 145 deletions(-) create mode 100644 frontend/src/lib/midiSynth.ts diff --git a/backend/modules/library/db.py b/backend/modules/library/db.py index 8e68800..f5c4d3e 100644 --- a/backend/modules/library/db.py +++ b/backend/modules/library/db.py @@ -36,7 +36,7 @@ log = logging.getLogger(__name__) -SCHEMA_VERSION = 4 +SCHEMA_VERSION = 5 # Each tuple is (schema_version_after_running, statements list). @@ -234,6 +234,16 @@ "CREATE INDEX IF NOT EXISTS idx_entries_play_count ON entries(play_count DESC)", ], ), + ( + 5, + [ + # Stems and MIDI become first-class library items: they can be + # favorited just like parent tracks. Default 0 keeps existing + # rows unflagged. 
+ "ALTER TABLE stems ADD COLUMN favorite INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE midis ADD COLUMN favorite INTEGER NOT NULL DEFAULT 0", + ], + ), ] @@ -708,6 +718,35 @@ def list_stems(self, entry_id: str) -> list[dict[str, Any]]: cur.close() return [dict(r) for r in rows] + def get_stem(self, stem_id: str) -> Optional[dict[str, Any]]: + """Look one stem row up by its globally-unique id.""" + with self._writelock: + cur = self._conn.cursor() + row = cur.execute("SELECT * FROM stems WHERE id = ?", (stem_id,)).fetchone() + cur.close() + return dict(row) if row else None + + def set_stem_favorite(self, stem_id: str, favorite: bool) -> bool: + with self._txn() as cur: + cur.execute( + "UPDATE stems SET favorite = ? WHERE id = ?", + (1 if favorite else 0, stem_id), + ) + return cur.rowcount > 0 + + def delete_stem(self, stem_id: str) -> bool: + """Drop one stem row. Caller is responsible for deleting the file on + disk (the path lives in ``audio_path``).""" + with self._txn() as cur: + cur.execute("DELETE FROM stems WHERE id = ?", (stem_id,)) + deleted = cur.rowcount > 0 + # Polymorphic edges may reference this stem id (stems-of / midi-of). + cur.execute( + "DELETE FROM relations WHERE from_id = ? OR to_id = ?", + (stem_id, stem_id), + ) + return deleted + def add_midi( self, *, @@ -751,6 +790,34 @@ def list_midis(self, entry_id: str) -> list[dict[str, Any]]: cur.close() return [dict(r) for r in rows] + def get_midi(self, midi_id: str) -> Optional[dict[str, Any]]: + """Look one MIDI row up by its globally-unique id.""" + with self._writelock: + cur = self._conn.cursor() + row = cur.execute("SELECT * FROM midis WHERE id = ?", (midi_id,)).fetchone() + cur.close() + return dict(row) if row else None + + def set_midi_favorite(self, midi_id: str, favorite: bool) -> bool: + with self._txn() as cur: + cur.execute( + "UPDATE midis SET favorite = ? 
WHERE id = ?", + (1 if favorite else 0, midi_id), + ) + return cur.rowcount > 0 + + def delete_midi(self, midi_id: str) -> bool: + """Drop one MIDI row. Caller deletes the .mid file on disk + (path lives in ``midi_path``).""" + with self._txn() as cur: + cur.execute("DELETE FROM midis WHERE id = ?", (midi_id,)) + deleted = cur.rowcount > 0 + cur.execute( + "DELETE FROM relations WHERE from_id = ? OR to_id = ?", + (midi_id, midi_id), + ) + return deleted + def add_notation_artifact( self, *, diff --git a/backend/modules/library/router.py b/backend/modules/library/router.py index 7f41bee..376b4b9 100644 --- a/backend/modules/library/router.py +++ b/backend/modules/library/router.py @@ -177,6 +177,43 @@ def stream_stem_audio(stem_id: str) -> FileResponse: raise HTTPException(404, f"stem {stem_id!r} not found") +@router.patch("/stems/{stem_id}") +def update_stem(stem_id: str, patch: dict[str, Any] = Body(...)) -> dict[str, Any]: + """Mutate a stem row. Currently only ``favorite`` is user-mutable so + stems behave like first-class library items.""" + store = get_store() + if store.db is None: + raise HTTPException(503, "library DB not available") + if "favorite" in patch: + ok = store.db.set_stem_favorite(stem_id, bool(patch["favorite"])) + if not ok: + raise HTTPException(404, f"stem {stem_id!r} not found") + row = store.db.get_stem(stem_id) + if row is None: + raise HTTPException(404, f"stem {stem_id!r} not found") + return dict(row) + + +@router.delete("/stems/{stem_id}") +def delete_stem(stem_id: str) -> dict[str, Any]: + """Delete one separated stem (its WAV on disk + its DB row), leaving the + parent track and sibling stems untouched.""" + store = get_store() + if store.db is None: + raise HTTPException(503, "library DB not available") + row = store.db.get_stem(stem_id) + if row is None: + raise HTTPException(404, f"stem {stem_id!r} not found") + audio_path = Path(row.get("audio_path") or "") + if audio_path.is_file(): + try: + audio_path.unlink() + except OSError 
as e: + log.warning("library: failed to delete stem file %s: %s", audio_path, e) + store.db.delete_stem(stem_id) + return {"deleted": stem_id} + + @router.get("/media/{entry_id}") def stream_media(entry_id: str) -> FileResponse: """Stream a video/image library entry. FileResponse honors Range diff --git a/backend/modules/midi/engine.py b/backend/modules/midi/engine.py index 5120fd6..e3900d1 100644 --- a/backend/modules/midi/engine.py +++ b/backend/modules/midi/engine.py @@ -17,7 +17,9 @@ from __future__ import annotations +import contextlib import importlib +import io import logging import shutil import subprocess @@ -226,15 +228,27 @@ def _run_basic_pitch(audio_path: Path, output_path: Path) -> dict: output_path.parent.mkdir(parents=True, exist_ok=True) with tempfile.TemporaryDirectory(dir=str(output_path.parent)) as td: td_path = Path(td) - predict_and_save( - audio_path_list=[str(audio_path)], - output_directory=str(td_path), - save_midi=True, - sonify_midi=False, - save_model_outputs=False, - save_notes=False, - model_or_model_path=ICASSP_2022_MODEL_PATH, - ) + # basic-pitch prints status with emoji (🚨, etc.). On Windows the + # console/log stream is often a legacy code page (cp1252), so the + # library's own print() raises UnicodeEncodeError ('charmap' codec + # can't encode '\U0001f6a8') and kills a conversion that would + # otherwise succeed. Capture its stdout/stderr into a str buffer — + # StringIO holds text, never encodes, so it cannot crash — then log + # the (now harmless) chatter at debug level. 
+ chatter = io.StringIO() + with contextlib.redirect_stdout(chatter), contextlib.redirect_stderr(chatter): + predict_and_save( + audio_path_list=[str(audio_path)], + output_directory=str(td_path), + save_midi=True, + sonify_midi=False, + save_model_outputs=False, + save_notes=False, + model_or_model_path=ICASSP_2022_MODEL_PATH, + ) + captured = chatter.getvalue().strip() + if captured: + log.debug("basic_pitch output: %s", captured) # basic-pitch names: _basic_pitch.mid produced = next(td_path.glob("*_basic_pitch.mid"), None) if produced is None: diff --git a/backend/modules/midi/router.py b/backend/modules/midi/router.py index 00c4ddb..90cff3f 100644 --- a/backend/modules/midi/router.py +++ b/backend/modules/midi/router.py @@ -11,8 +11,9 @@ import logging from pathlib import Path +from typing import Any -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Body, HTTPException from fastapi.responses import FileResponse from backend.modules.library.router import get_store as get_library_store @@ -89,6 +90,43 @@ def get_midi_file(midi_id: str) -> FileResponse: raise HTTPException(404, f"midi row {midi_id!r} not found") +@router.patch("/file/{midi_id}") +def update_midi(midi_id: str, patch: dict[str, Any] = Body(...)) -> dict: + """Mutate a MIDI row. 
Only ``favorite`` is user-mutable so MIDI rows + behave like first-class library items.""" + store = get_library_store() + if store.db is None: + raise HTTPException(503, "library DB not available") + if "favorite" in patch: + ok = store.db.set_midi_favorite(midi_id, bool(patch["favorite"])) + if not ok: + raise HTTPException(404, f"midi row {midi_id!r} not found") + row = store.db.get_midi(midi_id) + if row is None: + raise HTTPException(404, f"midi row {midi_id!r} not found") + return dict(row) + + +@router.delete("/file/{midi_id}") +def delete_midi_file(midi_id: str) -> dict: + """Delete one MIDI conversion (its .mid on disk + its DB row), leaving + the parent track untouched.""" + store = get_library_store() + if store.db is None: + raise HTTPException(503, "library DB not available") + row = store.db.get_midi(midi_id) + if row is None: + raise HTTPException(404, f"midi row {midi_id!r} not found") + midi_path = Path(row.get("midi_path") or "") + if midi_path.is_file(): + try: + midi_path.unlink() + except OSError as e: + log.warning("midi: failed to delete file %s: %s", midi_path, e) + store.db.delete_midi(midi_id) + return {"deleted": midi_id} + + @router.get("/{entry_id}") def list_entry_midis(entry_id: str) -> dict: store = get_library_store() diff --git a/backend/modules/stems/sidecar.py b/backend/modules/stems/sidecar.py index d6adcd9..e49295a 100644 --- a/backend/modules/stems/sidecar.py +++ b/backend/modules/stems/sidecar.py @@ -27,6 +27,7 @@ from __future__ import annotations +import json import logging import os import socket @@ -42,6 +43,50 @@ log = logging.getLogger(__name__) +# Packages the sidecar genuinely needs to separate stems. demucs imports but +# is useless without torch/torchaudio; torchcrepe drives the crepe pitch path. +# The historical probe only checked demucs, so a venv with demucs present but +# torch/torchcrepe missing spawned anyway — then run_backend.py tried to self- +# install them and blew the entire 300s readiness window. 
We now gate on ALL of +# these being importable before spawning. +_CRITICAL_PACKAGES: tuple[str, ...] = ("demucs", "torch", "torchaudio", "torchcrepe") + + +def _probe_packages(python_exe: Path) -> dict: + """Import every critical package in the sidecar Python in ONE subprocess. + + Returns ``{pkg: {"ok": bool, "version": str|None, "error": str|None}}``, + or ``{"_error": ...}`` if the probe itself couldn't run. Cheap (a single + interpreter start) and never raises.""" + script = ( + "import json, importlib\n" + f"pkgs = {list(_CRITICAL_PACKAGES)!r}\n" + "out = {}\n" + "for p in pkgs:\n" + " try:\n" + " m = importlib.import_module(p)\n" + " out[p] = {'ok': True, 'version': getattr(m, '__version__', None)}\n" + " except Exception as e:\n" + " out[p] = {'ok': False, 'error': repr(e)[:300]}\n" + "print(json.dumps(out))\n" + ) + try: + result = subprocess.run( + [str(python_exe), "-c", script], + capture_output=True, + text=True, + timeout=30, + ) + except (subprocess.TimeoutExpired, OSError) as e: + return {"_error": repr(e)} + if result.returncode != 0: + return {"_error": result.stderr.strip()[:300] or "probe subprocess failed"} + try: + return json.loads(result.stdout.strip().splitlines()[-1]) + except (ValueError, IndexError) as e: + return {"_error": f"probe parse failed: {e}"} + + DEFAULT_PACKAGE_PATH = Path(r"D:/StableAudio/JoshOG/integration-package/backend") PORT_FILENAME = "backend_port.txt" # run_backend.py does a dependency check + possible pip install on first @@ -173,23 +218,38 @@ def probe(cfg: Optional[SidecarConfig] = None) -> dict: ) return out + # Per-package import check (demucs + torch + torchaudio + torchcrepe), not + # demucs alone — a venv can import demucs while torch/torchcrepe are missing + # or broken, which is exactly what stalled the sidecar before. 
+ out["packages"] = {} + out["missing_critical"] = [] + out["critical_ok"] = False if cfg.python_exe.is_file(): - try: - result = subprocess.run( - [str(cfg.python_exe), "-c", "import demucs; print(demucs.__version__)"], - capture_output=True, - text=True, - timeout=15, - ) - if result.returncode == 0: - out["demucs_importable"] = True - out["demucs_version"] = result.stdout.strip() + pkgs = _probe_packages(cfg.python_exe) + if "_error" in pkgs: + out["demucs_error"] = pkgs["_error"] + out["missing_critical"] = list(_CRITICAL_PACKAGES) + else: + out["packages"] = pkgs + out["missing_critical"] = [ + p for p in _CRITICAL_PACKAGES if not pkgs.get(p, {}).get("ok") + ] + out["critical_ok"] = len(out["missing_critical"]) == 0 + demucs_info = pkgs.get("demucs", {}) + out["demucs_importable"] = bool(demucs_info.get("ok")) + if demucs_info.get("ok"): + out["demucs_version"] = demucs_info.get("version") else: - out["demucs_error"] = result.stderr.strip()[:300] - except (subprocess.TimeoutExpired, OSError) as e: - out["demucs_error"] = repr(e) + out["demucs_error"] = demucs_info.get("error") + # Surface the first broken critical so logs/UI name a real cause. 
+ if out["missing_critical"]: + first = out["missing_critical"][0] + first_err = pkgs.get(first, {}).get("error") + if first_err and not out.get("demucs_error"): + out["demucs_error"] = f"{first}: {first_err}" else: out["demucs_error"] = f"python_exe not found: {cfg.python_exe}" + out["missing_critical"] = list(_CRITICAL_PACKAGES) port_file = _port_file(cfg) if port_file.is_file(): @@ -201,7 +261,7 @@ def probe(cfg: Optional[SidecarConfig] = None) -> dict: pass out["ok"] = ( - out["package_exists"] and out["run_backend_exists"] and out["demucs_importable"] + out["package_exists"] and out["run_backend_exists"] and out["critical_ok"] ) return out @@ -261,12 +321,19 @@ def ensure_running(self) -> int: if not run_backend.is_file(): raise RuntimeError(f"stems sidecar launcher missing: {run_backend}") - # If demucs isn't importable in the configured Python, install - # deps ourselves rather than letting run_backend.py try (it uses - # plain `python -m pip` which fails in uv-managed venvs that - # ship without pip). We use ensurepip / uv-pip fallback. - if not probe(self.cfg).get("demucs_importable"): - log.info("stems.sidecar: demucs not importable — installing deps first") + # If ANY critical package (demucs/torch/torchaudio/torchcrepe) is + # missing or broken, install deps ourselves BEFORE spawning rather + # than letting run_backend.py try (it uses plain `python -m pip`, + # which fails in uv-managed venvs without pip AND can spend the whole + # readiness window resolving torch conflicts, the original 300s-stall + # bug). We use ensurepip / uv-pip fallback. 
+ pr = probe(self.cfg) + if not pr.get("critical_ok"): + missing = pr.get("missing_critical") or ["demucs"] + log.info( + "stems.sidecar: critical deps not ready (%s) — installing first", + ", ".join(missing), + ) install_result = install_dependencies(self.cfg) if not install_result.get("ok"): err_blob = ( @@ -274,9 +341,20 @@ def ensure_running(self) -> int: ) raise RuntimeError( "stems sidecar dep install failed " - f"({install_result.get('install_mode', 'unknown')}): " + f"({install_result.get('install_mode', 'unknown')}); " + f"missing before install: {', '.join(missing)}. " f"{err_blob[:600]}" ) + # Re-probe so a post-install gap surfaces here with a clear list + # instead of as an opaque 300s port-file timeout downstream. + pr2 = probe(self.cfg) + if not pr2.get("critical_ok"): + still = pr2.get("missing_critical") or [] + raise RuntimeError( + "stems sidecar deps still missing after install: " + f"{', '.join(still)}. See install logs / sidecar venv " + f"({self.cfg.python_exe})." + ) # Clear any stale port file. port_file = _port_file(self.cfg) @@ -315,10 +393,20 @@ def ensure_running(self) -> int: if port is None: stdout_tail = _tail_log(self._stdout_log) stderr_tail = _tail_log(self._stderr_log) + # Snapshot dep state so the failure names a concrete cause rather + # than just "timed out" (deps were already installed above, so a + # gap here points at a different boot problem). + post = probe(self.cfg) + missing = post.get("missing_critical") or [] self.stop() + dep_note = ( + f" Critical deps still missing: {', '.join(missing)}." + if missing + else " All critical deps import OK — check the log tails for a boot error." 
+ ) raise RuntimeError( f"stems sidecar didn't write {PORT_FILENAME} within " - f"{HEALTH_TIMEOUT_SEC}s.\n" + f"{HEALTH_TIMEOUT_SEC}s.{dep_note}\n" f"stdout tail: {stdout_tail[:500]}\n" f"stderr tail: {stderr_tail[:500]}" ) diff --git a/docs/reports/feature-doc-coverage-report.md b/docs/reports/feature-doc-coverage-report.md index ecd6c85..9a7aa9a 100644 --- a/docs/reports/feature-doc-coverage-report.md +++ b/docs/reports/feature-doc-coverage-report.md @@ -1,7 +1,7 @@ # Feature Documentation Coverage Report > [!NOTE] -> Generated: 2026-06-13T13:00:04.808Z · Git revision: `bc466fda402d` · Repomix tracked: **no** +> Generated: 2026-06-14T02:43:45.356Z · Git revision: `9cdeed22cf1f` · Repomix tracked: **no** ## Audit Dashboard diff --git a/docs/reports/feature-doc-coverage.json b/docs/reports/feature-doc-coverage.json index c079e13..a67a350 100644 --- a/docs/reports/feature-doc-coverage.json +++ b/docs/reports/feature-doc-coverage.json @@ -1,6 +1,6 @@ { - "generatedAt": "2026-06-13T13:00:04.808Z", - "repoRevision": "bc466fda402d", + "generatedAt": "2026-06-14T02:43:45.356Z", + "repoRevision": "9cdeed22cf1f", "repomixContext": { "path": "repomix-output.md", "present": false, diff --git a/docs/screenshots/manifest.json b/docs/screenshots/manifest.json index bca98e7..7ed7253 100644 --- a/docs/screenshots/manifest.json +++ b/docs/screenshots/manifest.json @@ -1,5 +1,5 @@ { - "generatedAt": "2026-06-13T13:02:09.249Z", + "generatedAt": "2026-06-14T02:46:37.642Z", "entries": [ { "file": "01-shell-make.png", diff --git a/frontend/src/components/audio/PianoRoll.tsx b/frontend/src/components/audio/PianoRoll.tsx index 9411279..5b580a4 100644 --- a/frontend/src/components/audio/PianoRoll.tsx +++ b/frontend/src/components/audio/PianoRoll.tsx @@ -8,6 +8,7 @@ import { downloadMidi, parseMidi } from '../../utils/midi'; import { logError, logInfo } from '../../state/logStore'; import { MidiMapper } from './MidiMapper'; import { ContextMenu, useContextMenu, type ContextMenuItem } from 
'../ui/ContextMenu'; +import { triggerSynthVoice, renderStepNotesToBlob } from '../../lib/midiSynth'; const NOTE_HEIGHT = 12; const HEADER_HEIGHT = 22; @@ -17,43 +18,13 @@ const NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', const isBlackKey = (midi: number) => [1, 3, 6, 8, 10].includes(midi % 12); const noteLabel = (midi: number) => `${NOTE_NAMES[midi % 12]}${Math.floor(midi / 12) - 1}`; -/** - * Schedule a single sawtooth+lowpass+env voice on the given context. Used both - * for live playback (engine ctx + master gain) and offline rendering - * (OfflineAudioContext + its destination). - */ -const triggerPianoNoteOn = ( - ctx: BaseAudioContext, - dest: AudioNode, - midi: number, - velocity: number, - when: number, - duration: number, - master: number, -): void => { - const freq = 440 * Math.pow(2, (midi - 69) / 12); - const osc = ctx.createOscillator(); - osc.type = 'sawtooth'; - osc.frequency.setValueAtTime(freq, when); - const lp = ctx.createBiquadFilter(); - lp.type = 'lowpass'; - lp.frequency.setValueAtTime(Math.min(8000, freq * 6), when); - const env = ctx.createGain(); - const peak = (velocity / 127) * 0.7 * master; - env.gain.setValueAtTime(0.001, when); - env.gain.exponentialRampToValueAtTime(peak, when + 0.008); - env.gain.setTargetAtTime(peak * 0.5, when + 0.05, 0.08); - env.gain.setTargetAtTime(0.001, when + duration, 0.05); - osc.connect(lp).connect(env).connect(dest); - osc.start(when); - osc.stop(when + duration + 0.2); -}; - -/** Live preview convenience: route through the shared engine master/analyser. */ +/** Live preview convenience: route the shared synth voice through the engine + * master/analyser. The voice itself lives in `lib/midiSynth` so previews, + * bounces, and library MIDI renders all sound identical. 
*/ const triggerPianoNote = (midi: number, velocity: number, when: number, duration: number, master: number) => { const ctx = getEngineCtx(); if (ctx.state === 'suspended') void ctx.resume(); - triggerPianoNoteOn(ctx, getMasterGain(), midi, velocity, when, duration, master); + triggerSynthVoice(ctx, getMasterGain(), midi, velocity, when, duration, master); }; /** @@ -75,67 +46,14 @@ export const triggerPianoNoteFromMidi = (midi: number, velocity = 100, duration triggerPianoNote(midi, velocity, ctx.currentTime + 0.02, duration, 0.8); }; -// --- WAV encoder (16-bit PCM, mirrors WaveformEditor.encodeWav) --- -const encodeWavBlob = (audioBuf: AudioBuffer): Blob => { - const numCh = audioBuf.numberOfChannels; - const sr = audioBuf.sampleRate; - const len = audioBuf.length; - const buffer = new ArrayBuffer(44 + len * numCh * 2); - const view = new DataView(buffer); - const writeStr = (off: number, s: string) => { - for (let i = 0; i < s.length; i += 1) view.setUint8(off + i, s.charCodeAt(i)); - }; - writeStr(0, 'RIFF'); - view.setUint32(4, 36 + len * numCh * 2, true); - writeStr(8, 'WAVE'); - writeStr(12, 'fmt '); - view.setUint32(16, 16, true); - view.setUint16(20, 1, true); - view.setUint16(22, numCh, true); - view.setUint32(24, sr, true); - view.setUint32(28, sr * numCh * 2, true); - view.setUint16(32, numCh * 2, true); - view.setUint16(34, 16, true); - writeStr(36, 'data'); - view.setUint32(40, len * numCh * 2, true); - const channels: Float32Array[] = []; - for (let c = 0; c < numCh; c += 1) channels.push(audioBuf.getChannelData(c)); - let offset = 44; - for (let i = 0; i < len; i += 1) { - for (let c = 0; c < numCh; c += 1) { - const sample = Math.max(-1, Math.min(1, channels[c][i])); - view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true); - offset += 2; - } - } - return new Blob([buffer], { type: 'audio/wav' }); -}; - -/** Render the current pattern offline to a WAV Blob. Used by SEND TO EDITOR. 
*/ -const renderPianoRollToBlob = async ( +/** Render the current pattern offline to a WAV Blob. Used by SEND TO EDITOR. + * Delegates to the shared step renderer in `lib/midiSynth`. */ +const renderPianoRollToBlob = ( notes: PianoNote[], bpm: number, totalSteps: number, -): Promise<{ blob: Blob; duration: number }> => { - const sr = 44100; - const stepSec = 60 / Math.max(40, bpm) / 4; // 16th note seconds - // Total length = (last note end + 0.5s tail). - let maxEnd = 0; - for (const n of notes) { - const end = (n.step + n.length) * stepSec; - if (end > maxEnd) maxEnd = end; - } - const padTail = 0.6; - const totalSec = Math.max(maxEnd, totalSteps * stepSec) + padTail; - const offline = new OfflineAudioContext(2, Math.ceil(totalSec * sr), sr); - for (const n of notes) { - const when = n.step * stepSec; - const dur = n.length * stepSec; - triggerPianoNoteOn(offline, offline.destination, n.note, n.velocity, when, dur, 1); - } - const rendered = await offline.startRendering(); - return { blob: encodeWavBlob(rendered), duration: rendered.duration }; -}; +): Promise<{ blob: Blob; duration: number }> => + renderStepNotesToBlob(notes, bpm, totalSteps); // Re-declared after the imports section so it picks up the imported // MidiMapper symbol without circular-import gymnastics. diff --git a/frontend/src/lib/midiSynth.ts b/frontend/src/lib/midiSynth.ts new file mode 100644 index 0000000..5f3b74d --- /dev/null +++ b/frontend/src/lib/midiSynth.ts @@ -0,0 +1,160 @@ +/** + * Shared MIDI → audio synthesis. + * + * Centralizes the offline render path so MIDI is usable everywhere audio is: + * preview playback, init audio, chimera fodder, and the piano roll's SEND TO + * EDITOR bounce. Today the only engine is a built-in subtractive sawtooth + * voice (no soundfont dependency), but the public surface is engine-shaped so + * a sample/soundfont engine can be dropped in later without touching callers. 
+ * + * The voice is byte-for-byte the same one the piano roll used inline before + * this module existed, so previews and bounces stay consistent. + */ +import { parseMidi } from './midi'; + +/** One note in absolute seconds — the engine-neutral render unit. */ +export interface RenderNote { + /** MIDI note number 0-127 (60 = middle C). */ + midi: number; + /** Start time in seconds from the render origin. */ + startSec: number; + /** Sounding length in seconds. */ + durationSec: number; + /** Velocity 1-127. */ + velocity: number; +} + +export interface RenderOptions { + /** Output sample rate. Defaults to 44.1kHz to match the rest of the app. */ + sampleRate?: number; + /** Silence appended after the last note so tails aren't clipped. */ + tailSec?: number; +} + +/** + * Schedule a single sawtooth + lowpass + envelope voice on any audio context. + * Works on both a live `AudioContext` (preview) and an `OfflineAudioContext` + * (render), since it only touches the standard `BaseAudioContext` surface. + */ +export const triggerSynthVoice = ( + ctx: BaseAudioContext, + dest: AudioNode, + midi: number, + velocity: number, + when: number, + duration: number, + master: number, +): void => { + const freq = 440 * Math.pow(2, (midi - 69) / 12); + const osc = ctx.createOscillator(); + osc.type = 'sawtooth'; + osc.frequency.setValueAtTime(freq, when); + const lp = ctx.createBiquadFilter(); + lp.type = 'lowpass'; + lp.frequency.setValueAtTime(Math.min(8000, freq * 6), when); + const env = ctx.createGain(); + const peak = (velocity / 127) * 0.7 * master; + env.gain.setValueAtTime(0.001, when); + env.gain.exponentialRampToValueAtTime(peak, when + 0.008); + env.gain.setTargetAtTime(peak * 0.5, when + 0.05, 0.08); + env.gain.setTargetAtTime(0.001, when + duration, 0.05); + osc.connect(lp).connect(env).connect(dest); + osc.start(when); + osc.stop(when + duration + 0.2); +}; + +/** Encode an AudioBuffer to a 16-bit PCM WAV Blob. 
*/ +export const encodeWavBlob = (audioBuf: AudioBuffer): Blob => { + const numCh = audioBuf.numberOfChannels; + const sr = audioBuf.sampleRate; + const len = audioBuf.length; + const buffer = new ArrayBuffer(44 + len * numCh * 2); + const view = new DataView(buffer); + const writeStr = (off: number, s: string) => { + for (let i = 0; i < s.length; i += 1) view.setUint8(off + i, s.charCodeAt(i)); + }; + writeStr(0, 'RIFF'); + view.setUint32(4, 36 + len * numCh * 2, true); + writeStr(8, 'WAVE'); + writeStr(12, 'fmt '); + view.setUint32(16, 16, true); + view.setUint16(20, 1, true); + view.setUint16(22, numCh, true); + view.setUint32(24, sr, true); + view.setUint32(28, sr * numCh * 2, true); + view.setUint16(32, numCh * 2, true); + view.setUint16(34, 16, true); + writeStr(36, 'data'); + view.setUint32(40, len * numCh * 2, true); + const channels: Float32Array[] = []; + for (let c = 0; c < numCh; c += 1) channels.push(audioBuf.getChannelData(c)); + let offset = 44; + for (let i = 0; i < len; i += 1) { + for (let c = 0; c < numCh; c += 1) { + const sample = Math.max(-1, Math.min(1, channels[c][i])); + view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true); + offset += 2; + } + } + return new Blob([buffer], { type: 'audio/wav' }); +}; + +/** Render absolute-seconds notes to a WAV Blob via the built-in synth. */ +export const renderNotesToBlob = async ( + notes: RenderNote[], + opts: RenderOptions = {}, +): Promise<{ blob: Blob; duration: number }> => { + const sr = opts.sampleRate ?? 44100; + const tail = opts.tailSec ?? 
0.6; + let maxEnd = 0; + for (const n of notes) { + const end = n.startSec + n.durationSec; + if (end > maxEnd) maxEnd = end; + } + const totalSec = Math.max(0.1, maxEnd + tail); + const offline = new OfflineAudioContext(2, Math.ceil(totalSec * sr), sr); + for (const n of notes) { + triggerSynthVoice(offline, offline.destination, n.midi, n.velocity, n.startSec, n.durationSec, 1); + } + const rendered = await offline.startRendering(); + return { blob: encodeWavBlob(rendered), duration: rendered.duration }; +}; + +/** Render step-grid notes (piano roll / step sequencer) to a WAV Blob; the audio spans at least the pattern's nominal length (totalSteps 16th-note steps) plus a 0.6s tail. */ +export const renderStepNotesToBlob = async ( + notes: Array<{ note: number; velocity: number; step: number; length: number }>, + bpm: number, + totalSteps: number, +): Promise<{ blob: Blob; duration: number }> => { + const stepSec = 60 / Math.max(40, bpm) / 4; // 16th-note seconds + const renderNotes: RenderNote[] = notes.map((n) => ({ + midi: n.note, + velocity: n.velocity, + startSec: n.step * stepSec, + durationSec: n.length * stepSec, + })); + // Pad the rendered audio itself (not just the reported duration) to the pattern's nominal length so trailing rests are preserved. + const nominal = totalSteps * stepSec; + const lastEnd = renderNotes.reduce((acc, n) => Math.max(acc, n.startSec + n.durationSec), 0); + return renderNotesToBlob(renderNotes, { tailSec: 0.6 + Math.max(0, nominal - lastEnd) }); +}; + +/** Parse a Standard MIDI File buffer and render it to a WAV Blob. 
*/ +export const renderMidiBufferToBlob = async ( + buf: ArrayBuffer | Uint8Array, +): Promise<{ blob: Blob; duration: number }> => { + const midi = parseMidi(buf); + const ppq = midi.ppq || 480; + const bpm = midi.bpm || 120; + const secPerTick = 60 / Math.max(20, bpm) / ppq; + const notes: RenderNote[] = midi.tracks.flatMap((t) => + t.notes.map((n) => ({ + midi: n.note, + velocity: n.velocity, + startSec: n.tick * secPerTick, + durationSec: Math.max(0.02, n.durationTicks * secPerTick), + })), + ); + if (notes.length === 0) throw new Error('MIDI has no playable notes'); + return renderNotesToBlob(notes); +}; diff --git a/frontend/src/lib/sendToTargets.ts b/frontend/src/lib/sendToTargets.ts index 0823efb..35c2cca 100644 --- a/frontend/src/lib/sendToTargets.ts +++ b/frontend/src/lib/sendToTargets.ts @@ -21,6 +21,7 @@ import { useBottomPanelStore } from '../state/bottomPanelStore'; import { usePianoRollStore } from '../state/pianoRollStore'; import { addBlobsToChimera } from './chimeraClient'; import { parseMidi } from './midi'; +import { renderMidiBufferToBlob } from './midiSynth'; import { logError, logInfo } from '../state/logStore'; /** Default mime for stems / mic recordings when none provided. */ @@ -205,6 +206,26 @@ export async function sendMidiIdToTarget(midiId: string, target: MidiSendTarget) } } +/** + * Build a SendableAudio that synthesizes a library MIDI row into audio on + * demand. Lets MIDI flow into every audio destination (editor / init / inpaint + * / chimera) the same way a stem or track does. Rendering is lazy — the synth + * only runs when a consumer actually pulls the blob. 
+ */ +export function midiIdToSendable(midiId: string, label = 'midi'): SendableAudio { + return { + label, + mimeType: 'audio/wav', + fetcher: async () => { + const res = await fetch(`/api/midi/file/${midiId}`); + if (!res.ok) throw new Error(`midi ${midiId} fetch HTTP ${res.status}`); + const buf = await res.arrayBuffer(); + const { blob } = await renderMidiBufferToBlob(buf); + return blob; + }, + }; +} + /** Build a SendableAudio from a stem row pulled from /api/library/_all/stems. */ export function stemRowToSendable(row: Record): SendableAudio { const stemId = String(row.id ?? ''); diff --git a/frontend/src/views/LibraryView.tsx b/frontend/src/views/LibraryView.tsx index 7a71c94..052da24 100644 --- a/frontend/src/views/LibraryView.tsx +++ b/frontend/src/views/LibraryView.tsx @@ -24,8 +24,10 @@ import { logError, logInfo } from '../state/logStore'; import { addBlobsToChimera } from '../lib/chimeraClient'; import { listMedia, importMedia, deleteMedia, MEDIA_ACCEPT } from '../lib/mediaLibrary'; import { setAudioDragData } from '../lib/audioDnD'; +import { renderMidiBufferToBlob } from '../lib/midiSynth'; import { loadMidiIntoPianoRoll, + midiIdToSendable, sendAudioToChimera, sendAudioToEditor, sendAudioToInit, @@ -308,6 +310,27 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa } }, []); + // In-place refresh of the stems / midi indexes (no null-flicker, unlike the + // lazy first-load). Passed to SubTabList so favorite / delete update the list + // without resetting the sub-tab to its "Loading…" placeholder. + const refreshStems = React.useCallback(async () => { + try { + const j = await fetch('/api/library/_all/stems').then((r) => r.json()); + setAllStems(j.stems || []); + } catch (e) { + logError('library', `Failed to refresh stems: ${e instanceof Error ? 
e.message : String(e)}`); + } + }, []); + + const refreshMidi = React.useCallback(async () => { + try { + const j = await fetch('/api/library/_all/midi').then((r) => r.json()); + setAllMidis(j.midis || []); + } catch (e) { + logError('library', `Failed to refresh MIDI: ${e instanceof Error ? e.message : String(e)}`); + } + }, []); + const stemsByParent = useMemo(() => { const map: Record>> = {}; (allStems || []).forEach((s) => { @@ -1026,6 +1049,7 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa parentTitles={Object.fromEntries(entries.map((e) => [e.id, e.title]))} kind="stem" placeholder={allStems === null ? 'Loading stems…' : 'No stems yet. Enable auto-stems in Settings or right-click a track → Separate stems.'} + onMutated={refreshStems} /> )} {subTab === 'midi' && ( @@ -1034,6 +1058,7 @@ export const LibraryView: React.FC<{ onSwitchTab?: (tab: string) => void; onExpa parentTitles={Object.fromEntries(entries.map((e) => [e.id, e.title]))} kind="midi" placeholder={allMidis === null ? 'Loading MIDI…' : 'No MIDI yet. Enable auto-MIDI in Settings or right-click a track → Convert to MIDI.'} + onMutated={refreshMidi} /> )} {subTab === 'video' && ( @@ -1589,6 +1614,8 @@ interface SubTabListProps { parentTitles: Record; kind: 'stem' | 'midi'; placeholder: string; + /** Re-fetch the index in place after a favorite toggle or delete. */ + onMutated: () => void | Promise; } type SubTabRowPayload = @@ -1596,13 +1623,77 @@ type SubTabRowPayload = | { kind: 'stem'; row: Record }; -const SubTabList: React.FC = ({ byParent, parentTitles, kind, placeholder }) => { +const SubTabList: React.FC = ({ byParent, parentTitles, kind, placeholder, onMutated }) => { const parentIds = Object.keys(byParent); // Shared ContextMenu primitive — fixes drift under .dense-layout // zoom and gives consistent close-on-outside behavior across the // app (plan step 3d migration). 
const rowMenu = useContextMenu(); + // Stems and MIDI are first-class library items: they play through the + // global engine, can be favorited, and can be deleted independently of + // their parent track. MIDI playback synthesizes via the shared sawtooth + // engine in lib/midiSynth (no soundfont needed). + const engineLoad = usePlayerStore((s) => s.load); + const enginePlay = usePlayerStore((s) => s.play); + const enginePause = usePlayerStore((s) => s.pause); + const engineIsPlaying = usePlayerStore((s) => s.isPlaying); + const engineEntryId = usePlayerStore((s) => s.currentEntryId); + const [playingRowKey, setPlayingRowKey] = useState(null); + const [busyRowKey, setBusyRowKey] = useState(null); + + // Stems / MIDI load with no entryId, so currentEntryId is null while one is + // playing. If a real track takes over the engine, currentEntryId goes + // non-null and our rows stop showing the pause state. + const rowIsPlaying = (rowKey: string) => + playingRowKey === rowKey && engineIsPlaying && engineEntryId === null; + + const playRow = async (rowKey: string, label: string, fetchBlob: () => Promise) => { + if (rowIsPlaying(rowKey)) { + enginePause(); + return; + } + setBusyRowKey(rowKey); + try { + const blob = await fetchBlob(); + await engineLoad(blob, { label }); + enginePlay(); + setPlayingRowKey(rowKey); + } catch (e) { + logError('library', `Could not play ${label}: ${e instanceof Error ? e.message : String(e)}`); + } finally { + setBusyRowKey(null); + } + }; + + const toggleFavorite = async (isMidi: boolean, rowId: string, current: boolean) => { + const url = isMidi ? `/api/midi/file/${rowId}` : `/api/library/stems/${rowId}`; + try { + const res = await fetch(url, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ favorite: !current }), + }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + await onMutated(); + } catch (e) { + logError('library', `Could not update favorite: ${e instanceof Error ? 
e.message : String(e)}`); + } + }; + + const deleteRow = async (isMidi: boolean, rowId: string, label: string) => { + if (!window.confirm(`Delete "${label}"? This removes the file from disk and cannot be undone.`)) return; + const url = isMidi ? `/api/midi/file/${rowId}` : `/api/library/stems/${rowId}`; + try { + const res = await fetch(url, { method: 'DELETE' }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + logInfo('library', `Deleted ${isMidi ? 'MIDI' : 'stem'} "${label}".`); + await onMutated(); + } catch (e) { + logError('library', `Could not delete: ${e instanceof Error ? e.message : String(e)}`); + } + }; + if (parentIds.length === 0) { return

{placeholder}

; } @@ -1612,6 +1703,7 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p let menuTitle = ''; if (payload?.kind === 'midi') { + const sendable = midiIdToSendable(payload.midiId, payload.label); menuTitle = `MIDI · ${payload.label}`; menuItems = [ { @@ -1626,6 +1718,33 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p icon: , onSelect: () => { void sendMidiIdToTarget(payload.midiId, 'step-seq'); }, }, + { type: 'separator' }, + { + type: 'item', + label: 'Send to editor (synth)', + icon: , + hint: 'new track', + onSelect: () => { void sendAudioToEditor(sendable, 'editor-new-track'); }, + }, + { + type: 'item', + label: 'Send to Init audio (synth)', + icon: , + onSelect: () => { void sendAudioToInit(sendable); }, + }, + { + type: 'item', + label: 'Send to Inpaint (synth)', + icon: , + onSelect: () => { void sendAudioToInpaint(sendable); }, + }, + { + type: 'item', + label: 'Add to Chimera (synth)', + icon: , + onSelect: () => { void sendAudioToChimera([sendable]); }, + }, + { type: 'separator' }, { type: 'item', label: 'Download .mid', @@ -1639,6 +1758,13 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p document.body.removeChild(a); }, }, + { + type: 'item', + label: 'Delete MIDI', + icon: , + danger: true, + onSelect: () => { void deleteRow(true, payload.midiId, payload.label); }, + }, ]; } else if (payload?.kind === 'stem') { const stemId = String(payload.row.id ?? 
''); @@ -1671,6 +1797,7 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p icon: , onSelect: () => { void sendAudioToChimera([sendable]); }, }, + { type: 'separator' }, { type: 'item', label: 'Download .wav', @@ -1684,6 +1811,13 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p document.body.removeChild(a); }, }, + { + type: 'item', + label: 'Delete stem', + icon: , + danger: true, + onSelect: () => { void deleteRow(false, stemId, stemName); }, + }, ]; } @@ -1697,30 +1831,83 @@ const SubTabList: React.FC = ({ byParent, parentTitles, kind, p
{byParent[pid].map((row, idx) => { const isMidi = kind === 'midi'; - const midiId = String(row.id ?? ''); - const label = String((isMidi ? row.source : row.stem_name) ?? 'item'); + const rowId = String(row.id ?? ''); + const name = isMidi ? String(row.source ?? 'midi') : String(row.stem_name ?? 'stem'); + const label = parentTitles[pid] ? `${parentTitles[pid]} · ${name}` : name; + const favorite = !!row.favorite; + const rowKey = `${kind}:${rowId}`; + const busy = busyRowKey === rowKey; + const playing = rowIsPlaying(rowKey); + const meta = isMidi + ? `${row.engine ?? ''}` + : `${row.model ?? ''} ${row.model_variant ?? ''}`.trim(); return (
{ if (isMidi) { - if (!midiId) return; - rowMenu.open(e, { kind: 'midi', midiId, label }); + if (!rowId) return; + rowMenu.open(e, { kind: 'midi', midiId: rowId, label }); } else { rowMenu.open(e, { kind: 'stem', row }); } }} - title="Right-click to send this anywhere" + title="Right-click for more — send to editor / init / inpaint / chimera" > - - {kind === 'stem' ? String(row.stem_name ?? 'stem') : String(row.source ?? 'midi')} - - - {kind === 'stem' - ? `${row.model ?? ''} ${row.model_variant ?? ''}`.trim() - : `${row.engine ?? ''}`} - + + + {name} + {meta} +
); })} From 53c1d280eefa15abcf802dbca4349bdf323da3c7 Mon Sep 17 00:00:00 2001 From: Daniel Joaquin Trujillo <54636507+danieljtrujillo@users.noreply.github.com> Date: Sat, 13 Jun 2026 19:47:29 -0700 Subject: [PATCH 2/3] fix: module-list reliability, Windows MIDI/stems incidents, Suno a11y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Settings 'Backend Modules' could show a false 'No modules found' when opened during a backend (re)start, because the modal fetched on open and silently blanked on any error. Now the catalog loads once on backend-ready into a shared moduleStore (retry-until-success, cached) and Settings just reads it, so there is nothing to fail on open; a real error shows an error + Retry. Also: - MIDI (Windows): basic-pitch's emoji status output no longer crashes MIDI conversion on cp1252 consoles — its stdout/stderr is captured into a text buffer (engine.py), so transcription failures report the real cause. - Stems sidecar: probe now checks all critical packages (demucs/torch/ torchaudio/torchcrepe), and ensure_running installs deps BEFORE spawning when any are missing instead of letting run_backend.py self-install and burn the 300s readiness window; the timeout error now names the dep state. - Suno API key field is wrapped in a
with an associated label and autoComplete=off (silences the 'password field not in a form' warning). - docs: record Phase I (stems/MIDI), G15 (device errors + Quest video-in), and H1/H2 status in the plan. --- ...al-layout-vj-library-optimizations-plan.md | 345 +++++++++++++++++- docs/reports/feature-doc-coverage-report.md | 2 +- docs/reports/feature-doc-coverage.json | 4 +- docs/screenshots/manifest.json | 2 +- frontend/src/App.tsx | 9 + .../src/components/layout/SettingsModal.tsx | 55 +-- frontend/src/state/moduleStore.ts | 87 +++++ frontend/src/suno/SunoKeySettings.tsx | 24 +- frontend/vite.config.ts | 17 +- 9 files changed, 499 insertions(+), 46 deletions(-) create mode 100644 frontend/src/state/moduleStore.ts diff --git a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md index ff5cdab..37633de 100644 --- a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md +++ b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md @@ -18,6 +18,26 @@ Added later (user, verbatim): > finish integrating the live stems in DJ, have the sampler and stem activation super simple, but versatile. Fill some of those gaps. +Added later (user, verbatim, 2026-06-13): + +> Make a plan for making essentially the whole app right clickable, so I can send any track anywhere to the library,(adding to library isnt working right now) it should have multiselect, send all to _____, stem, delete, compress/archive, add to init, send here send there send everywhere. +> +> You should always always make sure that you break these tasks (even the planning) down into small manageable tasks so you can keep track, and dont overload your context. Add these requests to the most recent plan doc (i think like 2 days old) +> +> Cymatics visualizers should be mixable into the VJ feed. 
+> +> The EDIT tab, should by default have however many tracks/lanes as it needs to fill up the screen, so prob like 10 +> +> VOICE/MIC INPUT Needs to be integrated into the footer (just a simple record icon for now) and be able to be plugged into anywhere to record. record into init, edit lanes, midi, I have a vocoder for us to integrate into this app D:\StableAudio\JoshOG\KhoomeiVocoder but not dependent on that external folder after integration. +> +> All tracks/lanes should have a mic input button, ability to add the effects/stacks that we have. +> +> I need to be able to use the videos that are imported into the library, currently there's no function whatsoever or exposed ability to do anything with em. Videos and images should be automatically highly optimized without losing noticable quality. Whatever codec is small and would run the best in this app to keep overhead low. +> +> managing adding, saving playlists in DJ tab should be much easier. The pause play skip on the footer should control the track playheads. +> +> More granularity on the SUGGEST options. + Already shipped from the same request batch (PR #19): SLIDE Row/Focus bottom-anchored lanes + sticky `.sl-pagedock`, controller view fit-on-open/wheel-zoom/drag-pan, piano-transcription-inference installed + declared. Visual sign-off on the SLIDE behaviors is still pending the user's eyes. --- @@ -57,6 +77,8 @@ Already shipped from the same request batch (PR #19): SLIDE Row/Focus bottom-anc 4. **Phase D — micro-perf**: rVFC in VJ loop, selector-izing big views, H264 recording option. 5. **Phase E — DJ live stems finish + simple sampler/stem activation** (section 6.5). Can be pulled ahead of C/D on green-light; it is independent of the layout and VJ work. 6. **Phase F — library Opus autoconvert** (section 6.6). Audit-first: a compatibility matrix of every consumer of library audio BEFORE any conversion code. User: library is getting big quickly; this is a real disk-pressure item. +7. 
**Phase H — active incident fix** (section 6.8): stems sidecar dependency/probe hardening + Windows MIDI charmap fix. This can be pulled to the very front because it is an active failure. +8. **Phase G — global right-click routing + mic/media/DJ/SUGGEST expansion** (section 6.7): execute only after add-to-library reliability is green, because every send-anywhere workflow depends on it. --- @@ -175,6 +197,327 @@ Output of F1: a table in this plan (or a follow-up doc) with VERIFIED yes/no/nee **F2 — conversion mechanics (after F1).** Per-entry convert + bulk "convert older than N days / all" action, default-on toggle for NEW generations vs. keep-WAV setting, originals deleted only after a verified ffprobe pass on the new file, `metadata.json` + DB mime/size updates, stems and artifacts unaffected (separate files). Settings → Storage surface showing reclaimable space estimate. **F3 — guard rails.** Never convert entries referenced as init/inpaint sources if F1 says lossless matters there (or always keep a flag for "this entry was lossy-converted" so generation flows can warn). +## 6.7. Phase G — global right-click routing, mic input, media optimization, DJ playlist UX, and SUGGEST expansion + +User ask (2026-06-13, summarized): make essentially the whole app right-clickable, support multiselect and batch sends, fix add-to-library reliability, allow send here/there/everywhere, expose stems/delete/archive/init actions everywhere, make videos/images usable and optimized, route Cymatics into VJ, add footer mic recording and lane mic buttons/effects, make DJ playlists easier, make footer transport control the active playheads, and add more granular SUGGEST controls. + +### Verified current state (2026-06-13) + +- Shared right-click primitive exists: `frontend/src/components/ui/ContextMenu.tsx` + `useContextMenu()`. 
It is already used by `LibraryView.tsx`, `LineageModal.tsx`, `ControlSurface.tsx`, `MediaBucketView.tsx`, `PianoRoll.tsx`, `StepSequencer.tsx`, and `WaveformEditor.tsx`. +- Shared send helpers exist: `frontend/src/lib/sendToTargets.ts` handles `SendableAudio`, audio → editor/init/inpaint/chimera, MIDI → piano roll / step sequencer, and stem-row → sendable audio. This is the right foundation; do not duplicate this logic in each menu. +- Mic recording exists but is local-panel oriented: `frontend/src/components/audio/MicRecorder.tsx` uses browser `getUserMedia` + `MediaRecorder`, can preview, send to editor/init/inpaint, and import into library. +- EDIT timeline is store-driven: `frontend/src/state/editorStore.ts` currently starts with one track and exposes `addTrack`, `removeTrack`, `updateTrack`, `addClipToTrack`, etc. `WaveformEditor.tsx` already supports track selection and creates a new track when a library drop lands below the last lane. +- Footer transport has partial mode-awareness: `PlayerFooter.tsx` toggles DJ master on the DJ tab and VJ playback on VJ/DJ tabs, but skip/progress behavior still mostly targets the global single-track `playerStore`. +- Library media backend exists: `backend/modules/library/router.py` supports `GET /entries?kind=audio|video|image|media|all`, `POST /import-media`, `GET /media/{id}`, and `GET /media/{id}/thumb`; `backend/modules/library/store.py` has `kind`, `media_url`, `thumb_url`, `width`, `height`, and `has_alpha` fields. +- DJ setlists exist: `frontend/src/state/setlistStore.ts` persists named sets with `create`, `rename`, `remove`, `setEntries`, `append`, `setActive`, and `setNotes`; UX is the gap. +- SUGGEST exists: `SuggestPlaylistModal.tsx` posts duration/BPM/harmonic/flow/query to `/api/library/suggest-playlist`; backend `suggester.py` sequences by BPM flow + harmonic/Camelot + query/genre. + +### G0. Guard rails for this phase + +- Keep this work in small PR-sized slices. 
Do not start “whole app right-clickable” by touching every surface at once. +- First make add-to-library reliable, then build global routing on top of it. +- Prefer one action registry over duplicated menu arrays. New context menus should consume shared actions, not reimplement send/delete/stem/archive logic. +- For implementation work, inspect the full relevant file or exact function/class sections before editing; do not rely on arbitrary partial reads. + +### G1. Fix “Add to Library” and make routing reliable first + +Goal: every future global send action depends on a trustworthy library import path. + +Small tasks: +1. Audit every add/save/import caller before editing: `MicRecorder.tsx`, `MediaBucketView.tsx`, `LibraryView.tsx`, generated-output save paths, and any bucket/detail quick actions. +2. Verify the frontend provider import contract matches backend `/api/library/import`: multipart field names, filename, MIME type, metadata JSON, and returned `LibraryEntry` shape. +3. Add focused tests for importing an audio blob, a mic-recording blob, and a generated-output blob. +4. Make failures visible in the processing log/status UI; no silent “button did nothing” behavior. +5. Confirm the imported entry immediately appears in the library, can play, can be sent to editor/init/inpaint, and can be queued for stems/MIDI. + +Success criteria: saving any generated/mic/bucket audio to library returns a new `LibraryEntry`, refreshes the UI, and can immediately play/send/stem/MIDI-convert. + +### G2. Central Send/Action Registry + +Goal: make the app right-clickable without duplicating menu logic. + +Small tasks: +1. Create a centralized action builder, e.g. `frontend/src/lib/contextActions.ts`. +2. Define normalized payloads for: audio library entry, stem row, MIDI row, editor clip, editor track/lane, media/video/image entry, DJ setlist item, generated output, and mic recording. +3. 
Define shared action groups: play/preview, send to editor, send to selected/new lane, send here, send to init, send to inpaint, send to Chimera, separate stems, convert to MIDI, send to piano roll / step sequencer, send to DJ deck A/B, add to active/new DJ playlist, send to VJ, add to library, download/bundle, archive/compress, and delete. +4. Each UI component asks the registry for actions given `{ payload, selection, location }`; components should stay thin. +5. Keep `ContextMenu.tsx` as the rendering primitive and `sendToTargets.ts` as the routing foundation. + +Success criteria: Library rows, EDIT clips/lanes, DJ rows, media cards, buckets, and generated outputs share consistent context actions from one implementation path. + +### G3. App-wide multiselect + batch actions + +Goal: support “send all to ____”, batch stem/MIDI, batch delete, and batch archive without losing selection. + +Small tasks: +1. Preserve existing local selections but expose a normalized “current selection” shape to the action registry. +2. Use the Library selection behavior as the model: click single-select, Ctrl/Cmd toggle, Shift range, right-click unselected item selects it first, right-click selected item opens the batch menu. +3. Add batch wrappers for send to init/Chimera/editor, add to playlist, stem queue, MIDI queue, archive/compress, and delete with confirmation. +4. Add progress/log entries for long-running batch actions. +5. Add “selected count” headers in menus so destructive batch actions are obvious. + +Success criteria: selecting 3+ items and right-clicking exposes batch-safe actions without clearing selection. + +### G4. “Send here / send there / send everywhere” target model + +Goal: context actions should understand where the user clicked, not only what item was clicked. + +Small tasks: +1. 
Add a target registry for: current EDIT lane at click position, selected EDIT lane(s), new EDIT lane, Init, Inpaint, piano roll, step sequencer, DJ deck A/B, active DJ playlist, VJ bucket/feed/overlay, and Library. +2. For timeline/lane right-clicks, include `trackId`, `timeSec`, lane index, and selection state in the menu payload. +3. Extend `sendAudioToEditor()` to support specific track/lane, specific timeline time, selected lane, append-to-tail, and new-lane modes. +4. Make “Send everywhere” a submenu with explicit checked destinations, not a dangerous one-click blast. +5. Log every multi-target send with the destination list. + +Success criteria: right-clicking an EDIT lane at a specific time can place imported/recorded/sent audio exactly there. + +### G5. EDIT tab default lane fill (~10 lanes) + +Goal: EDIT opens with enough tracks/lanes to fill the visible timeline area instead of one lonely lane. + +Small tasks: +1. Add an `ensureMinTracks(count)` mutation to `editorStore.ts`; it only adds blank tracks and never auto-removes user tracks. +2. In `WaveformEditor.tsx`, measure available lane area height and compute `ceil(height / TRACK_HEIGHT)`. +3. On EDIT mount and resize, ensure at least the computed count, with a reasonable floor around 10 on normal desktop layouts. +4. Preserve existing drop-below-last-lane behavior for creating additional tracks. +5. Verify track naming/color cycling remains deterministic and readable. + +Success criteria: EDIT defaults to roughly 10 visible lanes on a normal desktop screen and adapts to smaller/larger screens without deleting user-created lanes. + +### G6. Footer mic input as the global record source + +Goal: add a simple footer record icon and make mic recordings routable anywhere. + +Small tasks: +1. Extract browser mic recording state from `MicRecorder.tsx` into a global `micInputStore` or `recordingBus` while keeping `MicRecorder` as the detailed/full UI. +2. 
Add a compact record icon/button to `PlayerFooter.tsx`: first click starts recording, second click stops. +3. After stop, open a compact destination menu using the shared action registry. +4. Destinations: save to library, record into Init, record into selected EDIT lane, record into lane at playhead, record into new lane, send to Inpaint, convert recording to MIDI. +5. Show recording state and elapsed time in the footer without crowding the transport. + +Success criteria: footer record works globally without opening the Library mic panel first. + +### G7. Track/lane mic-arm buttons + effects/stacks per lane + +Goal: every track/lane can be armed for mic input and can use the existing effects/stacks. + +Small tasks: +1. Add a mic-arm button to each EDIT lane header. +2. Add per-lane input mode: off, record from mic, monitor mic, record at playhead. +3. Route armed-lane recordings through the same recording bus as the footer record button. +4. Add a lane effects entry point that uses existing `effectChainStore` / effect catalog patterns. +5. Start with offline clip/lane processing or lane bounce before attempting real-time insert monitoring. +6. Only after EDIT is stable, consider DJ deck/sampler mic-arm controls. + +Success criteria: a lane can be armed, recorded into, then processed with an existing effect/stack without leaving EDIT. + +### G8. Vocoder integration, vendored into this repo + +Goal: integrate `D:\StableAudio\JoshOG\KhoomeiVocoder` without making the app depend on that external folder after integration. + +Small tasks: +1. Inspect the external vocoder source, license, model/data files, dependency footprint, and expected API before copying anything. +2. Copy only the required source/config/model assets into this repo under a clear module path such as `backend/modules/vocoder/` or `sidecars/vocoder/`. +3. Add a stable backend API: carrier audio + modulator mic/voice audio → vocoded output audio. +4. 
Add frontend context actions: “Use as vocoder carrier” and “Record voice as vocoder modulator”. +5. Keep it optional/lazy-loaded so normal app boot is unaffected. + +Success criteria: after integration, the app still runs and vocoder still works if `D:\StableAudio\JoshOG\KhoomeiVocoder` is renamed or removed. + +### G9. Make imported videos/images actually usable + +Goal: imported media should have visible actions, not just storage. + +Small tasks: +1. Verify the current VIDEO tab import/list/delete behavior end-to-end. +2. Add media context actions: preview, send to VJ main source, send to VJ overlay, add to active VJ set, add to DJ setlist as visual item, delete, archive/compress. +3. Add drag/drop routing from the Library VIDEO tab to VJ. +4. Show thumbnail, duration, dimensions, codec/proxy status, and alpha/overlay badges. +5. Make media entries first-class `VjSetItem` / `SetlistEntry` payloads where appropriate. + +Success criteria: importing MP4/WebM/PNG/WebP creates a media card that can be previewed and sent to VJ. + +### G10. Automatic video/image optimization and proxies + +Goal: optimize imported media automatically without noticeable quality loss and with low playback overhead. + +Codec direction: +- Opaque video playback proxy: MP4/H.264/AAC, `yuv420p`, `+faststart`, CRF roughly 20–23. +- Transparent animated overlays: WebM VP9 with alpha. +- Still images: WebP quality roughly 85–92 for most; preserve PNG/WebP/AVIF when alpha/detail needs it. +- Never flatten alpha media into opaque H.264 by accident. + +Small tasks: +1. Store the original first, then enqueue a background optimization job. +2. Generate optimized playback proxy, thumbnail/poster, and metadata: width, height, duration, alpha, codec, proxy path, original path. +3. Stream the proxy by default and expose original download on demand. +4. Add settings for keep-originals, max proxy resolution, quality preset, and alpha-preserving mode. +5. 
Log optimization progress and show proxy status on media cards. + +Success criteria: VJ uses lightweight media proxies by default while transparent overlays keep transparency. + +### G11. Cymatics visualizers mixable into the VJ feed + +Goal: the Cymatics/orb visualizers become VJ-mixable sources/layers. + +Recommended approach: do not stream canvas pixels from the React app every frame unless unavoidable. Prefer adding Cymatics as a VJ-side visual source driven by the existing `sa3-vj/audio-levels` bridge. + +Small tasks: +1. Reuse/port shader logic from `frontend/src/components/audio/CymaticsVisualizer.tsx` and `frontend/src/components/audio/cymatics/*` into the VJ app as a source module. +2. Add VJ source types: `sa3-cymatics-orb`, `sa3-cymatics-platform`, `sa3-landscape-chrome`, `sa3-landscape-ferrofluid`. +3. Drive the VJ-side source from existing `sa3-vj/audio-levels` messages. +4. Add VJ mix controls: opacity, blend mode, layer order, and audio-reactivity amount. +5. Add “Send Cymatics to VJ feed” from visualizer panels after the VJ source exists. + +Success criteria: Cymatics can be layered/mixed with video in VJ without heavy per-frame cross-frame copying. + +### G12. DJ playlist/setlist UX cleanup + +Goal: creating, saving, adding to, and managing DJ playlists should be obvious. + +Small tasks: +1. Add an always-visible active playlist strip in DJ. +2. Add quick actions: New playlist, Save current deck queue as playlist, Add selected library tracks to active playlist, Rename, Duplicate, Clear. +3. Add context actions from any track: Add to active DJ playlist, Add to new playlist, Send selected to DJ playlist. +4. Improve drag/drop into playlists. +5. Add autosave indicator and undo for accidental removals. + +Success criteria: building and saving a DJ set no longer requires hunting through hidden menus. + +### G13. Footer transport controls the active playheads + +Goal: footer play/pause/skip/progress controls whichever surface is currently live. 
+ +Small tasks: +1. Extend `djMasterBus.ts` with commands: play/pause, previous/restart, next, seek active deck/set fraction, and report active deck/set progress. +2. In DJ mode, footer play/skip/progress uses the DJ bus, not global `playerStore`. +3. In EDIT mode, footer play/skip/progress uses the live mixer/editor bridge and updates `editorStore.playheadSec`. +4. In VJ mode, footer play/pause uses the VJ playback bus. +5. In normal mode, footer transport continues to use `playerStore`. + +Success criteria: the footer controls DJ decks/set, EDIT timeline, VJ playback, or normal library playback according to the active surface. + +### G14. More granular SUGGEST options + +Goal: make playlist suggestion more controllable without making the basic path intimidating. + +Small tasks: +1. Extend the backend request schema with optional controls: seed track, key/Camelot target, harmonic strictness, energy-curve intensity, min/max track duration, include/exclude tags, include/exclude genres, favor favorites, avoid recently played, play-count/popularity weight, discovery/randomness amount, max same-genre streak, and analyzed-only vs allow-unanalyzed. +2. Group frontend controls into Basic and Advanced sections. +3. Keep every result’s “why chosen” reason visible. +4. Add “Regenerate with more variety” and “Tighten criteria” actions. +5. Add tests for schema defaults so old callers keep working. + +Success criteria: SUGGEST can generate tighter, more intentional playlists without manual library digging. + +### G15. Accurate device-access errors + Quest video-in without MQDH + +User ask (2026-06-13, verbatim): "fixing this bullshit to state an accurate issue like 'give the browser access to your camera, or plug in a camera'" … "lump that in with getting the quest piping video in without MQDH (if possible)". + +Two related problems, one task: + +1. 
**Actionable camera/mic errors.** Today the VJ camera toggle surfaces the raw `getUserMedia` DOMException message (`GANTASMO-LIVE-VJ/src/useMedia.ts:85` sets `err.message`, echoed to `VJView.tsx:578` as `Camera error: …`), so the user sees `Permission denied` / `Requested device not found` instead of what to DO. Map `err.name` to plain instructions across every device consumer: + - `NotAllowedError` / `SecurityError` → "Give the browser access to your camera in the site permissions, then try again." + - `NotFoundError` / `OverconstrainedError` → "No camera found — plug one in (or pick a different device) and try again." + - `NotReadableError` → "The camera is in use by another app — close it and try again." + - Audit ALL consumers, not just VJ: `MicRecorder.tsx`, VJ `useAudioAnalyzer.ts` + `VideoOutput.tsx` (mic), and the controllervision capture path. Mic errors get the same treatment ("give the browser mic access / plug in a mic"). +2. **Quest video into the app without Meta Quest Developer Hub.** Research-first (feasibility uncertain). Goal: the Quest's passthrough/camera/headset view becomes a selectable VJ video source without requiring MQDH. Candidate paths to evaluate, FOSS-leaning: scrcpy (USB/Wi-Fi mirror → window → OBS virtual camera → shows up as a `getUserMedia` videoinput), WebRTC from a tiny page running in the Quest browser into the VJ app, or an OBS virtual-cam bridge. Tie-in: this is adjacent to the GANTASMO-MIDI Quest↔DAW bridge work (see memory `project_gantasmo_midi_unity`); reuse that transport if it fits. Output of the research step: a short note in this plan of the chosen path + its dependency footprint BEFORE building. + +Guard rail: the VJ app lives in a separate repo working tree — the error-message fix is a separate commit there (standing constraint §7). + +## 6.8. 
Phase H — active incident fix: stems sidecar timeout + MIDI charmap error + +User ask (2026-06-13): resolve the backend failure where library import returned 200, stems sidecar failed to write `backend_port.txt` within 300 seconds after missing `torch`/`torchcrepe`, `torchvision 0.27.0` required `torch==2.12.0` but `torch 2.11.0+cu128` was installed, and `basic_pitch` MIDI conversion failed on Windows with a `charmap` emoji encoding error. + +### H1. Stems sidecar dependency/probe hardening + +**Status (2026-06-13): core fix DONE.** `backend/modules/stems/sidecar.py` now probes ALL critical packages (`demucs`, `torch`, `torchaudio`, `torchcrepe`) in one subprocess via `_probe_packages()` (probe reports `packages` / `missing_critical` / `critical_ok`, keeps `demucs_importable` for back-compat). `ensure_running()` gates the pre-spawn install on `critical_ok` (not just demucs), re-probes after install to fail-fast with the missing list, and the 300s port-timeout error now names the dep state. Remaining (deferred, integration-package side): task 4 (pin torch/torchvision/torchcrepe as a compatible set), task 5 (disable run_backend.py's internal auto-install), task 7 ("Repair stems environment" Settings action). + +Observed failure: + +```text +background_workers: job stems: failed: stems sidecar didn't write backend_port.txt within 300.0s. +Missing 2 critical package(s): torch, torchcrepe +Auto-installing missing dependencies... +torchvision 0.27.0 requires torch==2.12.0, but you have torch 2.11.0+cu128 which is incompatible. +``` + +Likely root cause: `backend/modules/stems/sidecar.py` currently gates pre-spawn dependency repair mostly on whether `demucs` imports. If `demucs` imports but `torch`, `torchvision`, or `torchcrepe` are missing/broken/mismatched, `run_backend.py` starts and attempts its own auto-install, then can spend the entire 300 second readiness window resolving conflicts and never write `backend_port.txt`. + +Small tasks: +1. 
Strengthen `probe()` to check `demucs`, `torch`, `torchvision`, `torchcrepe`, and version compatibility in the dedicated sidecar venv. +2. Treat missing/broken critical packages as “deps not ready” even if `demucs` imports. +3. Run the controlled `install_dependencies()` path before spawn when critical deps are not ready. +4. Pin or filter the sidecar requirements so Torch/TorchVision/TorchCrepe resolve as a compatible set for the target CUDA/CPU mode. +5. If the integration package supports it, disable its internal auto-install and let theDAW own dependency repair. +6. Improve error diagnostics: exact package versions, import failures, install command used, return code, and `.sidecar_logs` paths. +7. Add a Settings/maintenance action or documented command: “Repair stems environment”. + +Success criteria: stems sidecar either starts successfully or fails fast with actionable package diagnostics before waiting 300 seconds. + +### H2. MIDI basic-pitch Windows `charmap` failure + +**Status (2026-06-13): DONE.** `backend/modules/midi/engine.py` `_run_basic_pitch()` now wraps `predict_and_save()` with `contextlib.redirect_stdout/redirect_stderr` into an `io.StringIO` (a text buffer never encodes, so the library's emoji output can't trigger `UnicodeEncodeError` on cp1252), logging the captured chatter at debug. Transcription failures now report the real cause, not an output-encoding crash. + +Observed failure: + +```text +midi.engine: basic_pitch conversion failed for 01 - Prologue.wav: 'charmap' codec can't encode character '\U0001f6a8' in position 2: character maps to <undefined> +``` + +Likely root cause: on Windows, stdout/stderr may use a legacy code page such as CP1252. `basic_pitch` prints emoji/status characters; that output can crash when Python tries to encode it to the console/log stream. + +Small tasks: +1. In `backend/modules/midi/engine.py`, wrap `predict_and_save()` with stdout/stderr capture or redirection using UTF-8 with replacement. +2. 
Set subprocess/backend environment defaults where appropriate: `PYTHONUTF8=1` and `PYTHONIOENCODING=utf-8`. +3. Ensure the MIDI conversion result reports the actual transcription failure, not an output-encoding failure. +4. Add a Windows-safe smoke/regression test around `_run_basic_pitch()` logging behavior when available. + +Success criteria: MIDI conversion no longer fails merely because a dependency printed an emoji/status character on Windows. + +### H3. Recommended near-term order for G/H work + +1. Append/maintain this plan doc only. +2. Pull **H** first because it is an active backend failure. +3. Pull **G1** second because add-to-library reliability is the base of global routing. +4. Pull **G2–G4** next as one thin vertical slice: action registry + selection + send-here for a small set of surfaces. +5. Pull **G5–G6** as visible UX wins: EDIT lane fill and footer mic button. +6. Pull media/VJ work (**G9–G11**) after routing is stable. +7. Pull DJ playlist/footer transport (**G12–G13**) after the bus contracts are clear. +8. Pull SUGGEST granularity (**G14**) after active reliability issues are fixed. +9. Pull vocoder integration (**G8**) after mic routing is stable and the external folder has been audited. + +## 6.9. Phase I — stems & MIDI as first-class library items + +User ask (2026-06-13, verbatim): "wtf is the point of us having stems in our library if we cant directly do anything with em? we need to treat em just like all other audio, and we can listen, delete, favorite, etc etc … We should also be able to bring our midi (and stems) that are in our library into shit and do shit with it. even use it as init audio or chimera fodder." + +### I1. Stems & MIDI first-class (SHIPPED 2026-06-13) + +Before: the Library STEMS and MIDI sub-tabs (`SubTabList` in `LibraryView.tsx`) rendered rows with only a right-click "send" menu — no listen, no delete, no favorite. MIDI had no audio destinations at all. 
Backend had no per-stem/per-MIDI delete and no favorite column. + +Shipped in this slice: +- **Backend** — DB schema v5 adds a `favorite` column to `stems` and `midis`; `db.py` gains `get_stem`/`get_midi`/`set_stem_favorite`/`set_midi_favorite`/`delete_stem`/`delete_midi`. New endpoints: `PATCH`/`DELETE /api/library/stems/{id}` and `PATCH`/`DELETE /api/midi/file/{id}` (delete removes the file on disk + the row, leaves the parent track + siblings intact). +- **Frontend** — each stem/MIDI row now has an inline favorite star, play/pause (stems stream through the global engine; MIDI synthesizes first), and a delete button, plus the existing send menu extended so MIDI routes to editor/init/inpaint/chimera (synth-rendered). Favorite/delete refresh the index in place (no loading flicker) via `onMutated`. +- **Synth render** — extracted the piano-roll's offline sawtooth voice + WAV encoder into a shared, engine-shaped `frontend/src/lib/midiSynth.ts` (`renderNotesToBlob` / `renderStepNotesToBlob` / `renderMidiBufferToBlob`). PianoRoll now delegates to it, so previews, the SEND-TO-EDITOR bounce, and library MIDI all sound identical. `midiIdToSendable()` in `sendToTargets.ts` makes any library MIDI a lazy `SendableAudio`. + +Caveat carried forward: MIDI audio uses the built-in sawtooth synth (no soundfont). That is the only render path until I2. + +### I2. Soundfont / sample instruments + "create your own" (NEXT, not started) + +User direction (2026-06-13): "download a bunch of soundfonts or samples or w/e also, maybe make a 'create your own soundfont/midi/synth' or something since we got all this other crap." + +Goal: replace the sawtooth fallback with real instrument rendering and let the user build/manage their own instruments. `midiSynth.ts` is already engine-shaped so this slots in behind the same `renderNotesToBlob` surface. + +Small tasks (scope-confirm at green-light): +1. Pick the render engine. 
Candidates: a WASM FluidSynth (`js-synthesizer`) for offline GM-soundfont (.sf2/.sf3) rendering, or a sample-based player (`soundfont-player` / pre-rendered sample packs). Favor offline-capable + bundleable (no external folder dependency, per the repo's vendoring rule). +2. Bundle/download a default GM soundfont (FluidR3_GM or similar permissive license); add a backend `instruments` store + a Settings/Storage surface for downloading more, mirroring the existing model/checkpoint download UX. +3. Define an `Instrument` abstraction in `midiSynth.ts`: `{ id, name, kind: 'synth'|'soundfont'|'sample', render(notes, opts) }`. Default stays the sawtooth; soundfont/sample engines register alongside. +4. Per-MIDI instrument pick: the row/menu lets the user choose which instrument to preview/render with (drum-channel-aware: GM channel 10 → percussion map). +5. "Create your own" builder: a small UI to define a synth patch (osc type, filter, envelope) and/or assemble a sample/soundfont instrument from imported audio (reuse the library import + stem isolation we already have — "all this other crap"). Persist user instruments; round-trip through the same render surface. +6. Optional: bake instrument choice into the auto-MIDI metadata so re-renders are deterministic. + +Validation: live A/B on the rig — the user's ears decide whether soundfont output is good enough to replace the sawtooth as default; headless checks are insufficient. + ## 7. Standing constraints (apply to all phases) - Plan-before-patching: each phase gets a short proposal ping before code if scope shifts. @@ -187,6 +530,6 @@ Output of F1: a table in this plan (or a follow-up doc) with VERIFIED yes/no/nee ## 8. Resume notes -Read first on resume: this plan, then `frontend/src/state/surfaceLayoutStore.ts`, `frontend/src/components/surface/ControlSurface.tsx`, `backend/modules/vj/sidecar.py`, `backend/modules/library/db.py`, `GANTASMO-LIVE-VJ/src/App.tsx` (+ `VideoOutput.tsx`), `backend/server.py` jobs endpoints. 
+Read first on resume: this plan, then `frontend/src/state/surfaceLayoutStore.ts`, `frontend/src/components/surface/ControlSurface.tsx`, `backend/modules/vj/sidecar.py`, `backend/modules/library/db.py`, `GANTASMO-LIVE-VJ/src/App.tsx` (+ `VideoOutput.tsx`), `backend/server.py` jobs endpoints. For the 2026-06-13 routing/mic/media additions, also read `frontend/src/components/ui/ContextMenu.tsx`, `frontend/src/lib/sendToTargets.ts`, `frontend/src/components/audio/MicRecorder.tsx`, `frontend/src/components/audio/PlayerFooter.tsx`, `frontend/src/state/editorStore.ts`, `frontend/src/components/audio/WaveformEditor.tsx`, `frontend/src/state/setlistStore.ts`, `frontend/src/components/library/SuggestPlaylistModal.tsx`, `backend/modules/stems/sidecar.py`, and `backend/modules/midi/engine.py`. Open items elsewhere (not this plan): PR #19 awaiting review/visual sign-off; audit revision (judge rulings) still ON HOLD; pagefile decision pending; vertical showcase held; CRISPR punchlist open. diff --git a/docs/reports/feature-doc-coverage-report.md b/docs/reports/feature-doc-coverage-report.md index 9a7aa9a..bb18e62 100644 --- a/docs/reports/feature-doc-coverage-report.md +++ b/docs/reports/feature-doc-coverage-report.md @@ -1,7 +1,7 @@ # Feature Documentation Coverage Report > [!NOTE] -> Generated: 2026-06-14T02:43:45.356Z · Git revision: `9cdeed22cf1f` · Repomix tracked: **no** +> Generated: 2026-06-14T02:47:32.303Z · Git revision: `66e4238b81a2` · Repomix tracked: **no** ## Audit Dashboard diff --git a/docs/reports/feature-doc-coverage.json b/docs/reports/feature-doc-coverage.json index a67a350..573f98c 100644 --- a/docs/reports/feature-doc-coverage.json +++ b/docs/reports/feature-doc-coverage.json @@ -1,6 +1,6 @@ { - "generatedAt": "2026-06-14T02:43:45.356Z", - "repoRevision": "9cdeed22cf1f", + "generatedAt": "2026-06-14T02:47:32.303Z", + "repoRevision": "66e4238b81a2", "repomixContext": { "path": "repomix-output.md", "present": false, diff --git 
a/docs/screenshots/manifest.json b/docs/screenshots/manifest.json index 7ed7253..05f96c7 100644 --- a/docs/screenshots/manifest.json +++ b/docs/screenshots/manifest.json @@ -1,5 +1,5 @@ { - "generatedAt": "2026-06-14T02:46:37.642Z", + "generatedAt": "2026-06-14T02:52:18.096Z", "entries": [ { "file": "01-shell-make.png", diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index d060143..8c65947 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -14,6 +14,7 @@ import { logInfo, logWarn } from './state/logStore'; import { handletheDAWAction } from './orb-kit/actionHandlers'; import { useStatusBarStore } from './state/statusBarStore'; import { useLibraryStore } from './state/libraryStore'; +import { useModuleStore } from './state/moduleStore'; import { useLayoutPrefs } from './state/layoutPrefsStore'; import { triggerPianoNoteFromMidi } from './components/audio/PianoRoll'; import { publishMidi } from './state/midiBus'; @@ -77,6 +78,14 @@ export default function App() { else setTimeout(() => void useLibraryStore.getState().load(), 0); }, [isBackendReady]); + // Preload the backend module catalog the moment the backend is ready, so the + // Settings modal reads a cached list instead of fetching on open (which used + // to fail transiently during a (re)start and look like all modules vanished). + useEffect(() => { + if (!isBackendReady) return; + void useModuleStore.getState().load(); + }, [isBackendReady]); + useEffect(() => { logInfo('system', 'theDAW UI initialized'); }, []); diff --git a/frontend/src/components/layout/SettingsModal.tsx b/frontend/src/components/layout/SettingsModal.tsx index b475064..c14ee7a 100644 --- a/frontend/src/components/layout/SettingsModal.tsx +++ b/frontend/src/components/layout/SettingsModal.tsx @@ -12,21 +12,17 @@ import { SlideTrack } from '../audio/SlideTrack'; import { PathInput } from '../ui/PathInput'; // CHANGED: Suno cloud-generation API key section (surfaced in Settings). 
import { SunoKeySettings } from '../../suno/SunoKeySettings'; - -interface ModuleConfig { - name: string; - label?: string; - description?: string; - version?: string; - enabled: boolean; - api_prefix?: string; - _dir?: string; - _loaded?: boolean; -} +import { useModuleStore, type ModuleConfig } from '../../state/moduleStore'; export const SettingsModal: React.FC<{ open: boolean; onClose: () => void }> = ({ open, onClose }) => { - const [modules, setModules] = useState([]); - const [loading, setLoading] = useState(false); + // Modules come from the shared store, preloaded on backend-ready (App.tsx). + // The modal never fetches on open, so it can't show a false "no modules" + // during a backend (re)start — the list is already cached and warm. + const modules = useModuleStore((s) => s.modules); + const loading = useModuleStore((s) => s.loading && !s.loaded); + const moduleError = useModuleStore((s) => s.error); + const loadModules = useModuleStore((s) => s.load); + const setModuleEnabled = useModuleStore((s) => s.setEnabled); const [dirty, setDirty] = useState(false); const [changedModules, setChangedModules] = useState>(() => new Set()); const [toggling, setToggling] = useState(null); @@ -44,27 +40,20 @@ export const SettingsModal: React.FC<{ open: boolean; onClose: () => void }> = ( useEffect(() => { if (!open) return; - setLoading(true); setDirty(false); setChangedModules(new Set()); void refreshFeatures(); - fetch('/api/modules/all') - .then((r) => r.json() as Promise) - .then((data) => setModules(Array.isArray(data) ? data : [])) - .catch(() => setModules([])) - .finally(() => setLoading(false)); - }, [open, refreshFeatures]); + // The catalog is normally already warm (preloaded on backend-ready). This + // is a no-op when loaded; it only does work if the preload hasn't run yet + // or a previous attempt errored. 
+ void loadModules(); + }, [open, refreshFeatures, loadModules]); const toggleModule = async (dirName: string, enabled: boolean) => { setToggling(dirName); try { - const res = await fetch(`/api/modules/${dirName}/enabled`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ enabled }), - }); - if (res.ok) { - setModules((prev) => prev.map((m) => (m._dir === dirName ? { ...m, enabled } : m))); + const ok = await setModuleEnabled(dirName, enabled); + if (ok) { setDirty(true); setChangedModules((prev) => new Set(prev).add(dirName)); } @@ -134,6 +123,18 @@ export const SettingsModal: React.FC<{ open: boolean; onClose: () => void }> = ( Loading modules...
+ ) : moduleError ? ( +
+ + Couldn't reach the backend ({moduleError}). + Modules are loaded from the server — this is a connection issue, not missing modules. + +
) : modules.length === 0 ? (
No modules found in backend/modules/
) : ( diff --git a/frontend/src/state/moduleStore.ts b/frontend/src/state/moduleStore.ts new file mode 100644 index 0000000..4f625ed --- /dev/null +++ b/frontend/src/state/moduleStore.ts @@ -0,0 +1,87 @@ +/** + * Backend module catalog store. + * + * The Settings modal used to fetch `/api/modules/all` every time it opened — + * which meant opening it during a backend (re)start hit a transient failure + * and rendered a misleading "No modules found", as if every module had + * vanished. This store fixes the root cause: the catalog loads ONCE when the + * backend becomes ready (preloaded from App), retries until it succeeds, and + * is cached. By the time the user opens Settings the list is already there, so + * there is nothing to fail. + */ +import { create } from 'zustand'; + +export interface ModuleConfig { + name: string; + label?: string; + description?: string; + version?: string; + enabled: boolean; + api_prefix?: string; + _dir?: string; + _loaded?: boolean; + [key: string]: unknown; +} + +interface ModuleStore { + modules: ModuleConfig[]; + loaded: boolean; + loading: boolean; + error: string | null; + /** Load (or reload) the catalog, retrying transient failures until success. */ + load: (opts?: { force?: boolean }) => Promise; + /** Toggle a module enabled flag (persists to its module.json on the backend). */ + setEnabled: (dirName: string, enabled: boolean) => Promise; +} + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +export const useModuleStore = create((set, get) => ({ + modules: [], + loaded: false, + loading: false, + error: null, + + load: async (opts = {}) => { + const s = get(); + if (s.loading) return; + if (s.loaded && !opts.force) return; + set({ loading: true, error: null }); + // Retry with backoff: the backend may still be binding right after launch. 
+ let delay = 500; + for (let attempt = 0; attempt < 6; attempt += 1) { + try { + const res = await fetch('/api/modules/all'); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const data = (await res.json()) as ModuleConfig[]; + if (!Array.isArray(data)) throw new Error('unexpected response shape'); + set({ modules: data, loaded: true, loading: false, error: null }); + return; + } catch (e) { + if (attempt === 5) { + set({ loading: false, error: e instanceof Error ? e.message : String(e) }); + return; + } + await sleep(delay); + delay = Math.min(delay * 2, 4000); + } + } + }, + + setEnabled: async (dirName, enabled) => { + try { + const res = await fetch(`/api/modules/${dirName}/enabled`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ enabled }), + }); + if (!res.ok) return false; + set((state) => ({ + modules: state.modules.map((m) => (m._dir === dirName ? { ...m, enabled } : m)), + })); + return true; + } catch { + return false; + } + }, +})); diff --git a/frontend/src/suno/SunoKeySettings.tsx b/frontend/src/suno/SunoKeySettings.tsx index 14e3516..3c8add5 100644 --- a/frontend/src/suno/SunoKeySettings.tsx +++ b/frontend/src/suno/SunoKeySettings.tsx @@ -99,24 +99,36 @@ export const SunoKeySettings: React.FC = () => { platform console. It's stored on the backend — never in the browser — and used for cloud generation.

-
+ {/* Wrapped in a so the password field has a containing form + (silences the Chrome "password field is not contained in a form" + warning) and Enter submits via the form, not an ad-hoc keydown. */} + { + e.preventDefault(); + void save(); + }} + > +
setVal(e.target.value)} - onKeyDown={(e) => { - if (e.key === 'Enter') void save(); - }} /> @@ -124,7 +136,7 @@ export const SunoKeySettings: React.FC = () => {
-
+ {err && {err}} diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index d2d3dea..4af29a4 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -58,14 +58,15 @@ export default defineConfig(({mode}) => { timeout: 0, proxyTimeout: 0, configure: (proxy) => { - proxy.on('error', (err, _req, res) => { - // Return a proper JSON error instead of silently swallowing. - // Without this, failed proxy requests hang indefinitely or - // fall through to Vite's SPA handler producing misleading - // "Not Found" or HTML responses instead of clear error JSON. - if (res && !res.headersSent) { - res.writeHead(502, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ + proxy.on('error', (_err, _req, res) => { + // For HTTP errors res is ServerResponse; for WebSocket errors it + // is a net.Socket (no writeHead). Guard before writing headers. + const r = res as Record; + if (typeof r['writeHead'] === 'function' && !r['headersSent']) { + (r['writeHead'] as (s: number, h: Record) => void)( + 502, { 'Content-Type': 'application/json' } + ); + (r['end'] as (b: string) => void)(JSON.stringify({ detail: 'Backend unreachable — is the server running on port 8600?', })); } From a301d43291f7ad95e73cdd59e87c50d811fb99a0 Mon Sep 17 00:00:00 2001 From: Daniel Joaquin Trujillo <54636507+danieljtrujillo@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:19:11 -0700 Subject: [PATCH 3/3] fix(generate): reliably surface freshly-generated tracks in the library (G1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a generation completes the backend writes artifacts synchronously, but the library list index can lag that write by a beat — when it did, the entry wasn't in the just-refreshed list and the track silently failed to appear ('manual reload'). 
Now the post-generation reconciliation retries the refresh a few times until the expected `{jobId}_00` id shows up, and on genuine failure surfaces a visible status message instead of only logging. Verified the rest of the add-to-library chain end-to-end: the /api/library/ import endpoint (200 + full entry), the frontend contract, importEntry's in-place store update, and the backend's `{jobId}_NN` id scheme all match. docs: record G15 research findings (Quest video-in without MQDH — scrcpy+OBS virtual cam Tier 1 needs only a VJ device picker; raw passthrough is Tier 2 via the Unity Passthrough Camera API). --- ...al-layout-vj-library-optimizations-plan.md | 7 +++++ docs/reports/feature-doc-coverage-report.md | 2 +- docs/reports/feature-doc-coverage.json | 4 +-- docs/screenshots/manifest.json | 2 +- frontend/src/state/generateStore.ts | 28 +++++++++++++++---- 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md index 37633de..24c74c7 100644 --- a/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md +++ b/docs/plans/2026-06-13-global-layout-vj-library-optimizations-plan.md @@ -425,6 +425,13 @@ Two related problems, one task: Guard rail: the VJ app lives in a separate repo working tree — the error-message fix is a separate commit there (standing constraint §7). +**Research findings (2026-06-13).** Quest video-in without MQDH is feasible; two tiers: + +- **Tier 1 — rendered headset view, zero app changes to the source (RECOMMENDED first).** The Quest is Android, so `scrcpy` (FOSS, ADB over USB or Wi-Fi, no MQDH) mirrors the in-headset rendered view to a desktop window. Pipe that window through OBS → **OBS Virtual Camera**, which then appears as a normal `videoinput` device. 
The VJ app already takes a camera via `getUserMedia`, so the only code gap is the camera source: `GANTASMO-LIVE-VJ/src/useMedia.ts:71` hardcodes `{ facingMode: 'environment' }` with NO device picker, so it grabs the default camera and the user can't choose the OBS virtual cam. **Concrete task: add a device picker** — `navigator.mediaDevices.enumerateDevices()` → list `videoinput`s → request `{ deviceId: { exact } }`. That single change unlocks scrcpy→OBS→VJ (and any other capture device) with no Quest-side dependency beyond scrcpy+OBS. +- **Tier 2 — raw passthrough, needs Unity work.** The Quest browser cannot access the passthrough/headset cameras (no web API). Raw passthrough requires the **Quest Passthrough Camera API** (Quest 3, recent Meta SDK) inside a native/Unity app, which then streams out over WebRTC/RTSP to the VJ app as a source. This ties into the existing `GANTASMO-MIDI` Unity app (memory `project_gantasmo_midi_unity`) — reuse its transport. Bigger lift; only pursue if Tier 1's rendered view isn't enough. + +Dependency footprint: Tier 1 = scrcpy + OBS (both FOSS, user-installed, no app deps) + a ~30-line device-picker change in the VJ app. Tier 2 = Unity Passthrough Camera API + a WebRTC/RTSP path. Recommend shipping Tier 1's device picker first. + ## 6.8. Phase H — active incident fix: stems sidecar timeout + MIDI charmap error User ask (2026-06-13): resolve the backend failure where library import returned 200, stems sidecar failed to write `backend_port.txt` within 300 seconds after missing `torch`/`torchcrepe`, `torchvision 0.27.0` required `torch==2.12.0` but `torch 2.11.0+cu128` was installed, and `basic_pitch` MIDI conversion failed on Windows with a `charmap` emoji encoding error. 
diff --git a/docs/reports/feature-doc-coverage-report.md b/docs/reports/feature-doc-coverage-report.md index bb18e62..872cc5e 100644 --- a/docs/reports/feature-doc-coverage-report.md +++ b/docs/reports/feature-doc-coverage-report.md @@ -1,7 +1,7 @@ # Feature Documentation Coverage Report > [!NOTE] -> Generated: 2026-06-14T02:47:32.303Z · Git revision: `66e4238b81a2` · Repomix tracked: **no** +> Generated: 2026-06-14T03:19:14.053Z · Git revision: `53c1d280eefa` · Repomix tracked: **no** ## Audit Dashboard diff --git a/docs/reports/feature-doc-coverage.json b/docs/reports/feature-doc-coverage.json index 573f98c..43e89b7 100644 --- a/docs/reports/feature-doc-coverage.json +++ b/docs/reports/feature-doc-coverage.json @@ -1,6 +1,6 @@ { - "generatedAt": "2026-06-14T02:47:32.303Z", - "repoRevision": "66e4238b81a2", + "generatedAt": "2026-06-14T03:19:14.053Z", + "repoRevision": "53c1d280eefa", "repomixContext": { "path": "repomix-output.md", "present": false, diff --git a/docs/screenshots/manifest.json b/docs/screenshots/manifest.json index 05f96c7..f5ff1af 100644 --- a/docs/screenshots/manifest.json +++ b/docs/screenshots/manifest.json @@ -1,5 +1,5 @@ { - "generatedAt": "2026-06-14T02:52:18.096Z", + "generatedAt": "2026-06-14T03:21:55.908Z", "entries": [ { "file": "01-shell-make.png", diff --git a/frontend/src/state/generateStore.ts b/frontend/src/state/generateStore.ts index 55bad02..75af6d0 100644 --- a/frontend/src/state/generateStore.ts +++ b/frontend/src/state/generateStore.ts @@ -712,10 +712,25 @@ export const useGenerateStore = create()((set, get) => ({ // Load the first new entry into the player so playback works // immediately. The blob comes from the backend streaming URL. - const after = useLibraryStore.getState().entries; - const firstEntry = items[0]?.audio_base64 - ? after.find((e) => e.id === `${jobId}_00`) ?? 
after.find((e) => e.id === jobId) - : null; + // + // The backend writes artifacts synchronously before reporting + // 'completed', but the library list index can lag that write by a + // beat — when it does, the entry isn't in the just-refreshed list + // and the track silently fails to appear ("manual reload"). Re- + // refresh a few times until the expected id shows up so generated + // tracks reliably land in the library. + const findFirst = () => { + const after = useLibraryStore.getState().entries; + return items[0]?.audio_base64 + ? after.find((e) => e.id === `${jobId}_00`) ?? after.find((e) => e.id === jobId) ?? null + : null; + }; + let firstEntry = findFirst(); + for (let attempt = 0; !firstEntry && items[0]?.audio_base64 && attempt < 5; attempt += 1) { + await wait(400); + await useLibraryStore.getState().refresh(); + firstEntry = findFirst(); + } if (firstEntry) { try { const loadT0 = performance.now(); @@ -735,7 +750,10 @@ export const useGenerateStore = create()((set, get) => ({ logError('generate', `Player load failed: ${msg}`); } } else { - logError('generate', `Could not find freshly-saved entry for job ${jobId}; library may need a manual reload.`); + // Retries exhausted: surface it instead of leaving the user to + // wonder why a track they just generated is missing. + logError('generate', `Could not find freshly-saved entry for job ${jobId} after retries — try reloading the library panel.`); + useStatusBarStore.getState().setText('Saved to disk, but the library list did not refresh — reload the Library panel.'); } useStatusBarStore.getState().setText('GENERATION COMPLETE');