diff --git a/.github/workflows/build-pr.yml b/.github/workflows/build-pr.yml index 7a8060e..9b4dc1f 100644 --- a/.github/workflows/build-pr.yml +++ b/.github/workflows/build-pr.yml @@ -99,6 +99,10 @@ jobs: NODE_ENV: development run: bun nuxt prepare + - name: Run frontend format check + working-directory: frontend + run: bun run format:check + - name: Run frontend linting working-directory: frontend run: bun run lint diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index de96f2d..9fa86b2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -113,6 +113,10 @@ jobs: NODE_ENV: development run: bun nuxt prepare + - name: Run frontend format check + working-directory: frontend + run: bun run format:check + - name: Run frontend linting working-directory: frontend run: bun run lint diff --git a/API.md b/API.md index 0b536f5..c9be535 100644 --- a/API.md +++ b/API.md @@ -25,6 +25,7 @@ This document describes the FBC Uploader REST API endpoints. All endpoints retur - [GET /api/tokens/{token\_value}/uploads](#get-apitokenstoken_valueuploads) - [GET /api/tokens/{download\_token}/uploads/{upload\_id}](#get-apitokensdownload_tokenuploadsupload_id) - [GET /api/tokens/{download\_token}/uploads/{upload\_id}/stream](#get-apitokensdownload_tokenuploadsupload_idstream) + - [GET /api/tokens/{download\_token}/uploads/{upload\_id}/preview.mp4](#get-apitokensdownload_tokenuploadsupload_idpreviewmp4) - [GET /api/tokens/{download\_token}/uploads/{upload\_id}/thumbnail](#get-apitokensdownload_tokenuploadsupload_idthumbnail) - [GET /api/tokens/{download\_token}/uploads/{upload\_id}/download](#get-apitokensdownload_tokenuploadsupload_iddownload) - [POST /api/uploads/initiate](#post-apiuploadsinitiate) @@ -35,6 +36,7 @@ This document describes the FBC Uploader REST API endpoints. 
All endpoints retur - [DELETE /api/uploads/{upload\_id}/cancel](#delete-apiuploadsupload_idcancel) - [POST /api/uploads/{upload\_id}/complete](#post-apiuploadsupload_idcomplete) - [GET /api/metadata/](#get-apimetadata) + - [POST /api/metadata/extract](#post-apimetadataextract) - [POST /api/metadata/validate](#post-apimetadatavalidate) - [GET /api/notice/](#get-apinotice) - [GET /api/admin/validate](#get-apiadminvalidate) @@ -327,6 +329,7 @@ Get public token information including uploads. "meta_data": {"title": "My Document"}, "upload_length": 1024000, "upload_offset": 1024000, + "recommended_chunk_bytes": 94371840, "status": "completed", "created_at": "2025-12-23T12:00:00Z", "completed_at": "2025-12-23T12:01:00Z", @@ -344,6 +347,7 @@ Get public token information including uploads. - `max_chunk_bytes`: Maximum chunk size for TUS uploads (from `FBC_MAX_CHUNK_BYTES`) - `allow_public_downloads`: Whether public downloads are enabled - `uploads`: Array of upload records +- `recommended_chunk_bytes`: Server-selected TUS chunk size to reuse for checksum-verified resume operations - `stream_url`: Inline media URL for browser playback when the upload is completed - `thumbnail_url`: Preview image URL for embeds and thumbnail-first media UIs @@ -378,6 +382,7 @@ List all uploads for a specific token. "meta_data": {"title": "My Document"}, "upload_length": 1024000, "upload_offset": 1024000, + "recommended_chunk_bytes": 94371840, "status": "completed", "created_at": "2025-12-23T12:00:00Z", "completed_at": "2025-12-23T12:01:00Z", @@ -417,6 +422,7 @@ Get metadata information about a completed upload. }, "upload_length": 1024000, "upload_offset": 1024000, + "recommended_chunk_bytes": 94371840, "status": "completed", "created_at": "2025-01-01T12:00:00Z", "completed_at": "2025-01-01T12:05:00Z", @@ -435,6 +441,8 @@ Get metadata information about a completed upload. ### GET /api/tokens/{download_token}/uploads/{upload_id}/stream +`HEAD` is also supported. 
+ Stream a completed file inline for browser playback. **Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) @@ -459,8 +467,110 @@ Returns the file with headers: --- +### GET /api/tokens/{download_token}/uploads/{upload_id}/subtitles + +List external subtitle tracks that match a completed upload filename. + +**Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) + +**Path Parameters:** +- `download_token` (string): The download token +- `upload_id` (integer): The upload record ID + +**Response (200):** +```json +{ + "subtitles": [ + { + "source_format": "vtt", + "delivery_format": "vtt", + "renderer": "native", + "url": "http://localhost:8000/api/tokens/fbc_token/uploads/rT72ZKGMPdldiEmA9eDI7kik/subtitles/vtt" + }, + { + "source_format": "ass", + "delivery_format": "ass", + "renderer": "assjs", + "url": "http://localhost:8000/api/tokens/fbc_token/uploads/rT72ZKGMPdldiEmA9eDI7kik/subtitles/ass" + } + ] +} +``` + +**Notes:** +- This endpoint only returns tracks discovered under `FBC_SUBTITLE_PATH`. +- Discovery results are cached per upload for `FBC_SUBTITLE_CACHE_TTL_SECONDS`, including cases where no subtitles are found. +- Results are ordered by renderer preference: `.vtt`, then `.srt`, then `.ass`. +- `.srt` files are exposed here with `source_format: "srt"`, `delivery_format: "vtt"`, and `renderer: "native"`. +- Duplicate matches for the same stem and extension are treated as ambiguous and omitted. + +**Error Responses:** +- `404 Not Found` - Download token or upload not found +- `409 Conflict` - Upload not yet completed + +--- + +### GET /api/tokens/{download_token}/uploads/{upload_id}/subtitles/{source_format} + +`HEAD` is also supported. + +Return the selected subtitle content for a completed upload. 
+ +**Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) + +**Path Parameters:** +- `download_token` (string): The download token +- `upload_id` (integer): The upload record ID +- `source_format` (string): One of `vtt`, `srt`, or `ass` + +**Response (200):** +- `vtt`: `Content-Type: text/vtt` +- `srt`: Converted on the fly and returned as `Content-Type: text/vtt` +- `ass`: Returned as `Content-Type: text/x-ssa` + +**Error Responses:** +- `404 Not Found` - Download token, upload, or subtitle not found +- `409 Conflict` - Upload not yet completed + +**Notes:** +- The share page uses `vtt` and converted `srt` as native browser subtitle tracks. +- `.ass` subtitles are intended for ASS.js-based browser rendering. +- Subtitle discovery is cached per upload, so newly added or removed subtitle files may take up to `FBC_SUBTITLE_CACHE_TTL_SECONDS` to appear. + +--- + +### GET /api/tokens/{download_token}/uploads/{upload_id}/preview.mp4 + +`HEAD` is also supported. + +Return a short MP4 preview clip for bot embeds when one has been generated. + +**Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) + +**Path Parameters:** +- `download_token` (string): The download token +- `upload_id` (integer): The upload record ID + +**Response (200):** +Returns the file with headers: +- `Content-Type`: `video/mp4` +- `Content-Disposition`: `inline; filename="..."` + +**Error Responses:** +- `404 Not Found` - Download token, upload, or preview not found +- `409 Conflict` - Upload not yet completed + +**Notes:** +- Bot embed metadata may use this endpoint instead of the full stream for large videos that meet the preview size threshold. +- Setting `FBC_EMBED_PREVIEW_MIN_SIZE_BYTES=0` disables preview sidecars and keeps embed metadata on the original `/stream` URL. +- If no preview sidecar exists, embed metadata falls back to the original `/stream` URL. 
+ +--- + ### GET /api/tokens/{download_token}/uploads/{upload_id}/thumbnail +`HEAD` is also supported. + Return a preview image for a completed upload. **Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) @@ -480,13 +590,14 @@ Returns an image with headers: **Notes:** - Video uploads return a generated sidecar thumbnail when available -- Uploads without a generated sidecar return the shared fallback image from the exported frontend assets - The same endpoint is used by social embeds and the thumbnail-first share page UI --- ### GET /api/tokens/{download_token}/uploads/{upload_id}/download +`HEAD` is also supported. + Download a completed file. **Authentication:** Required (Admin, or public if `FBC_ALLOW_PUBLIC_DOWNLOADS=1`) @@ -546,10 +657,13 @@ Initiate a new file upload. "category": "reports" }, "allowed_mime": ["application/pdf"], - "remaining_uploads": 0 + "remaining_uploads": 0, + "recommended_chunk_bytes": 1024000 } ``` +- `recommended_chunk_bytes`: Server-selected TUS chunk size for this upload. Clients should use this value when sending checksum-verified PATCH requests and when resuming the upload. + **Error Responses:** - `404 Not Found` - Token does not exist - `403 Forbidden` - Token expired, disabled, or upload limit reached @@ -777,9 +891,45 @@ Each field object can have: - `min`, `max` (number, optional): Numeric value constraints - `regex` (string, optional): Regular expression pattern for string validation - `default` (any, optional): Default value if not provided +- `extract_regex` (string, optional): Python regular expression used by `/api/metadata/extract` to prefill metadata from a filename **Notes:** - Schema is loaded from `{config_path}/metadata.json` +- `extract_regex` is interpreted with Python `re` syntax + +--- + +### POST /api/metadata/extract + +Extract metadata values from a filename using the configured schema. 
+ +**Authentication:** None + +**Request Body:** +```json +{ + "filename": "240101 Example Show [youtube-dQw4w9WgXcQ].mp4" +} +``` + +**Response (200):** +```json +{ + "metadata": { + "broadcast_date": "2024-01-01", + "title": "Example Show", + "source": "youtube", + "source_id": "dQw4w9WgXcQ" + } +} +``` + +**Extraction Rules:** +1. Fields without `extract_regex` are ignored +2. The regex is matched against the provided filename with case-insensitive search +3. Capture group 1 is used when present; otherwise the full match is used +4. For `date` fields, named groups `year`, `month`, and `day` are combined into `YYYY-MM-DD` +5. Two-digit years are normalized to `20xx` --- @@ -982,8 +1132,6 @@ Typical upload flow: Authorization: Bearer YOUR_API_KEY ``` -After upload data is received, multimedia files enter background post-processing before they become `completed`. Browser-safe `video/mp4` and `video/webm` files are kept as-is. Compatible non-MP4 video containers may be copy-remuxed into MP4 without transcoding, which updates the stored filename, extension, MIME type, size, and ffprobe metadata to match the final file. Files larger than `FBC_MAX_REMUX_BYTES` skip remux and still complete normally. 
- --- ## Error Codes Reference diff --git a/Dockerfile b/Dockerfile index 53c870f..999eb54 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,11 +45,15 @@ ENV PYDEVD_DISABLE_FILE_VALIDATION=1 ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONFAULTHANDLER=1 ENV FBC_DEV_MODE=0 +ENV PYTHONUTF8=1 ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get install -y --no-install-recommends libmagic1 && \ + apt-get install -y --no-install-recommends locales libmagic1 && \ + sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ + dpkg-reconfigure --frontend=noninteractive locales && \ + update-locale LANG=en_US.UTF-8 && \ rm -rf /var/lib/apt/lists/* RUN mkdir /config /downloads && ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone && \ @@ -68,6 +72,9 @@ COPY ./backend/bin/fbc /usr/bin/fbc RUN chmod +x /usr/bin/fbc ENV PATH="/opt/python/bin:$PATH" +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 VOLUME /config VOLUME /downloads diff --git a/README.md b/README.md index f813af1..0f9f407 100644 --- a/README.md +++ b/README.md @@ -55,33 +55,39 @@ Then you can access the WebUI at `http://localhost:8000`. 
All configuration is done via environment variables prefixed with `FBC_`: -| Variable | Default | Description | -| ----------------------------------- | ---------------- | --------------------------------------------------------------------------------- | -| `FBC_CONFIG_PATH` | `./data/config` | Configuration directory | -| `FBC_STORAGE_PATH` | `./data/uploads` | Directory for uploaded files | -| `FBC_ADMIN_API_KEY` | Auto-generated | Admin API key (stored in `{config_path}/secret.key` if not set) | -| `FBC_DEFAULT_TOKEN_TTL_HOURS` | `24` | Default token expiration in hours (1-720) | -| `FBC_CLEANUP_INTERVAL_SECONDS` | `3600` | Interval between cleanup job runs | -| `FBC_INCOMPLETE_TTL_HOURS` | `24` | Time-to-live for incomplete uploads (0 to disable) | -| `FBC_DISABLED_TOKENS_TTL_DAYS` | `30` | Days to keep disabled tokens before deletion (0 to disable) | -| `FBC_DELETE_FILES_ON_TOKEN_CLEANUP` | `true` | Delete associated files when cleaning up disabled tokens | -| `FBC_MAX_CHUNK_BYTES` | `94371840` | Maximum TUS chunk size. 
Default to (90MB) | -| `FBC_MAX_REMUX_BYTES` | `5368709120` | Maximum file size eligible for copy-remux to MP4 during post-processing (5GB) | -| `FBC_POSTPROCESSING_WORKERS` | `4` | Number of uploads processed concurrently in the background post-processing queue | -| `FBC_ALLOW_PUBLIC_DOWNLOADS` | `false` | Allow public downloads without authentication | -| `FBC_TRUST_PROXY_HEADERS` | `false` | Trust `X-Forwarded-*` headers, but only from proxies in `FBC_FORWARDED_ALLOW_IPS` | -| `FBC_FORWARDED_ALLOW_IPS` | `127.0.0.1,::1` | Comma-separated trusted proxy IPs or CIDRs allowed to supply forwarded headers | +| Variable | Default | Description | +| ----------------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------ | +| `FBC_CONFIG_PATH` | `./data/config` | Configuration directory | +| `FBC_STORAGE_PATH` | `./data/uploads` | Directory for uploaded files | +| `FBC_SUBTITLE_PATH` | unset | Optional external subtitle directory scanned recursively for matching `.vtt`, `.srt`, and `.ass` files | +| `FBC_SUBTITLE_CACHE_TTL_SECONDS` | `600` | Cache subtitle lookup results per upload for this many seconds, including misses; set `0` to disable | +| `FBC_ADMIN_API_KEY` | Auto-generated | Admin API key (stored in `{config_path}/secret.key` if not set) | +| `FBC_DEFAULT_TOKEN_TTL_HOURS` | `24` | Default token expiration in hours (1-720) | +| `FBC_CLEANUP_INTERVAL_SECONDS` | `3600` | Interval between cleanup job runs | +| `FBC_INCOMPLETE_TTL_HOURS` | `24` | Time-to-live for incomplete uploads (0 to disable) | +| `FBC_DISABLED_TOKENS_TTL_DAYS` | `30` | Days to keep disabled tokens before deletion (0 to disable) | +| `FBC_DELETE_FILES_ON_TOKEN_CLEANUP` | `true` | Delete associated files when cleaning up disabled tokens | +| `FBC_MAX_CHUNK_BYTES` | `94371840` | Maximum TUS chunk size. 
Defaults to 90 MB. | +| `FBC_MAX_REMUX_BYTES` | `5368709120` | Maximum file size eligible for copy-remux to MP4 during post-processing (5GB) | +| `FBC_POSTPROCESSING_WORKERS` | `4` | Number of uploads processed concurrently in the background post-processing queue | +| `FBC_EMBED_PREVIEW_CLIP_SECONDS` | `180` | Length of generated bot preview clips in seconds (0 disables preview generation) | +| `FBC_EMBED_PREVIEW_MIN_SIZE_BYTES` | `204472320` | Only generate bot preview clips for videos at or above this size in bytes (195 MB); `0` disables the feature | +| `FBC_ALLOW_PUBLIC_DOWNLOADS` | `false` | Allow public downloads without authentication | +| `FBC_TRUST_PROXY_HEADERS` | `false` | Trust `X-Forwarded-*` headers, but only from proxies in `FBC_FORWARDED_ALLOW_IPS` | +| `FBC_FORWARDED_ALLOW_IPS` | `127.0.0.1,::1` | Comma-separated trusted proxy IPs or CIDRs allowed to supply forwarded headers | When running behind a reverse proxy, `FBC_TRUST_PROXY_HEADERS=true` is not enough on its own. You must also set `FBC_FORWARDED_ALLOW_IPS` to the proxy IPs or networks that connect directly to FBC Uploader, such as a Docker bridge subnet like `172.23.0.0/16`. -If you leave `FBC_FORWARDED_ALLOW_IPS` at its default, only local loopback proxies are trusted. This protects against clients forging `X-Forwarded-For`, `X-Forwarded-Proto`, or `X-Forwarded-Host` when the app is exposed directly. +If you leave `FBC_FORWARDED_ALLOW_IPS` at its default, only local loopback proxies are trusted. -Uploaded multimedia files are post-processed after upload completion. Browser-safe `mp4` and `webm` files are kept as-is. Compatible non-MP4 video containers may be copy-remuxed into `mp4` for better playback compatibility without transcoding. Files larger than `FBC_MAX_REMUX_BYTES` skip remux and still complete normally. The background worker pool processes up to `FBC_POSTPROCESSING_WORKERS` uploads concurrently.
+## External Subtitles + +Set `FBC_SUBTITLE_PATH` to an existing directory to enable subtitle discovery on the `/f/{token}` share page. ## Dynamic Metadata Schema Upload metadata is configurable via `{config_path}/metadata.json`. Define custom fields with validation rules, types, and UI hints. -The schema is validated on both client and server. See [metadata.md](metadata.md) for full documentation. +The schema is validated on the server. See [metadata.md](metadata.md) for full documentation. ## yt-dlp Extractor diff --git a/backend/app/config.py b/backend/app/config.py index b4ab840..fee516a 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -12,6 +12,8 @@ class Settings(BaseSettings): database_url: str | None = Field(None, validation_alias="FBC_DATABASE_URL") admin_api_key: str = Field("change-me", validation_alias="FBC_ADMIN_API_KEY") storage_path: str = Field("./data/uploads", validation_alias="FBC_STORAGE_PATH") + subtitle_path: str | None = Field(None, validation_alias="FBC_SUBTITLE_PATH") + subtitle_cache_ttl_seconds: int = Field(600, ge=0, validation_alias="FBC_SUBTITLE_CACHE_TTL_SECONDS") frontend_export_path: str = Field("./frontend/exported", validation_alias="FBC_FRONTEND_EXPORT_PATH") public_base_url: str | None = Field(default=None, validation_alias="FBC_PUBLIC_BASE_URL") default_token_ttl_hours: int = Field(24, ge=1, le=24 * 30) @@ -25,6 +27,8 @@ class Settings(BaseSettings): max_chunk_bytes: int = Field(90 * 1024 * 1024, validation_alias="FBC_MAX_CHUNK_BYTES") max_remux_bytes: int = Field(5 * 1024 * 1024 * 1024, validation_alias="FBC_MAX_REMUX_BYTES") postprocessing_workers: int = Field(4, ge=1, validation_alias="FBC_POSTPROCESSING_WORKERS") + embed_preview_clip_seconds: int = Field(180, ge=0, le=600, validation_alias="FBC_EMBED_PREVIEW_CLIP_SECONDS") + embed_preview_min_size_bytes: int = Field(195 * 1024 * 1024, ge=0, validation_alias="FBC_EMBED_PREVIEW_MIN_SIZE_BYTES") allow_public_downloads: bool = Field(False,
validation_alias="FBC_ALLOW_PUBLIC_DOWNLOADS") trust_proxy_headers: bool = Field(False, validation_alias="FBC_TRUST_PROXY_HEADERS") forwarded_allow_ips: str = Field("127.0.0.1,::1", validation_alias="FBC_FORWARDED_ALLOW_IPS") @@ -35,6 +39,13 @@ def model_post_init(self, _) -> None: cfg_dir: Path = Path(self.config_path).expanduser().resolve() cfg_dir.mkdir(parents=True, exist_ok=True) + if self.subtitle_path: + subtitle_dir = Path(self.subtitle_path).expanduser().resolve() + if not subtitle_dir.exists() or not subtitle_dir.is_dir(): + msg = "FBC_SUBTITLE_PATH must point to an existing directory" + raise ValueError(msg) + self.subtitle_path = str(subtitle_dir) + if not self.database_url: default_db_path: Path = cfg_dir / "fbc.db" self.database_url = f"sqlite+aiosqlite:///{default_db_path!s}" diff --git a/backend/app/db.py b/backend/app/db.py index 8deeb2f..003add6 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -7,14 +7,19 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.orm import declarative_base +from sqlalchemy.pool import StaticPool from .config import settings url: URL = make_url(settings.database_url) -if url.drivername.startswith("sqlite") and url.database: - Path(url.database).expanduser().parent.mkdir(parents=True, exist_ok=True) - -engine: AsyncEngine = create_async_engine(settings.database_url, future=True) +engine_kwargs: dict[str, Any] = {"future": True} +if url.drivername.startswith("sqlite"): + if url.database and url.database != ":memory:": + Path(url.database).expanduser().parent.mkdir(parents=True, exist_ok=True) + else: + engine_kwargs["poolclass"] = StaticPool + +engine: AsyncEngine = create_async_engine(settings.database_url, **engine_kwargs) SessionLocal: async_sessionmaker[AsyncSession] = async_sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession) Base: Any = declarative_base() diff --git 
a/backend/app/embed_preview.py b/backend/app/embed_preview.py index cd325e6..6c6a5c3 100644 --- a/backend/app/embed_preview.py +++ b/backend/app/embed_preview.py @@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from backend.app import models, utils +from backend.app.config import settings templates = Jinja2Templates(directory=str(Path(__file__).resolve().parent / "templates")) @@ -48,6 +49,33 @@ async def render_embed_preview(request: Request, db: AsyncSession, token_row: mo mime_type = first_media.mimetype or "application/octet-stream" is_video = mime_type.startswith("video/") is_audio = mime_type.startswith("audio/") + is_directly_embeddable = utils.is_directly_embeddable_video(first_media.mimetype, ffprobe_data) if is_video else False + media_url = str(request.url_for("stream_file", download_token=token_row.download_token, upload_id=first_media.public_id)) + preview_url = None + used_generated_preview = False + allow_direct_video_embed = True + if is_video and settings.embed_preview_clip_seconds > 0 and settings.embed_preview_min_size_bytes > 0: + candidate_preview_url = str( + request.url_for("get_file_preview", download_token=token_row.download_token, upload_id=first_media.public_id) + ) + preview_path = utils.get_preview_path(first_media.storage_path or "") if first_media.storage_path else None + should_use_preview = not is_directly_embeddable or utils.should_generate_video_preview( + first_media.size_bytes, + min_size_bytes=settings.embed_preview_min_size_bytes, + ) + if should_use_preview and preview_path and preview_path.is_file() and preview_path.stat().st_size > 0: + preview_url = candidate_preview_url + + embed_media_url = preview_url if preview_url and not user else media_url + if preview_url and embed_media_url == preview_url: + used_generated_preview = True + mime_type = utils.PREVIEW_MEDIA_TYPE + elif is_video and not user and not is_directly_embeddable: + allow_direct_video_embed = False + + description: str = f"{len(uploads)} 
file(s) shared" if len(uploads) > 1 else "Shared file" + if used_generated_preview: + description: str = "A video preview. Click to watch the full-length video." return templates.TemplateResponse( request=request, @@ -55,16 +83,18 @@ async def render_embed_preview(request: Request, db: AsyncSession, token_row: mo context={ "request": request, "title": first_media.filename or "Shared Media", - "description": f"{len(uploads)} file(s) shared" if len(uploads) > 1 else "Shared file", + "description": description, + "uses_preview_clip": used_generated_preview, "og_type": "video.other" if is_video else "music.song", "share_url": str(request.url_for("share_page", token=token_row.download_token)), - "media_url": str(request.url_for("stream_file", download_token=token_row.download_token, upload_id=first_media.public_id)), + "media_url": media_url, + "embed_media_url": embed_media_url, "download_url": str(request.url_for("download_file", download_token=token_row.download_token, upload_id=first_media.public_id)), "thumbnail_url": str( request.url_for("get_file_thumbnail", download_token=token_row.download_token, upload_id=first_media.public_id) ), "mime_type": mime_type, - "is_video": is_video, + "is_video": is_video and (user or allow_direct_video_embed), "is_audio": is_audio, "width": video_metadata.get("width"), "height": video_metadata.get("height"), diff --git a/backend/app/main.py b/backend/app/main.py index c91ecb6..9380532 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -48,9 +48,9 @@ async def lifespan(app: FastAPI): queue = ProcessingQueue() queue.start_worker() app.state.processing_queue = queue - app.state.thumbnail_backfill_task = asyncio.create_task( + app.state.media_sidecar_backfill_task = asyncio.create_task( backfill_missing_video_thumbnails(), - name="thumbnail_backfill", + name="media_sidecar_backfill", ) if not settings.skip_cleanup: @@ -58,12 +58,12 @@ async def lifespan(app: FastAPI): yield - thumbnail_backfill_task: asyncio.Task | None = 
getattr(app.state, "thumbnail_backfill_task", None) - if thumbnail_backfill_task: - if not thumbnail_backfill_task.done(): - thumbnail_backfill_task.cancel() + media_sidecar_backfill_task: asyncio.Task | None = getattr(app.state, "media_sidecar_backfill_task", None) + if media_sidecar_backfill_task: + if not media_sidecar_backfill_task.done(): + media_sidecar_backfill_task.cancel() with suppress(asyncio.CancelledError, Exception): - await thumbnail_backfill_task + await media_sidecar_backfill_task await queue.stop_worker() diff --git a/backend/app/metadata_schema.py b/backend/app/metadata_schema.py index 0823fd4..0286edb 100644 --- a/backend/app/metadata_schema.py +++ b/backend/app/metadata_schema.py @@ -1,4 +1,5 @@ import json +import re from datetime import date, datetime from pathlib import Path from typing import Any @@ -177,3 +178,41 @@ def validate_metadata(values: dict[str, Any]) -> dict[str, Any]: cleaned[key] = val.isoformat() if isinstance(val, (datetime, date)) else val return cleaned + + +def extract_metadata_from_filename(filename: str) -> dict[str, Any]: + """ + Extract metadata values from a filename using the configured schema. + + Args: + filename (str): Filename to inspect. + + Returns: + dict[str, Any]: Extracted metadata values. 
+ + """ + schema: list[dict] = load_schema() + extracted: dict[str, Any] = {} + + for field in schema: + regex: str | None = field.get("extract_regex") + if not regex: + continue + + match = re.search(regex, filename, flags=re.IGNORECASE) + if not match: + continue + + value: Any = match.group(1) if match.lastindex else match.group(0) + if field.get("type") == "date": + groups = match.groupdict() + year = groups.get("year") + month = groups.get("month") + day = groups.get("day") + if year and month and day: + full_year = f"20{year}" if len(year) == 2 else year + value = f"{full_year}-{month}-{day}" + + extracted[field["key"]] = value + + return extracted diff --git a/backend/app/postprocessing.py b/backend/app/postprocessing.py index 5fff74a..f57735c 100644 --- a/backend/app/postprocessing.py +++ b/backend/app/postprocessing.py @@ -24,10 +24,13 @@ from .utils import ( detect_mimetype, ensure_faststart_mp4, + ensure_video_preview, ensure_video_thumbnail, extract_ffprobe_metadata, get_mp4_remux_skip_reason, + is_directly_embeddable_video, is_multimedia, + preview_exists, remux_to_mp4, should_remux_to_mp4, thumbnail_exists, @@ -36,6 +39,36 @@ logger = logging.getLogger(__name__) +def _get_upload_directory_name(storage_path: str | None) -> str: + if not storage_path: + return "unknown" + + try: + return Path(storage_path).parent.name or "unknown" + except Exception: + return "unknown" + + +def _log_with_upload_context( + level: int, + message: str, + record: models.UploadRecord | SimpleNamespace, + *args: object, + exc_info: bool = False, +) -> None: + if args: + message = message % args + + logger.log( + level, + "%s [upload=%s dir=%s]", + message, + record.public_id, + _get_upload_directory_name(getattr(record, "storage_path", None)), + exc_info=exc_info, + ) + + def _build_remuxed_filename(filename: str | None) -> str | None: if not filename: return None @@ -57,14 +90,15 @@ async def _apply_media_normalization(record: models.UploadRecord, path: Path) -> if 
should_remux_to_mp4(record.mimetype, ffprobe_data): source_size = path.stat().st_size if source_size > settings.max_remux_bytes: - logger.info( - "Skipping remux for upload %s because %s bytes exceeds remux limit %s", - record.public_id, + _log_with_upload_context( + logging.INFO, + "Skipping remux because %s bytes exceeds remux limit %s", + record, source_size, settings.max_remux_bytes, ) else: - logger.info("Remuxing upload %s into MP4 container", record.public_id) + _log_with_upload_context(logging.INFO, "Remuxing upload into MP4 container", record) path = await remux_to_mp4(path) record.storage_path = str(path) record.filename = _build_remuxed_filename(record.filename) @@ -72,10 +106,12 @@ async def _apply_media_normalization(record: models.UploadRecord, path: Path) -> record.mimetype = detect_mimetype(path) record.size_bytes = path.stat().st_size ffprobe_data = await extract_ffprobe_metadata(path) + _log_with_upload_context(logging.INFO, "Remuxed upload into MP4 container", record) elif record.mimetype and record.mimetype.startswith("video/") and record.mimetype != "video/mp4": - logger.info( - "Skipping MP4 remux for upload %s because %s", - record.public_id, + _log_with_upload_context( + logging.INFO, + "Skipping MP4 remux because %s", + record, get_mp4_remux_skip_reason(record.mimetype, ffprobe_data), ) @@ -83,11 +119,14 @@ async def _apply_media_normalization(record: models.UploadRecord, path: Path) -> try: modified = await ensure_faststart_mp4(path, record.mimetype) if modified: - logger.info("Applied faststart to upload %s", record.public_id) + _log_with_upload_context(logging.INFO, "Applied faststart to upload", record) + else: + _log_with_upload_context(logging.INFO, "Faststart already satisfied or not needed", record) except Exception: - logger.exception("Failed to apply faststart to upload %s", record.public_id) + _log_with_upload_context(logging.ERROR, "Failed to apply faststart to upload", record, exc_info=True) await _ensure_thumbnail(record, 
path) + await _ensure_preview(record, path, ffprobe_data, force_generation=not is_directly_embeddable_video(record.mimetype, ffprobe_data)) return path, ffprobe_data @@ -117,17 +156,59 @@ async def _ensure_thumbnail(record: models.UploadRecord | SimpleNamespace, path: try: thumbnail_path = await ensure_video_thumbnail(path) except Exception: - logger.exception("Failed to generate thumbnail for upload %s", record.public_id) + _log_with_upload_context(logging.ERROR, "Failed to generate thumbnail", record, exc_info=True) return if thumbnail_path is not None: - logger.info("Thumbnail ready for upload %s", record.public_id) + _log_with_upload_context(logging.INFO, "Thumbnail ready", record) + else: + _log_with_upload_context(logging.INFO, "Thumbnail was not generated", record) + + +async def _ensure_preview( + record: models.UploadRecord | SimpleNamespace, + path: Path, + ffprobe_data: dict | None, + *, + force_generation: bool = False, +) -> None: + mimetype = getattr(record, "mimetype", None) + if not mimetype or not mimetype.startswith("video/"): + return + + try: + preview_path = await ensure_video_preview( + path, + ffprobe_data=ffprobe_data, + clip_seconds=settings.embed_preview_clip_seconds, + min_size_bytes=settings.embed_preview_min_size_bytes, + ignore_size_threshold=force_generation, + ) + except Exception: + _log_with_upload_context(logging.ERROR, "Failed to generate embed preview", record, exc_info=True) + return + + if preview_path is not None: + if force_generation: + _log_with_upload_context(logging.INFO, "Embed preview ready for incompatible video", record) + else: + _log_with_upload_context(logging.INFO, "Embed preview ready", record) + return + + if settings.embed_preview_clip_seconds < 1: + _log_with_upload_context(logging.INFO, "Skipping embed preview because preview generation is disabled", record) + elif settings.embed_preview_min_size_bytes <= 0: + _log_with_upload_context(logging.INFO, "Skipping embed preview because preview size threshold 
disables previews", record) + elif force_generation: + _log_with_upload_context(logging.WARNING, "Failed to produce forced embed preview for incompatible video", record) + else: + _log_with_upload_context(logging.INFO, "Skipping embed preview because upload is below preview size threshold", record) async def _mark_upload_failed(upload_id: str, error_message: str) -> bool: async with SessionLocal() as session: if not (record := await _get_upload_record(session, upload_id)): - logger.warning("Upload %s not found for processing", upload_id) + logger.warning("Upload %s not found for processing [dir=unknown]", upload_id) return False record.status = "failed" @@ -136,13 +217,14 @@ async def _mark_upload_failed(upload_id: str, error_message: str) -> bool: record.meta_data["error"] = error_message attributes.flag_modified(record, "meta_data") await session.commit() + _log_with_upload_context(logging.ERROR, "Marked upload as failed: %s", record, error_message) return False async def _mark_upload_completed(upload_id: str, processing_state: SimpleNamespace, path: Path, ffprobe_data: dict | None) -> bool: async with SessionLocal() as session: if not (record := await _get_upload_record(session, upload_id)): - logger.warning("Upload %s not found for processing", upload_id) + logger.warning("Upload %s not found for processing [dir=unknown]", upload_id) return False record.storage_path = processing_state.storage_path @@ -157,12 +239,12 @@ async def _mark_upload_completed(upload_id: str, processing_state: SimpleNamespa record.meta_data["ffprobe"] = ffprobe_data attributes.flag_modified(record, "meta_data") - logger.info("Extracted ffprobe metadata for upload %s", upload_id) + _log_with_upload_context(logging.INFO, "Extracted ffprobe metadata", processing_state) record.status = "completed" record.completed_at = datetime.now(UTC) await session.commit() - logger.info("Completed processing upload %s", upload_id) + _log_with_upload_context(logging.INFO, "Completed processing upload", 
processing_state) return True @@ -182,7 +264,7 @@ def __init__(self, worker_count: int | None = None) -> None: async def enqueue(self, upload_id: str) -> None: """Add an upload to the processing queue.""" await self._queue.put(upload_id) - logger.info("Enqueued upload %s for post-processing", upload_id) + logger.info("Enqueued upload %s for post-processing [dir=unknown]", upload_id) def start_worker(self) -> None: """Start the background worker pool if not already running.""" @@ -226,7 +308,7 @@ async def _run_worker(self, worker_index: int) -> None: try: await self._process_upload_by_id(upload_id) except Exception: - logger.exception("Failed to process upload %s", upload_id) + logger.exception("Failed to process upload %s [dir=unknown]", upload_id) finally: self._queue.task_done() except asyncio.CancelledError: @@ -254,38 +336,38 @@ async def process_upload(upload_id: str) -> bool: """ async with SessionLocal() as session: if not (record := await _get_upload_record(session, upload_id)): - logger.warning("Upload %s not found for processing", upload_id) + logger.warning("Upload %s not found for processing [dir=unknown]", upload_id) return False processing_state = _build_processing_state(record) if not processing_state.storage_path: - logger.error("Upload %s has no storage path", upload_id) + logger.error("Upload %s has no storage path [dir=unknown]", upload_id) return await _mark_upload_failed(upload_id, "No storage path") path = Path(processing_state.storage_path) if not path.exists(): - logger.error("Upload %s file not found: %s", upload_id, path) + logger.error("Upload %s file not found: %s [dir=%s]", upload_id, path, path.parent.name or "unknown") return await _mark_upload_failed(upload_id, "File not found") try: ffprobe_data = None if processing_state.mimetype and is_multimedia(processing_state.mimetype): - logger.info("Processing multimedia upload %s", upload_id) + _log_with_upload_context(logging.INFO, "Processing multimedia upload", processing_state) path, 
ffprobe_data = await _apply_media_normalization(processing_state, path) if processing_state.size_bytes is None and path.exists(): processing_state.size_bytes = path.stat().st_size except Exception: - logger.exception("Failed to process upload %s", upload_id) + _log_with_upload_context(logging.ERROR, "Failed to process upload", processing_state, exc_info=True) return await _mark_upload_failed(upload_id, "Post-processing failed") return await _mark_upload_completed(upload_id, processing_state, path, ffprobe_data) async def backfill_missing_video_thumbnails() -> int: - """Generate thumbnails for completed video uploads that predate thumbnail support.""" + """Generate thumbnails and embed previews for completed video uploads missing sidecars.""" async with SessionLocal() as session: stmt = ( select(models.UploadRecord, models.UploadToken.expires_at) @@ -293,18 +375,18 @@ async def backfill_missing_video_thumbnails() -> int: .where(models.UploadRecord.status == "completed") ) result = await session.execute(stmt) - upload_ids = [ - record.public_id + upload_targets = [ + (record.public_id, not thumbnail_exists(record.storage_path), not preview_exists(record.storage_path)) for record, expires_at in result.all() if record.storage_path and record.mimetype and record.mimetype.startswith("video/") and not _token_has_expired(expires_at) - and not thumbnail_exists(record.storage_path) + and (not thumbnail_exists(record.storage_path) or not preview_exists(record.storage_path)) ] - generated_count = 0 - for upload_id in upload_ids: + updated_count = 0 + for upload_id, needs_thumbnail, needs_preview in upload_targets: async with SessionLocal() as session: stmt = ( select(models.UploadRecord, models.UploadToken.expires_at) @@ -324,21 +406,54 @@ async def backfill_missing_video_thumbnails() -> int: continue path = Path(record.storage_path) + ffprobe_data = record.meta_data.get("ffprobe") if isinstance(record.meta_data, dict) else None if not path.exists(): - logger.warning("Skipping 
thumbnail backfill for upload %s because file is missing", upload_id) - continue - - try: - thumbnail_path = await ensure_video_thumbnail(path) - except Exception: - logger.exception("Failed to backfill thumbnail for upload %s", upload_id) + logger.warning("Skipping sidecar backfill because file is missing [upload=%s dir=%s]", upload_id, path.parent.name or "unknown") continue - if thumbnail_path is not None: - generated_count += 1 - - if generated_count > 0: - logger.info("Backfilled thumbnails for %s upload(s)", generated_count) - - return generated_count + generated_any = False + if needs_thumbnail: + try: + thumbnail_path = await ensure_video_thumbnail(path) + except Exception: + logger.exception("Failed to backfill thumbnail [upload=%s dir=%s]", upload_id, path.parent.name or "unknown") + else: + if thumbnail_path is not None: + generated_any = True + logger.info("Backfilled thumbnail [upload=%s dir=%s]", upload_id, path.parent.name or "unknown") + else: + logger.info( + "Skipped thumbnail backfill because no thumbnail was generated [upload=%s dir=%s]", + upload_id, + path.parent.name or "unknown", + ) + + if needs_preview: + try: + preview_path = await ensure_video_preview( + path, + ffprobe_data=ffprobe_data, + clip_seconds=settings.embed_preview_clip_seconds, + min_size_bytes=settings.embed_preview_min_size_bytes, + ) + except Exception: + logger.exception("Failed to backfill embed preview [upload=%s dir=%s]", upload_id, path.parent.name or "unknown") + else: + if preview_path is not None: + generated_any = True + logger.info("Backfilled embed preview [upload=%s dir=%s]", upload_id, path.parent.name or "unknown") + else: + logger.info( + "Skipped embed preview backfill because no preview was generated [upload=%s dir=%s]", + upload_id, + path.parent.name or "unknown", + ) + + if generated_any: + updated_count += 1 + + if updated_count > 0: + logger.info("Backfilled media sidecars for %s upload(s)", updated_count) + + return updated_count diff --git 
a/backend/app/routers/metadata.py b/backend/app/routers/metadata.py index cc98e02..b878937 100644 --- a/backend/app/routers/metadata.py +++ b/backend/app/routers/metadata.py @@ -2,7 +2,8 @@ from fastapi import APIRouter -from backend.app.metadata_schema import load_schema, validate_metadata +from backend.app import schemas +from backend.app.metadata_schema import extract_metadata_from_filename, load_schema, validate_metadata router = APIRouter(prefix="/api/metadata", tags=["metadata"]) @@ -19,6 +20,21 @@ async def get_metadata_schema() -> dict[str, list[dict]]: return {"fields": load_schema()} +@router.post("/extract", name="metadata_schema_extract") +async def extract_metadata_payload(payload: schemas.MetadataExtractRequest) -> schemas.MetadataExtractResponse: + """ + Extract metadata values from a filename. + + Args: + payload: Filename payload to inspect. + + Returns: + dict: A dictionary containing the extracted metadata. + + """ + return schemas.MetadataExtractResponse(metadata=extract_metadata_from_filename(payload.filename)) + + @router.post("/validate", name="metadata_schema_validate") async def validate_metadata_payload(payload: dict) -> dict[str, dict[str, Any]]: """ diff --git a/backend/app/routers/tokens.py b/backend/app/routers/tokens.py index 2d4af88..6b62a2b 100644 --- a/backend/app/routers/tokens.py +++ b/backend/app/routers/tokens.py @@ -11,7 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.sql.selectable import Select -from backend.app import models, schemas, utils +from backend.app import models, schemas, subtitles, utils from backend.app.config import settings from backend.app.db import SessionLocal, get_db from backend.app.security import optional_admin_check, verify_admin @@ -35,8 +35,10 @@ def _generate_token_value(num_bytes: int, prefix: str = "") -> str: def _get_thumbnail_fallback_path() -> Path: repo_root = Path(__file__).resolve().parents[3] candidates = ( - Path(settings.frontend_export_path).resolve() / "images" 
/ "thumbnail-fallback.jpg", repo_root / "frontend" / "public" / "images" / "thumbnail-fallback.jpg", + repo_root / "frontend" / "app" / "assets" / "images" / "thumbnail-fallback.jpg", + Path(settings.frontend_export_path).resolve() / "images" / "thumbnail-fallback.jpg", + Path(settings.frontend_export_path).resolve() / "assets" / "images" / "thumbnail-fallback.jpg", ) for candidate in candidates: @@ -93,6 +95,33 @@ def _set_upload_urls(request: Request, item: schemas.UploadRecordResponse, downl item.thumbnail_url = str(request.app.url_path_for("get_file_thumbnail", download_token=download_token, upload_id=upload_id)) item.upload_url = str(request.app.url_path_for("tus_head", upload_id=upload_id)) item.info_url = str(request.app.url_path_for("get_file_info", download_token=download_token, upload_id=upload_id)) + item.recommended_chunk_bytes = utils.recommend_chunk_size(item.upload_length, settings.max_chunk_bytes) + + +def _build_subtitle_manifest( + request: Request, + download_token: str, + upload_id: str, + filename: str | None, +) -> schemas.SubtitleManifestResponse: + subtitle_items = [ + schemas.SubtitleTrackResponse( + source_format=track.source_format, + delivery_format=track.delivery_format, + renderer=track.renderer, + url=str( + request.app.url_path_for( + "get_file_subtitle", + download_token=download_token, + upload_id=upload_id, + source_format=track.source_format, + ) + ), + ) + for track in subtitles.list_subtitle_tracks(upload_id, filename) + ] + + return schemas.SubtitleManifestResponse(subtitles=subtitle_items) @router.get("/", response_model=schemas.TokenListResponse, name="list_tokens") @@ -412,8 +441,55 @@ async def get_file_info( return item +@router.get( + "/{download_token}/uploads/{upload_id}/subtitles", + response_model=schemas.SubtitleManifestResponse, + name="list_file_subtitles", + summary="List upload subtitle tracks", +) +@router.get("/{download_token}/uploads/{upload_id}/subtitles/", response_model=schemas.SubtitleManifestResponse) 
+async def list_file_subtitles( + request: Request, + download_token: str, + upload_id: str, + db: Annotated[AsyncSession, Depends(get_db)], + is_admin: Annotated[bool, Depends(optional_admin_check)], +) -> schemas.SubtitleManifestResponse: + """List external subtitle tracks that match a completed upload filename.""" + _, record, _ = await _get_accessible_upload(download_token, upload_id, db, is_admin) + return _build_subtitle_manifest(request, download_token, upload_id, record.filename) + + +@router.get("/{download_token}/uploads/{upload_id}/subtitles/{source_format}", name="get_file_subtitle", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/subtitles/{source_format}", response_model=None) +@router.get("/{download_token}/uploads/{upload_id}/subtitles/{source_format}/", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/subtitles/{source_format}/", response_model=None) +async def get_file_subtitle( + download_token: str, + upload_id: str, + source_format: str, + is_admin: Annotated[bool, Depends(optional_admin_check)], +) -> Response: + """Return a matching subtitle file, converting SRT to WebVTT on demand.""" + async with SessionLocal() as db: + _, record, _ = await _get_accessible_upload(download_token, upload_id, db, is_admin) + track = subtitles.get_subtitle_track(upload_id, record.filename, source_format) + + if track is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Subtitle not found") + + try: + subtitle_content = subtitles.get_delivery_content(track) + except OSError as exc: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Subtitle not found") from exc + + return Response(content=subtitle_content, media_type=subtitles.get_delivery_media_type(track)) + + @router.get("/{download_token}/uploads/{upload_id}/thumbnail", name="get_file_thumbnail", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/thumbnail", response_model=None) 
@router.get("/{download_token}/uploads/{upload_id}/thumbnail/", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/thumbnail/", response_model=None) async def get_file_thumbnail( download_token: str, upload_id: str, @@ -444,8 +520,35 @@ async def get_file_thumbnail( ) +@router.get("/{download_token}/uploads/{upload_id}/preview.mp4", name="get_file_preview") +@router.head("/{download_token}/uploads/{upload_id}/preview.mp4", response_model=None) +@router.get("/{download_token}/uploads/{upload_id}/preview.mp4/", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/preview.mp4/", response_model=None) +async def get_file_preview( + download_token: str, + upload_id: str, + is_admin: Annotated[bool, Depends(optional_admin_check)], +) -> FileResponse: + """Return a generated short MP4 preview for bot embeds when available.""" + async with SessionLocal() as db: + _, _, path = await _get_accessible_upload(download_token, upload_id, db, is_admin) + preview_path = utils.get_preview_path(path) + + if not preview_path.exists() or preview_path.stat().st_size == 0: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Preview missing") + + return FileResponse( + preview_path, + filename=preview_path.name, + media_type=utils.PREVIEW_MEDIA_TYPE, + content_disposition_type="inline", + ) + + @router.get("/{download_token}/uploads/{upload_id}/stream", name="stream_file") -@router.get("/{download_token}/uploads/{upload_id}/stream/") +@router.head("/{download_token}/uploads/{upload_id}/stream", response_model=None) +@router.get("/{download_token}/uploads/{upload_id}/stream/", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/stream/", response_model=None) async def stream_file( download_token: str, upload_id: str, @@ -466,7 +569,9 @@ async def stream_file( @router.get("/{download_token}/uploads/{upload_id}/download", name="download_file") -@router.get("/{download_token}/uploads/{upload_id}/download/") 
+@router.head("/{download_token}/uploads/{upload_id}/download", response_model=None) +@router.get("/{download_token}/uploads/{upload_id}/download/", response_model=None) +@router.head("/{download_token}/uploads/{upload_id}/download/", response_model=None) async def download_file( download_token: str, upload_id: str, diff --git a/backend/app/routers/uploads.py b/backend/app/routers/uploads.py index 0b6af25..26455f0 100644 --- a/backend/app/routers/uploads.py +++ b/backend/app/routers/uploads.py @@ -18,7 +18,14 @@ from backend.app.db import SessionLocal, get_db from backend.app.metadata_schema import validate_metadata from backend.app.postprocessing import ProcessingQueue -from backend.app.utils import compute_file_digest, delete_upload_artifacts, detect_mimetype, is_multimedia, mime_allowed +from backend.app.utils import ( + compute_file_digest, + delete_upload_artifacts, + detect_mimetype, + is_multimedia, + mime_allowed, + recommend_chunk_size, +) if TYPE_CHECKING: from sqlalchemy.engine.result import Result @@ -327,6 +334,7 @@ async def initiate_upload( meta_data=cleaned_metadata, allowed_mime=token_row.allowed_mime, remaining_uploads=token_row.remaining_uploads, + recommended_chunk_bytes=recommend_chunk_size(record.upload_length, settings.max_chunk_bytes), ) diff --git a/backend/app/schemas.py b/backend/app/schemas.py index adbe0ce..9b360c8 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field @@ -67,10 +67,22 @@ class UploadRecordResponse(BaseModel): thumbnail_url: str | None = None upload_url: str | None = None info_url: str | None = None + recommended_chunk_bytes: int | None = None model_config = ConfigDict(from_attributes=True, populate_by_name=True) +class SubtitleTrackResponse(BaseModel): + source_format: Literal["vtt", "srt", "ass"] + delivery_format: Literal["vtt", "ass"] + renderer: 
Literal["native", "assjs"] + url: str + + +class SubtitleManifestResponse(BaseModel): + subtitles: list[SubtitleTrackResponse] = Field(default_factory=list) + + class TokenPublicInfo(BaseModel): token: str | None download_token: str @@ -99,6 +111,14 @@ class UploadRequest(BaseModel): size_bytes: int | None = Field(None, gt=0) +class MetadataExtractRequest(BaseModel): + filename: str = Field(..., min_length=1) + + +class MetadataExtractResponse(BaseModel): + metadata: dict[str, Any] = Field(default_factory=dict) + + class TokenListResponse(BaseModel): tokens: list[TokenAdmin] total: int @@ -111,3 +131,4 @@ class InitiateUploadResponse(BaseModel): meta_data: dict[str, Any] allowed_mime: list[str] | None remaining_uploads: int + recommended_chunk_bytes: int diff --git a/backend/app/subtitles.py b/backend/app/subtitles.py new file mode 100644 index 0000000..07a0f12 --- /dev/null +++ b/backend/app/subtitles.py @@ -0,0 +1,278 @@ +from __future__ import annotations + +import contextlib +import re +import time +import unicodedata +from dataclasses import dataclass +from pathlib import Path + +from backend.app import config + +SUPPORTED_SUBTITLE_SOURCE_FORMATS: tuple[str, ...] = ("vtt", "srt", "ass") +DELIVERY_FORMAT_BY_SOURCE_FORMAT: dict[str, str] = { + "vtt": "vtt", + "srt": "vtt", + "ass": "ass", +} +RENDERER_BY_SOURCE_FORMAT: dict[str, str] = { + "vtt": "native", + "srt": "native", + "ass": "assjs", +} +DELIVERY_MEDIA_TYPE_BY_FORMAT: dict[str, str] = { + "vtt": "text/vtt; charset=utf-8", + "ass": "text/x-ssa; charset=utf-8", +} +TEXT_DECODING_CANDIDATES: tuple[str, ...] = ("utf-8-sig", "utf-16", "utf-16-le", "utf-16-be", "cp1252") +SRT_TIMESTAMP_RE = re.compile(r"(?P