diff --git a/app/features/conditions/router.py b/app/features/conditions/router.py index d4e462fe..beef2612 100644 --- a/app/features/conditions/router.py +++ b/app/features/conditions/router.py @@ -1,3 +1,4 @@ +import asyncio import logging from collections import OrderedDict from typing import Any @@ -85,7 +86,7 @@ async def conditions_test(request: Request, encoder: Encoder, cache: Cache, conf return web.json_response({"error": "condition is required."}, status=web.HTTPBadRequest.status_code) try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) except ValueError as e: return web.json_response({"error": str(e)}, status=web.HTTPBadRequest.status_code) diff --git a/app/features/tasks/definitions/handlers/generic.py b/app/features/tasks/definitions/handlers/generic.py index 4efc71d4..5cf21ce1 100644 --- a/app/features/tasks/definitions/handlers/generic.py +++ b/app/features/tasks/definitions/handlers/generic.py @@ -187,6 +187,7 @@ def _generic_id(url): url=url, no_archive=True, no_log=True, + budget_sleep=True, ) if not info: diff --git a/app/features/tasks/definitions/handlers/rss.py b/app/features/tasks/definitions/handlers/rss.py index 084105fc..0e4f7b12 100644 --- a/app/features/tasks/definitions/handlers/rss.py +++ b/app/features/tasks/definitions/handlers/rss.py @@ -185,6 +185,7 @@ async def extract(task: HandleTask) -> TaskResult | TaskFailure: url=url, no_archive=True, no_log=True, + budget_sleep=True, ) if not info: diff --git a/app/features/tasks/definitions/results.py b/app/features/tasks/definitions/results.py index e4052311..3520b8e6 100644 --- a/app/features/tasks/definitions/results.py +++ b/app/features/tasks/definitions/results.py @@ -107,6 +107,7 @@ async def fetch_metadata(self, full: bool = False) -> tuple[dict[str, Any] | Non no_archive=True, follow_redirect=False, sanitize_info=True, + budget_sleep=True, ) if not ie_info or not isinstance(ie_info, dict): @@ -133,7 +134,7 @@ async def _mark_logic(self) -> tuple[bool, str] | dict[str, Any]: archive_file: Path = Path(archive_file) - (ie_info, _) = await fetch_info(params, self.url, no_archive=True, follow_redirect=True) + (ie_info, _) = await fetch_info(params, self.url, no_archive=True, follow_redirect=True, budget_sleep=True) if not ie_info or not isinstance(ie_info, dict): return (False, "Failed to extract information from URL.") diff --git a/app/features/tasks/router.py b/app/features/tasks/router.py index 9cd23f19..0f15e14d 100644 --- a/app/features/tasks/router.py +++ b/app/features/tasks/router.py @@ -1,3 +1,4 @@ +import asyncio import logging from typing import TYPE_CHECKING, Any @@ -469,7 +470,7 @@ async def task_handler_inspect(request: Request, handler: TaskHandle, encoder: E static_only: bool = data.get("static_only", False) if isinstance(data, dict) else False if not static_only: try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) except ValueError as e: return web.json_response({"error": str(e)}, status=web.HTTPBadRequest.status_code) @@ -710,7 +711,7 @@ async def task_metadata(request: Request, repo: TasksRepository, config: Config, continue try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) except ValueError: LOG.warning(f"Invalid thumbnail url '{url}'") continue diff --git a/app/features/ytdlp/extractor.py b/app/features/ytdlp/extractor.py index 55c46d62..8636c848 100644 --- a/app/features/ytdlp/extractor.py +++ b/app/features/ytdlp/extractor.py @@ -49,6 +49,17 @@ def __init__( self.wait_threshold = wait_threshold +def _sleep_timeout(config: dict[str, Any], timeout: float, budget_sleep: bool) -> float: + if not budget_sleep: + return timeout + + sleep_requests = config.get("sleep_interval_requests") + if not isinstance(sleep_requests, int | float) or sleep_requests <= 0: + return timeout + + return timeout + min(float(sleep_requests) * 20, 300.0) + + class ExtractorPool(metaclass=Singleton): """ Manages process pool and semaphore for video information extraction. @@ -312,6 +323,7 @@ async def fetch_info( sanitize_info: bool = False, capture_logs: int | None = None, extractor_config: ExtractorConfig | None = None, + budget_sleep: bool = False, **kwargs, ) -> tuple[dict[str, Any] | None, list[dict[str, Any]]]: """ @@ -329,6 +341,7 @@ async def fetch_info( sanitize_info: Sanitize the extracted information capture_logs: If provided (e.g., logging.WARNING), capture logs extractor_config: Configuration for the extractor + budget_sleep: Whether to add extra timeout budget for request-sleep-heavy extraction **kwargs: Additional arguments Returns: @@ -352,6 +365,7 @@ async def fetch_info( loop = asyncio.get_running_loop() safe_config = _sanitize_config(config) + timeout = _sleep_timeout(safe_config, extractor_config.timeout, budget_sleep) try: try: @@ -372,9 +386,12 @@ async def fetch_info( **kwargs, ), ), - timeout=extractor_config.timeout, + timeout=timeout, ) + except TimeoutError: + raise + except Exception as exc: LOG.exception(exc) LOG.warning("extract_info process pool failed, falling back to thread pool url=%s error=%s", url, exc) @@ -393,7 +410,7 @@ async def fetch_info( **kwargs, ), ), - timeout=extractor_config.timeout, + timeout=timeout, ) finally: semaphore.release() diff --git a/app/features/ytdlp/router.py b/app/features/ytdlp/router.py index 1054e3fb..d1653b11 100644 --- a/app/features/ytdlp/router.py +++ b/app/features/ytdlp/router.py @@ -1,3 +1,4 @@ +import asyncio import json import logging import time @@ -301,7 +302,7 @@ async def get_info(request: Request, cache: Cache, config: Config) -> Response: ) try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) except ValueError as e: return web.json_response( data={"status": False, "message": str(e), "error": str(e)}, @@ -453,7 +454,7 @@ async def get_archive_ids(request: Request, config: Config) -> Response: for i, url in enumerate(data): dct = {"index": i, "url": url} try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) dct.update(get_archive_id(url)) except ValueError as e: dct.update({"id": None, "ie_key": None, "archive_id": None, "error": str(e)}) diff --git a/app/library/downloads/item_adder.py b/app/library/downloads/item_adder.py index 103cda7d..2e37178c 100644 --- a/app/library/downloads/item_adder.py +++ b/app/library/downloads/item_adder.py @@ -208,6 +208,7 @@ async def add( no_archive=False, follow_redirect=True, capture_logs=logging.WARNING, + budget_sleep=True, ) if not entry: diff --git a/app/routes/api/history.py b/app/routes/api/history.py index a063cfc5..bb6bbffb 100644 --- a/app/routes/api/history.py +++ b/app/routes/api/history.py @@ -792,6 +792,7 @@ async def item_nfo_generate(request: Request, queue: DownloadQueue) -> Response: url=item.info.url, no_archive=True, follow_redirect=True, + budget_sleep=True, ) if not info_dict: diff --git a/app/routes/api/images.py b/app/routes/api/images.py index 66dce6ae..164e9479 100644 --- a/app/routes/api/images.py +++ b/app/routes/api/images.py @@ -1,3 +1,4 @@ +import asyncio import logging import random import time @@ -39,7 +40,7 @@ async def get_thumbnail(request: Request, config: Config) -> Response: return web.json_response(data={"error": "URL is required."}, status=web.HTTPForbidden.status_code) try: - validate_url(url, allow_internal=config.allow_internal_urls) + await asyncio.to_thread(validate_url, url, config.allow_internal_urls) except ValueError as e: return web.json_response(data={"error": str(e)}, status=web.HTTPForbidden.status_code) @@ -59,6 +60,7 @@ async def get_thumbnail(request: Request, config: Config) -> Response: url=url, follow_redirects=True, headers=request_headers, + timeout=10.0, ) if response.status_code != web.HTTPOk.status_code: diff --git a/app/tests/test_async_url_validation_routes.py b/app/tests/test_async_url_validation_routes.py new file mode 100644 index 00000000..a9d7d762 --- /dev/null +++ b/app/tests/test_async_url_validation_routes.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import json +from typing import Any, Generator + +import pytest + +from app.features.conditions import router as conditions_router +from app.features.tasks import router as tasks_router +from app.features.ytdlp import router as ytdlp_router +from app.library.config import Config + + +@pytest.fixture(autouse=True) +def reset_config() -> Generator[None, None, None]: + Config._reset_singleton() + yield + Config._reset_singleton() + + +class _Req: + def __init__(self, payload: Any) -> None: + self._payload = payload + self.body_exists = payload is not None + + async def json(self) -> Any: + return self._payload + + +class _InspectReq(_Req): + query: dict[str, str] = {} + match_info: dict[str, str] = {} + + +class _QueryReq: + def __init__(self, query: dict[str, str]) -> None: + self.query = query + + +def _patch_thread(monkeypatch: pytest.MonkeyPatch, module: Any, config: Config, url: str) -> dict[str, bool]: + seen = {"to_thread": False, "validate": False} + + def fake_validate_url(next_url: str, allow_internal: bool = False) -> bool: + seen["validate"] = True + assert next_url == url + assert allow_internal is config.allow_internal_urls + raise ValueError("Invalid hostname.") + + async def fake_to_thread(func, *args, **kwargs): + seen["to_thread"] = True + return func(*args, **kwargs) + + monkeypatch.setattr(module, "validate_url", fake_validate_url) + monkeypatch.setattr(module.asyncio, "to_thread", fake_to_thread) + return seen + + +@pytest.mark.asyncio +async def test_inspect_thread(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + request = _InspectReq({"url": "https://bad.example/task"}) + seen = _patch_thread(monkeypatch, tasks_router, config, "https://bad.example/task") + + response = await tasks_router.task_handler_inspect(request, handler=None, encoder=None, config=config) + + assert response.status == 400 + assert json.loads(response.body.decode("utf-8")) == {"error": "Invalid hostname."} + assert seen == {"to_thread": True, "validate": True} + + +@pytest.mark.asyncio +async def test_conditions_thread(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + request = _Req({"url": "https://bad.example/cond", "condition": "title ~= 'x'"}) + seen = _patch_thread(monkeypatch, conditions_router, config, "https://bad.example/cond") + + response = await conditions_router.conditions_test(request, encoder=None, cache=None, config=config) + + assert response.status == 400 + assert json.loads(response.body.decode("utf-8")) == {"error": "Invalid hostname."} + assert seen == {"to_thread": True, "validate": True} + + +@pytest.mark.asyncio +async def test_info_thread(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + request = _QueryReq({"url": "https://bad.example/info"}) + seen = _patch_thread(monkeypatch, ytdlp_router, config, "https://bad.example/info") + + response = await ytdlp_router.get_info(request, cache=None, config=config) + + assert response.status == 400 + assert json.loads(response.body.decode("utf-8")) == { + "status": False, + "message": "Invalid hostname.", + "error": "Invalid hostname.", + } + assert seen == {"to_thread": True, "validate": True} + + +@pytest.mark.asyncio +async def test_archive_ids_thread(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + request = _Req(["https://bad.example/archive"]) + seen = _patch_thread(monkeypatch, ytdlp_router, config, "https://bad.example/archive") + + response = await ytdlp_router.get_archive_ids(request, config) + + assert response.status == 200 + assert json.loads(response.body.decode("utf-8")) == [ + { + "index": 0, + "url": "https://bad.example/archive", + "id": None, + "ie_key": None, + "archive_id": None, + "error": "Invalid hostname.", + } + ] + assert seen == {"to_thread": True, "validate": True} diff --git a/app/tests/test_extractor_timeout.py b/app/tests/test_extractor_timeout.py new file mode 100644 index 00000000..36f9f296 --- /dev/null +++ b/app/tests/test_extractor_timeout.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import asyncio +from unittest.mock import Mock + +import pytest + +from app.features.ytdlp import extractor + + +class _Loop: + def __init__(self) -> None: + self.calls: list[object | None] = [] + + def run_in_executor(self, executor, func): # noqa: ANN001 + self.calls.append(executor) + return func + + +class _Pool: + def __init__(self) -> None: + self.semaphore = asyncio.Semaphore(1) + self.executor = object() + + def get_semaphore(self, _config: extractor.ExtractorConfig) -> asyncio.Semaphore: + return self.semaphore + + def get_pool(self, _config: extractor.ExtractorConfig) -> object: + return self.executor + + +def test_sleep_budget() -> None: + assert extractor._sleep_timeout({}, 70, False) == 70 + assert extractor._sleep_timeout({"sleep_interval_requests": 0}, 70, True) == 70 + assert extractor._sleep_timeout({"sleep_interval_requests": 3}, 70, True) == 130 + assert extractor._sleep_timeout({"sleep_interval_requests": 30}, 70, True) == 370 + + +@pytest.mark.asyncio +async def test_timeout_no_retry(monkeypatch: pytest.MonkeyPatch) -> None: + pool = _Pool() + loop = _Loop() + seen: list[float] = [] + + async def fake_wait_for(*, fut, timeout): + seen.append(timeout) + raise TimeoutError + + monkeypatch.setattr(extractor.ExtractorPool, "get_instance", classmethod(lambda cls: pool)) + monkeypatch.setattr(extractor.asyncio, "get_running_loop", lambda: loop) + monkeypatch.setattr(extractor.asyncio, "wait_for", fake_wait_for) + + with pytest.raises(TimeoutError): + await extractor.fetch_info( + config={"sleep_interval_requests": 3}, + url="https://example.com", + extractor_config=extractor.ExtractorConfig(concurrency=1, timeout=70), + budget_sleep=True, + ) + + assert loop.calls == [pool.executor] + assert seen == [130] + assert not pool.semaphore.locked() + + +@pytest.mark.asyncio +async def test_pool_fallback(monkeypatch: pytest.MonkeyPatch) -> None: + pool = _Pool() + loop = _Loop() + expected = ({"id": "ok"}, []) + seen: list[float] = [] + + async def fake_wait_for(*, fut, timeout): + seen.append(timeout) + if len(loop.calls) == 1: + raise RuntimeError("pool failed") + return fut() + + monkeypatch.setattr(extractor.ExtractorPool, "get_instance", classmethod(lambda cls: pool)) + monkeypatch.setattr(extractor.asyncio, "get_running_loop", lambda: loop) + monkeypatch.setattr(extractor.asyncio, "wait_for", fake_wait_for) + monkeypatch.setattr(extractor, "extract_info_sync", Mock(return_value=expected)) + + result = await extractor.fetch_info( + config={"sleep_interval_requests": 3}, + url="https://example.com", + extractor_config=extractor.ExtractorConfig(concurrency=1, timeout=70), + budget_sleep=True, + ) + + assert result == expected + assert loop.calls == [pool.executor, None] + assert seen == [130, 130] + assert not pool.semaphore.locked() diff --git a/app/tests/test_images_routes.py b/app/tests/test_images_routes.py new file mode 100644 index 00000000..97bce698 --- /dev/null +++ b/app/tests/test_images_routes.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import Generator +from unittest.mock import AsyncMock + +import pytest +from aiohttp import web +from aiohttp.test_utils import make_mocked_request + +from app.library.config import Config +from app.routes.api import images + + +@pytest.fixture(autouse=True) +def reset_config() -> Generator[None, None, None]: + Config._reset_singleton() + yield + Config._reset_singleton() + + +class _Resp: + def __init__(self, *, status_code: int = 200, content: bytes = b"img", content_type: str = "image/jpeg") -> None: + self.status_code = status_code + self.content = content + self.headers = {"Content-Type": content_type} + + +@pytest.mark.asyncio +async def test_thumb_thread(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + req = make_mocked_request("GET", "/api/thumbnail?url=https://example.com/a.jpg") + req._rel_url = req._rel_url.with_query({"url": "https://example.com/a.jpg"}) + + seen = {"to_thread": False, "validate": False} + + def fake_validate_url(url: str, allow_internal: bool = False) -> bool: + seen["validate"] = True + assert url == "https://example.com/a.jpg" + assert allow_internal is config.allow_internal_urls + return True + + async def fake_to_thread(func, *args, **kwargs): + seen["to_thread"] = True + return func(*args, **kwargs) + + client = AsyncMock() + client.request.return_value = _Resp() + + monkeypatch.setattr(images, "validate_url", fake_validate_url) + monkeypatch.setattr(images.asyncio, "to_thread", fake_to_thread) + monkeypatch.setattr(images, "get_async_client", lambda **_kwargs: client) + monkeypatch.setattr(images, "resolve_curl_transport", lambda: False) + monkeypatch.setattr(images, "build_request_headers", lambda **_kwargs: {}) + monkeypatch.setattr(images.Globals, "get_random_agent", staticmethod(lambda: "agent")) + monkeypatch.setattr( + images.YTDLPOpts, + "get_instance", + staticmethod( + lambda: type( + "Opts", + (), + { + "preset": lambda self, name: self, + "get_all": lambda self: {}, + }, + )() + ), + ) + + response = await images.get_thumbnail(req, config) + + assert response.status == web.HTTPOk.status_code + assert seen["to_thread"] is True + assert seen["validate"] is True + client.request.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_thumb_reject(monkeypatch: pytest.MonkeyPatch) -> None: + config = Config.get_instance() + req = make_mocked_request("GET", "/api/thumbnail?url=https://bad.example/a.jpg") + req._rel_url = req._rel_url.with_query({"url": "https://bad.example/a.jpg"}) + + def fake_validate_url(_url: str, allow_internal: bool = False) -> bool: + assert allow_internal is config.allow_internal_urls + raise ValueError("Invalid hostname.") + + async def fake_to_thread(func, *args, **kwargs): + return func(*args, **kwargs) + + monkeypatch.setattr(images, "validate_url", fake_validate_url) + monkeypatch.setattr(images.asyncio, "to_thread", fake_to_thread) + + response = await images.get_thumbnail(req, config) + + assert response.status == web.HTTPForbidden.status_code + assert response.text == '{"error": "Invalid hostname."}' diff --git a/ui/app/components/NewDownload.vue b/ui/app/components/NewDownload.vue index b54fd8a0..39123242 100644 --- a/ui/app/components/NewDownload.vue +++ b/ui/app/components/NewDownload.vue @@ -17,7 +17,7 @@ -