From e94b993a1d50dd0504ea467ac15fa2c0bcd83098 Mon Sep 17 00:00:00 2001 From: Emmanuel Rondan Date: Tue, 30 Sep 2025 14:28:06 -0300 Subject: [PATCH 1/2] adding FromSetting for declarative spider param defaults --- scrapy_spider_metadata/_params.py | 38 +++++++++++++++++++++++++++++ scrapy_spider_metadata/defaults.py | 9 +++++++ tests/test_fromsetting.py | 39 ++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 scrapy_spider_metadata/defaults.py create mode 100644 tests/test_fromsetting.py diff --git a/scrapy_spider_metadata/_params.py b/scrapy_spider_metadata/_params.py index 8d41cf5..027d3c1 100644 --- a/scrapy_spider_metadata/_params.py +++ b/scrapy_spider_metadata/_params.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ValidationError from ._utils import get_generic_param, normalize_param_schema +from .defaults import FromSetting ParamSpecT = TypeVar("ParamSpecT", bound=BaseModel) logger = getLogger(__name__) @@ -30,6 +31,43 @@ def __init__(self, *args: Any, **kwargs: Any): raise super().__init__(*args, **kwargs) + def _set_crawler(self, crawler): + super()._set_crawler(crawler) + + if not hasattr(self, "args") or self.args is None: + return + + param_model = get_generic_param(self.__class__, Args) + assert param_model is not None + assert issubclass(param_model, BaseModel) + + # compat Pydantic v1/v2 + if hasattr(self.args, "model_dump"): + data = self.args.model_dump(exclude_unset=True) + else: + data = self.args.dict(exclude_unset=True) + + fields = getattr(param_model, "model_fields", None) or getattr( + param_model, "__fields__", {} + ) + + for field_name, field in fields.items(): + default_val = getattr(field, "default", None) + if field_name in data and data[field_name] is not None: + continue + if isinstance(default_val, FromSetting): + getter_name = default_val.getter or "get" + getter = getattr(crawler.settings, getter_name, crawler.settings.get) + value = getter(default_val.name, default_val.default) + if value is not 
None: + data[field_name] = value + + try: + self.args = param_model(**data) + except ValidationError as e: + logger.error(f"Spider parameter validation failed: {e}") + raise + @classmethod def get_param_schema(cls, normalize: bool = False) -> dict[Any, Any]: """Return a :class:`dict` with the :ref:`parameter definition diff --git a/scrapy_spider_metadata/defaults.py b/scrapy_spider_metadata/defaults.py new file mode 100644 index 0000000..32525d3 --- /dev/null +++ b/scrapy_spider_metadata/defaults.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import Any, Literal + + +@dataclass(frozen=True) +class FromSetting: + name: str + default: Any = None + getter: Literal["get", "getint", "getbool", "getfloat"] = "get" diff --git a/tests/test_fromsetting.py b/tests/test_fromsetting.py new file mode 100644 index 0000000..517f653 --- /dev/null +++ b/tests/test_fromsetting.py @@ -0,0 +1,39 @@ +import pytest +from scrapy.utils.test import get_crawler +from pydantic import BaseModel +from scrapy import Spider + +from scrapy_spider_metadata.defaults import FromSetting +from scrapy_spider_metadata._params import Args + + +class Params(BaseModel): + pages: int = FromSetting("MAX_PAGES_SETTING", default=5, getter="getint") + lang: str = "en" + + +class S(Args[Params], Spider): + name = "s" + + +def test_fromsetting_reads_setting(): + crawler = get_crawler(S, settings_dict={"MAX_PAGES_SETTING": 10}) + s = S() + s._set_crawler(crawler) + assert s.args.pages == 10 + assert s.args.lang == "en" + + +def test_fromsetting_uses_default_when_missing(): + crawler = get_crawler(S, settings_dict={}) + s = S() + s._set_crawler(crawler) + assert s.args.pages == 5 + assert s.args.lang == "en" + + +def test_cli_overrides_everything(): + crawler = get_crawler(S, settings_dict={"MAX_PAGES_SETTING": 10}) + s = S(pages=99) + s._set_crawler(crawler) + assert s.args.pages == 99 From 784bf7ee0f4293827ee128b83fc45e032ce821d4 Mon Sep 17 00:00:00 2001 From: Emmanuel Rondan Date: Wed, 1 Oct 
2025 12:06:45 -0300 Subject: [PATCH 2/2] Add conftest to avoid errors with the Twisted reactor --- tests/conftest.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..355df16 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,12 @@ +import os + +os.environ.setdefault( + "TWISTED_REACTOR", + "twisted.internet.asyncioreactor.AsyncioSelectorReactor", +) + +try: + from twisted.internet import asyncioreactor + asyncioreactor.install() +except Exception: + pass