diff --git a/.riot/requirements/1fe8dd2.txt b/.riot/requirements/1900591.txt similarity index 54% rename from .riot/requirements/1fe8dd2.txt rename to .riot/requirements/1900591.txt index 72ea102ea03..ed309c651a9 100644 --- a/.riot/requirements/1fe8dd2.txt +++ b/.riot/requirements/1900591.txt @@ -2,33 +2,35 @@ # This file is autogenerated by pip-compile with Python 3.13 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/1fe8dd2.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1900591.in # -aiohappyeyeballs==2.4.8 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 +anyio==4.9.0 appdirs==1.4.4 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -coverage[toml]==7.6.12 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 +coverage[toml]==7.9.2 dataclasses-json==0.6.7 -datasets==3.3.2 +datasets==4.0.0 dill==0.3.8 distro==1.9.0 -filelock==3.17.0 -frozenlist==1.5.0 -fsspec[http]==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +filelock==3.18.0 +frozenlist==1.7.0 +fsspec[http]==2025.3.0 +greenlet==3.2.3 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.10.0 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -39,45 +41,47 @@ langchain-text-splitters==0.2.4 langsmith==0.1.147 marshmallow==3.26.1 mock==5.2.0 -multidict==6.1.0 +multidict==6.6.3 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.26.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 -orjson==3.10.15 +orjson==3.11.0 packaging==24.2 -pandas==2.2.3 -pluggy==1.5.0 -propcache==0.3.0 -pyarrow==19.0.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pandas==2.3.1 +pluggy==1.6.0 +propcache==0.3.2 +pyarrow==21.0.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pygments==2.19.2 pysbd==0.3.4 -pytest==8.3.5 +pytest==8.4.1 pytest-asyncio==0.21.1 -pytest-cov==6.0.0 -pytest-mock==3.14.0 +pytest-cov==6.2.1 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.9.0 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 -tzdata==2025.1 -urllib3==2.3.0 +typing-inspection==0.4.1 +tzdata==2025.2 +urllib3==2.5.0 vcrpy==7.0.0 wrapt==1.17.2 xxhash==3.5.0 -yarl==1.18.3 +yarl==1.20.1 diff --git a/.riot/requirements/1687eab.txt b/.riot/requirements/46e9996.txt similarity index 50% rename from .riot/requirements/1687eab.txt rename to .riot/requirements/46e9996.txt index 009a5ede488..6c8fc1bdc4c 100644 --- a/.riot/requirements/1687eab.txt +++ b/.riot/requirements/46e9996.txt @@ -2,33 +2,35 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --allow-unsafe --no-annotate --resolver=backtracking .riot/requirements/1687eab.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/46e9996.in # -aiohappyeyeballs==2.4.8 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 +anyio==4.9.0 appdirs==1.4.4 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -coverage[toml]==7.6.12 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 +coverage[toml]==7.9.2 dataclasses-json==0.6.7 -datasets==3.3.2 +datasets==4.0.0 dill==0.3.8 distro==1.9.0 -filelock==3.17.0 -frozenlist==1.5.0 -fsspec[http]==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +filelock==3.18.0 +frozenlist==1.7.0 +fsspec[http]==2025.3.0 +greenlet==3.2.3 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.10.0 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -39,45 +41,47 @@ langchain-text-splitters==0.2.4 langsmith==0.1.147 marshmallow==3.26.1 mock==5.2.0 -multidict==6.1.0 +multidict==6.6.3 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.26.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 -orjson==3.10.15 +orjson==3.11.0 packaging==24.2 -pandas==2.2.3 -pluggy==1.5.0 -propcache==0.3.0 -pyarrow==19.0.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pandas==2.3.1 +pluggy==1.6.0 +propcache==0.3.2 +pyarrow==21.0.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pygments==2.19.2 pysbd==0.3.4 -pytest==8.3.5 +pytest==8.4.1 pytest-asyncio==0.21.1 -pytest-cov==6.0.0 -pytest-mock==3.14.0 +pytest-cov==6.2.1 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.9.0 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 -tzdata==2025.1 -urllib3==2.3.0 +typing-inspection==0.4.1 +tzdata==2025.2 +urllib3==2.5.0 vcrpy==7.0.0 wrapt==1.17.2 xxhash==3.5.0 -yarl==1.18.3 +yarl==1.20.1 diff --git a/.riot/requirements/771848b.txt b/.riot/requirements/5908834.txt similarity index 52% rename from .riot/requirements/771848b.txt rename to .riot/requirements/5908834.txt index cd804c107c2..443cdf7b385 100644 --- a/.riot/requirements/771848b.txt +++ b/.riot/requirements/5908834.txt @@ -2,35 +2,37 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --allow-unsafe --no-annotate --resolver=backtracking .riot/requirements/771848b.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/5908834.in # -aiohappyeyeballs==2.4.8 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 +anyio==4.9.0 appdirs==1.4.4 async-timeout==4.0.3 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -coverage[toml]==7.6.12 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 +coverage[toml]==7.9.2 dataclasses-json==0.6.7 -datasets==3.3.2 +datasets==4.0.0 dill==0.3.8 distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.17.0 -frozenlist==1.5.0 -fsspec[http]==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +exceptiongroup==1.3.0 +filelock==3.18.0 +frozenlist==1.7.0 +fsspec[http]==2025.3.0 +greenlet==3.2.3 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.10.0 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -41,46 +43,48 @@ langchain-text-splitters==0.2.4 langsmith==0.1.147 marshmallow==3.26.1 mock==5.2.0 -multidict==6.1.0 +multidict==6.6.3 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.26.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 -orjson==3.10.15 +orjson==3.11.0 packaging==24.2 -pandas==2.2.3 -pluggy==1.5.0 -propcache==0.3.0 -pyarrow==19.0.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pandas==2.3.1 +pluggy==1.6.0 +propcache==0.3.2 +pyarrow==21.0.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pygments==2.19.2 pysbd==0.3.4 -pytest==8.3.5 +pytest==8.4.1 pytest-asyncio==0.21.1 -pytest-cov==6.0.0 -pytest-mock==3.14.0 +pytest-cov==6.2.1 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.9.0 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 -tzdata==2025.1 +typing-inspection==0.4.1 +tzdata==2025.2 urllib3==1.26.20 vcrpy==7.0.0 wrapt==1.17.2 xxhash==3.5.0 -yarl==1.18.3 +yarl==1.20.1 diff --git a/.riot/requirements/146f2d8.txt b/.riot/requirements/97f1328.txt similarity index 74% rename from .riot/requirements/146f2d8.txt rename to .riot/requirements/97f1328.txt index e552a5620a3..68875c403e1 100644 --- a/.riot/requirements/146f2d8.txt +++ b/.riot/requirements/97f1328.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.8 # by the following command: # -# pip-compile --allow-unsafe --no-annotate --resolver=backtracking .riot/requirements/146f2d8.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/97f1328.in # aiohappyeyeballs==2.4.4 aiohttp==3.10.11 @@ -11,26 +11,28 @@ annotated-types==0.7.0 anyio==4.5.2 appdirs==1.4.4 async-timeout==4.0.3 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 coverage[toml]==7.6.1 dataclasses-json==0.6.7 datasets==3.1.0 dill==0.3.8 distro==1.9.0 -exceptiongroup==1.2.2 +exceptiongroup==1.3.0 filelock==3.16.1 frozenlist==1.5.0 fsspec[http]==2024.9.0 -h11==0.14.0 -httpcore==1.0.7 +greenlet==3.1.1 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.9.1 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -43,10 +45,10 @@ marshmallow==3.22.0 mock==5.2.0 multidict==6.1.0 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.24.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 orjson==3.10.15 packaging==24.2 @@ -60,25 +62,25 @@ pysbd==0.3.4 pytest==8.3.5 pytest-asyncio==0.21.1 pytest-cov==5.0.0 -pytest-mock==3.14.0 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.7.0 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.2 typing-inspect==0.9.0 -tzdata==2025.1 +tzdata==2025.2 urllib3==1.26.20 vcrpy==6.0.2 wrapt==1.17.2 diff --git a/.riot/requirements/12c5529.txt b/.riot/requirements/f37741b.txt similarity index 54% rename from .riot/requirements/12c5529.txt rename to .riot/requirements/f37741b.txt index 40afeea9f40..f258baee359 100644 --- a/.riot/requirements/12c5529.txt +++ b/.riot/requirements/f37741b.txt @@ -2,35 +2,37 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/12c5529.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/f37741b.in # -aiohappyeyeballs==2.4.8 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 +anyio==4.9.0 appdirs==1.4.4 async-timeout==4.0.3 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -coverage[toml]==7.6.12 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 +coverage[toml]==7.9.2 dataclasses-json==0.6.7 -datasets==3.3.2 +datasets==4.0.0 dill==0.3.8 distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.17.0 -frozenlist==1.5.0 -fsspec[http]==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +exceptiongroup==1.3.0 +filelock==3.18.0 +frozenlist==1.7.0 +fsspec[http]==2025.3.0 +greenlet==3.2.3 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.10.0 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -41,46 +43,48 @@ langchain-text-splitters==0.2.4 langsmith==0.1.147 marshmallow==3.26.1 mock==5.2.0 -multidict==6.1.0 +multidict==6.6.3 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.26.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 -orjson==3.10.15 +orjson==3.11.0 packaging==24.2 -pandas==2.2.3 -pluggy==1.5.0 -propcache==0.3.0 -pyarrow==19.0.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pandas==2.3.1 +pluggy==1.6.0 +propcache==0.3.2 +pyarrow==21.0.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pygments==2.19.2 pysbd==0.3.4 -pytest==8.3.5 +pytest==8.4.1 pytest-asyncio==0.21.1 -pytest-cov==6.0.0 -pytest-mock==3.14.0 +pytest-cov==6.2.1 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.9.0 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 -tzdata==2025.1 -urllib3==2.3.0 +typing-inspection==0.4.1 +tzdata==2025.2 +urllib3==2.5.0 vcrpy==7.0.0 wrapt==1.17.2 xxhash==3.5.0 -yarl==1.18.3 +yarl==1.20.1 diff --git a/.riot/requirements/4102ef5.txt b/.riot/requirements/ffd66c1.txt similarity index 54% rename from .riot/requirements/4102ef5.txt rename to .riot/requirements/ffd66c1.txt index d0518848787..89c4f02b050 100644 --- a/.riot/requirements/4102ef5.txt +++ b/.riot/requirements/ffd66c1.txt @@ -2,33 +2,35 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/4102ef5.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/ffd66c1.in # -aiohappyeyeballs==2.4.8 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 +anyio==4.9.0 appdirs==1.4.4 -attrs==25.1.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -coverage[toml]==7.6.12 +attrs==25.3.0 +certifi==2025.7.14 +charset-normalizer==3.4.2 +coverage[toml]==7.9.2 dataclasses-json==0.6.7 -datasets==3.3.2 +datasets==4.0.0 dill==0.3.8 distro==1.9.0 -filelock==3.17.0 -frozenlist==1.5.0 -fsspec[http]==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +filelock==3.18.0 +frozenlist==1.7.0 +fsspec[http]==2025.3.0 +greenlet==3.2.3 +h11==0.16.0 +hf-xet==1.1.5 +httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.29.2 +huggingface-hub==0.33.4 hypothesis==6.45.0 idna==3.10 -iniconfig==2.0.0 -jiter==0.8.2 +iniconfig==2.1.0 +jiter==0.10.0 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.17 @@ -39,45 +41,47 @@ langchain-text-splitters==0.2.4 langsmith==0.1.147 marshmallow==3.26.1 mock==5.2.0 -multidict==6.1.0 +multidict==6.6.3 multiprocess==0.70.16 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 nest-asyncio==1.6.0 numpy==1.26.4 -openai==1.65.3 +openai==1.97.1 opentracing==2.4.0 -orjson==3.10.15 +orjson==3.11.0 packaging==24.2 -pandas==2.2.3 -pluggy==1.5.0 -propcache==0.3.0 -pyarrow==19.0.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pandas==2.3.1 +pluggy==1.6.0 +propcache==0.3.2 +pyarrow==21.0.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pygments==2.19.2 pysbd==0.3.4 -pytest==8.3.5 +pytest==8.4.1 pytest-asyncio==0.21.1 -pytest-cov==6.0.0 -pytest-mock==3.14.0 +pytest-cov==6.2.1 +pytest-mock==3.14.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 ragas==0.1.21 regex==2024.11.6 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.41 tenacity==8.5.0 tiktoken==0.9.0 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 -tzdata==2025.1 -urllib3==2.3.0 +typing-inspection==0.4.1 +tzdata==2025.2 +urllib3==2.5.0 vcrpy==7.0.0 wrapt==1.17.2 xxhash==3.5.0 -yarl==1.18.3 +yarl==1.20.1 diff --git a/ddtrace/llmobs/_constants.py b/ddtrace/llmobs/_constants.py index 10c81842e4c..e28956390be 100644 --- a/ddtrace/llmobs/_constants.py +++ b/ddtrace/llmobs/_constants.py @@ -56,6 +56,7 @@ EVP_PAYLOAD_SIZE_LIMIT = 5 << 20 # 5MB (actual limit is 5.1MB) EVP_EVENT_SIZE_LIMIT = (1 << 20) - 1024 # 999KB (actual limit is 1MB) +EXPERIMENT_CSV_FIELD_MAX_SIZE = 10 * 1024 * 1024 DROPPED_IO_COLLECTION_ERROR = "dropped_io" DROPPED_VALUE_TEXT = "[This value has been dropped because this span's size exceeds the 1MB size limit.]" diff --git a/ddtrace/llmobs/_experiment.py b/ddtrace/llmobs/_experiment.py index 8fcf6706065..996af3cd00d 100644 --- a/ddtrace/llmobs/_experiment.py +++ b/ddtrace/llmobs/_experiment.py @@ -177,12 +177,56 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[DatasetRecord]: return iter(self._records) + def as_dataframe(self) -> None: + try: + import pandas as pd + except ImportError as e: + raise ImportError( + "pandas is required to convert dataset to DataFrame. Please install via `pip install pandas`" + ) from e + + column_tuples = set() + data_rows = [] + for record in self._records: + flat_record = {} # type: Dict[Union[str, Tuple[str, str]], Any] + + input_data = record.get("input_data", {}) + if isinstance(input_data, dict): + for input_data_col, input_data_val in input_data.items(): + flat_record[("input_data", input_data_col)] = input_data_val + column_tuples.add(("input_data", input_data_col)) + else: + flat_record[("input_data", "")] = input_data + column_tuples.add(("input_data", "")) + + expected_output = record.get("expected_output", {}) + if isinstance(expected_output, dict): + for expected_output_col, expected_output_val in expected_output.items(): + flat_record[("expected_output", expected_output_col)] = expected_output_val + column_tuples.add(("expected_output", expected_output_col)) + else: + flat_record[("expected_output", "")] = expected_output + column_tuples.add(("expected_output", "")) + + for metadata_col, metadata_val in record.get("metadata", {}).items(): + flat_record[("metadata", metadata_col)] = metadata_val + column_tuples.add(("metadata", metadata_col)) + + data_rows.append(flat_record) + + records_list = [] + for flat_record in data_rows: + row = [flat_record.get(col, None) for col in column_tuples] + records_list.append(row) + + return pd.DataFrame(data=records_list, columns=pd.MultiIndex.from_tuples(column_tuples)) + class Experiment: def __init__( self, name: str, - task: Callable[[DatasetRecordInputType], JSONType], + task: Callable[[DatasetRecordInputType, Optional[ExperimentConfigType]], JSONType], dataset: Dataset, evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]], project_name: str, @@ -224,7 +268,8 @@ def run( if not self._llmobs_instance.enabled: logger.warning( "Skipping experiment as LLMObs is not enabled. " - "Ensure LLM Observability is enabled via `LLMObs.enable(...)` or set `DD_LLMOBS_ENABLED=1`." + "Ensure LLM Observability is enabled via `LLMObs.enable(...)` " + "or set `DD_LLMOBS_ENABLED=1` and use `ddtrace-run` to run your application." ) return [] @@ -273,7 +318,7 @@ def _process_record(self, idx_record: Tuple[int, DatasetRecord]) -> Optional[Tas } output_data = None try: - output_data = self._task(input_data) + output_data = self._task(input_data, self._config) except Exception: span.set_exc_info(*sys.exc_info()) self._llmobs_instance.annotate(span, input_data=input_data, output_data=output_data, tags=tags) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index d7a6db680cd..74fdd935944 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -1,3 +1,4 @@ +import csv from dataclasses import dataclass from dataclasses import field import inspect @@ -49,6 +50,7 @@ from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED +from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT from ddtrace.llmobs._constants import EXPERIMENT_ID_KEY from ddtrace.llmobs._constants import INPUT_DOCUMENTS @@ -78,8 +80,8 @@ from ddtrace.llmobs._context import LLMObsContextProvider from ddtrace.llmobs._evaluators.runner import EvaluatorRunner from ddtrace.llmobs._experiment import Dataset +from ddtrace.llmobs._experiment import DatasetRecord from ddtrace.llmobs._experiment import DatasetRecordInputType -from ddtrace.llmobs._experiment import DatasetRecordRaw as DatasetRecord from ddtrace.llmobs._experiment import Experiment from ddtrace.llmobs._experiment import ExperimentConfigType from ddtrace.llmobs._experiment import JSONType @@ -601,6 +603,67 @@ def create_dataset(cls, name: str, description: str, records: List[DatasetRecord ds.push() return ds + @classmethod + def create_dataset_from_csv( + cls, + csv_path: str, + dataset_name: str, + input_data_columns: List[str], + expected_output_columns: List[str], + metadata_columns: List[str] = [], + csv_delimiter: str = ",", + description="", + ) -> Dataset: + ds = cls._instance._dne_client.dataset_create(dataset_name, description) + + # Store the original field size limit to restore it later + original_field_size_limit = csv.field_size_limit() + + csv.field_size_limit(EXPERIMENT_CSV_FIELD_MAX_SIZE) # 10mb + + try: + with open(csv_path, mode="r") as csvfile: + content = csvfile.readline().strip() + if not content: + raise ValueError("CSV file appears to be empty or header is missing.") + + csvfile.seek(0) + + rows = csv.DictReader(csvfile, delimiter=csv_delimiter) + + if rows.fieldnames is None: + raise ValueError("CSV file appears to be empty or header is missing.") + + header_columns = rows.fieldnames + missing_input_columns = [col for col in input_data_columns if col not in header_columns] + missing_output_columns = [col for col in expected_output_columns if col not in header_columns] + missing_metadata_columns = [col for col in metadata_columns if col not in metadata_columns] + + if any(col not in header_columns for col in input_data_columns): + raise ValueError(f"Input columns not found in CSV header: {missing_input_columns}") + if any(col not in header_columns for col in expected_output_columns): + raise ValueError(f"Expected output columns not found in CSV header: {missing_output_columns}") + if any(col not in header_columns for col in metadata_columns): + raise ValueError(f"Metadata columns not found in CSV header: {missing_metadata_columns}") + + for row in rows: + ds.append( + DatasetRecord( + input_data={col: row[col] for col in input_data_columns}, + expected_output={col: row[col] for col in expected_output_columns}, + metadata={col: row[col] for col in metadata_columns}, + record_id="", + ) + ) + + finally: + # Always restore the original field size limit + csv.field_size_limit(original_field_size_limit) + + if len(ds) > 0: + ds.push() + return ds + @classmethod def _delete_dataset(cls, dataset_id: str) -> None: return cls._instance._dne_client.dataset_delete(dataset_id) @@ -609,7 +672,7 @@ def _delete_dataset(cls, dataset_id: str) -> None: def experiment( cls, name: str, - task: Callable[[DatasetRecordInputType], JSONType], + task: Callable[[DatasetRecordInputType, Optional[ExperimentConfigType]], JSONType], dataset: Dataset, evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]], description: str = "", @@ -619,7 +682,7 @@ def experiment( """Initializes an Experiment to run a task on a Dataset and evaluators. :param name: The name of the experiment. - :param task: The task function to run. Must accept a parameter ``input_data`` and optionally ``config``. + :param task: The task function to run. Must accept parameters ``input_data`` and ``config``. :param dataset: The dataset to run the experiment on, created with LLMObs.pull/create_dataset(). :param evaluators: A list of evaluator functions to evaluate the task output. Must accept parameters ``input_data``, ``output_data``, and ``expected_output``. @@ -631,8 +694,8 @@ def experiment( raise TypeError("task must be a callable function.") sig = inspect.signature(task) params = sig.parameters - if "input_data" not in params: - raise TypeError("Task function must accept 'input_data' parameters.") + if "input_data" not in params or "config" not in params: + raise TypeError("Task function must have 'input_data' and 'config' parameters.") if not isinstance(dataset, Dataset): raise TypeError("Dataset must be an LLMObs Dataset object.") if not evaluators or not all(callable(evaluator) for evaluator in evaluators): diff --git a/riotfile.py b/riotfile.py index 790dea22d29..ae6ceead39f 100644 --- a/riotfile.py +++ b/riotfile.py @@ -3134,6 +3134,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT "pytest-asyncio": "==0.21.1", "ragas": "==0.1.21", "langchain": latest, + "pandas": latest, }, pys=select_pys(min_version="3.8"), ), diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_34c161e3.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_34c161e3.yaml new file mode 100644 index 00000000000..3e4ea62376f --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_34c161e3.yaml @@ -0,0 +1,49 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "id": "8dbba503-cf48-4e82-805e-1d9cb46e5ed3", + "attributes": {"insert_records": [{"input": {"in0": "r0v1", "in1": "r0v2", "in2": + "r0v3"}, "expected_output": {"out0": "r0v4", "out1": "r0v5"}, "metadata": {"m0": + "r0v6"}}, {"input": {"in0": "r1v1", "in1": "r1v2", "in2": "r1v3"}, "expected_output": + {"out0": "r1v4", "out1": "r1v5"}, "metadata": {"m0": "r1v6"}}], "update_records": + [], "delete_records": []}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '434' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/8dbba503-cf48-4e82-805e-1d9cb46e5ed3/batch_update + response: + body: + string: '{"data":[]}' + headers: + content-length: + - '11' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 16:27:16 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_466da69c.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_466da69c.yaml new file mode 100644 index 00000000000..81adf342dcc --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_batch_update_post_466da69c.yaml @@ -0,0 +1,49 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "id": "8dbba503-cf48-4e82-805e-1d9cb46e5ed3", + "attributes": {"insert_records": [{"input": {"in0": "r0v1", "in1": "r0v2", "in2": + "r0v3"}, "expected_output": {"out0": "r0v4", "out1": "r0v5"}, "metadata": {}}, + {"input": {"in0": "r1v1", "in1": "r1v2", "in2": "r1v3"}, "expected_output": + {"out0": "r1v4", "out1": "r1v5"}, "metadata": {}}], "update_records": [], "delete_records": + []}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '410' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/8dbba503-cf48-4e82-805e-1d9cb46e5ed3/batch_update + response: + body: + string: '{"data":[{"id":"79dc4c72-e4fd-4de7-b46e-419b8072a207","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:48.152449869Z","dataset_id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","expected_output":{"out0":"r0v4","out1":"r0v5"},"input":{"in0":"r0v1","in1":"r0v2","in2":"r0v3"},"metadata":{},"updated_at":"2025-07-23T02:50:48.152449869Z","version":1}},{"id":"f0f3fe26-1060-452f-a292-30b1293dd5a1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:48.152449869Z","dataset_id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","expected_output":{"out0":"r1v4","out1":"r1v5"},"input":{"in0":"r1v1","in1":"r1v2","in2":"r1v3"},"metadata":{},"updated_at":"2025-07-23T02:50:48.152449869Z","version":1}}]}' + headers: + content-length: + - '812' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:48 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_records_get_46238641.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_records_get_46238641.yaml new file mode 100644 index 00000000000..f8b4fdefe78 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_8dbba503-cf48-4e82-805e-1d9cb46e5ed3_records_get_46238641.yaml @@ -0,0 +1,45 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + ? !!python/object/apply:multidict._multidict.istr + - Content-Length + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: GET + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/8dbba503-cf48-4e82-805e-1d9cb46e5ed3/records + response: + body: + string: '{"data":[{"id":"79dc4c72-e4fd-4de7-b46e-419b8072a207","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:48.152449Z","dataset_id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","expected_output":{"out0":"r0v4","out1":"r0v5"},"input":{"in0":"r0v1","in1":"r0v2","in2":"r0v3"},"metadata":{},"updated_at":"2025-07-23T02:50:48.152449Z"}},{"id":"f0f3fe26-1060-452f-a292-30b1293dd5a1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:48.152449Z","dataset_id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","expected_output":{"out0":"r1v4","out1":"r1v5"},"input":{"in0":"r1v1","in1":"r1v2","in2":"r1v3"},"metadata":{},"updated_at":"2025-07-23T02:50:48.152449Z"}}],"meta":{"after":""}}' + headers: + content-length: + - '796' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:52 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_batch_update_post_b8c84073.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_batch_update_post_b8c84073.yaml new file mode 100644 index 00000000000..eb8a50f0d92 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_batch_update_post_b8c84073.yaml @@ -0,0 +1,49 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "id": "acf19ca4-8062-4548-abbf-95b33f55d51d", + "attributes": {"insert_records": [{"input": {"in0": "r0v1", "in1": "r0v2", "in2": + "r0v3"}, "expected_output": {"out0": "r0v4", "out1": "r0v5"}, "metadata": {"m0": + "r0v6"}}, {"input": {"in0": "r1v1", "in1": "r1v2", "in2": "r1v3"}, "expected_output": + {"out0": "r1v4", "out1": "r1v5"}, "metadata": {"m0": "r1v6"}}], "update_records": + [], "delete_records": []}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '434' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/acf19ca4-8062-4548-abbf-95b33f55d51d/batch_update + response: + body: + string: '{"data":[{"id":"016350fb-8b44-4d5d-af57-0c8408bb5d25","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.795124332Z","dataset_id":"acf19ca4-8062-4548-abbf-95b33f55d51d","expected_output":{"out0":"r0v4","out1":"r0v5"},"input":{"in0":"r0v1","in1":"r0v2","in2":"r0v3"},"metadata":{"m0":"r0v6"},"updated_at":"2025-07-23T02:50:52.795124332Z","version":1}},{"id":"91fcfd23-040c-40ad-a0b0-f497f10b41e3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.795124332Z","dataset_id":"acf19ca4-8062-4548-abbf-95b33f55d51d","expected_output":{"out0":"r1v4","out1":"r1v5"},"input":{"in0":"r1v1","in1":"r1v2","in2":"r1v3"},"metadata":{"m0":"r1v6"},"updated_at":"2025-07-23T02:50:52.795124332Z","version":1}}]}' + headers: + content-length: + - '834' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:52 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_records_get_d7e731ad.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_records_get_d7e731ad.yaml new file mode 100644 index 00000000000..51d0723acfa --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_acf19ca4-8062-4548-abbf-95b33f55d51d_records_get_d7e731ad.yaml @@ -0,0 +1,45 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + ? !!python/object/apply:multidict._multidict.istr + - Content-Length + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: GET + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/acf19ca4-8062-4548-abbf-95b33f55d51d/records + response: + body: + string: '{"data":[{"id":"016350fb-8b44-4d5d-af57-0c8408bb5d25","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.795124Z","dataset_id":"acf19ca4-8062-4548-abbf-95b33f55d51d","expected_output":{"out0":"r0v4","out1":"r0v5"},"input":{"in0":"r0v1","in1":"r0v2","in2":"r0v3"},"metadata":{"m0":"r0v6"},"updated_at":"2025-07-23T02:50:52.795124Z"}},{"id":"91fcfd23-040c-40ad-a0b0-f497f10b41e3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.795124Z","dataset_id":"acf19ca4-8062-4548-abbf-95b33f55d51d","expected_output":{"out0":"r1v4","out1":"r1v5"},"input":{"in0":"r1v1","in1":"r1v2","in2":"r1v3"},"metadata":{"m0":"r1v6"},"updated_at":"2025-07-23T02:50:52.795124Z"}}],"meta":{"after":""}}' + headers: + content-length: + - '818' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:55 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_d7976483.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_d7976483.yaml new file mode 100644 index 00000000000..698ca4baa2d --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_d7976483.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids": + ["8dbba503-cf48-4e82-805e-1d9cb46e5ed3"]}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '119' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete + response: + body: + string: '{"data":[{"id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:46.680898Z","current_version":1,"deleted_at":"2025-07-23T02:50:52.35158Z","description":"A + good csv dataset","name":"test-dataset-good-csv","updated_at":"2025-07-23T02:50:48.30675Z"}}]}' + headers: + content-length: + - '357' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:52 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_f8dc510e.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_f8dc510e.yaml new file mode 100644 index 00000000000..10725a15b17 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_f8dc510e.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids": + ["acf19ca4-8062-4548-abbf-95b33f55d51d"]}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '119' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete + response: + body: + string: '{"data":[{"id":"acf19ca4-8062-4548-abbf-95b33f55d51d","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.663543Z","current_version":1,"deleted_at":"2025-07-23T02:50:55.361839Z","description":"A + good pipe separated csv dataset","name":"test-dataset-good-csv-pipe","updated_at":"2025-07-23T02:50:52.943101Z"}}]}' + headers: + content-length: + - '379' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:55 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv-pipe_get_bcb704ce.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv-pipe_get_bcb704ce.yaml new file mode 100644 index 00000000000..032e3c997f3 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv-pipe_get_bcb704ce.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + ? !!python/object/apply:multidict._multidict.istr + - Content-Length + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: GET + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets?filter%5Bname%5D=test-dataset-good-csv-pipe + response: + body: + string: '{"data":[{"id":"acf19ca4-8062-4548-abbf-95b33f55d51d","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.663543Z","current_version":1,"description":"A + good pipe separated csv dataset","name":"test-dataset-good-csv-pipe","updated_at":"2025-07-23T02:50:52.943101Z"}}],"meta":{"after":""}}' + headers: + content-length: + - '356' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:55 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv_get_989b2028.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv_get_989b2028.yaml new file mode 100644 index 00000000000..02e7ba102ae --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_filter_name__test-dataset-good-csv_get_989b2028.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + ? !!python/object/apply:multidict._multidict.istr + - Content-Length + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: GET + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets?filter%5Bname%5D=test-dataset-good-csv + response: + body: + string: '{"data":[{"id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:46.680898Z","current_version":1,"description":"A + good csv dataset","name":"test-dataset-good-csv","updated_at":"2025-07-23T02:50:48.30675Z"}}],"meta":{"after":""}}' + headers: + content-length: + - '335' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:51 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_027be704.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_027be704.yaml new file mode 100644 index 00000000000..232be655b93 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_027be704.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-good-csv-pipe", + "description": "A good pipe separated csv dataset"}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '136' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets + response: + body: + string: '{"data":{"id":"acf19ca4-8062-4548-abbf-95b33f55d51d","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:52.663543594Z","current_version":0,"description":"A + good pipe separated csv dataset","name":"test-dataset-good-csv-pipe","updated_at":"2025-07-23T02:50:52.663543594Z"}}}' + headers: + content-length: + - '340' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:52 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_c1d4ae31.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_c1d4ae31.yaml new file mode 100644 index 00000000000..de032063c0c --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_c1d4ae31.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-good-csv", + "description": "A good csv dataset"}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '116' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets + response: + body: + string: '{"data":{"id":"8dbba503-cf48-4e82-805e-1d9cb46e5ed3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-23T02:50:46.680898186Z","current_version":0,"description":"A + good csv dataset","name":"test-dataset-good-csv","updated_at":"2025-07-23T02:50:46.680898186Z"}}}' + headers: + content-length: + - '320' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:46 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_d59b5313.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_d59b5313.yaml new file mode 100644 index 00000000000..07a85cd5c60 --- /dev/null +++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_post_d59b5313.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-bad-csv", + "description": "not a real csv dataset"}}}' + headers: + Accept: + - '*/*' + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - identity + Connection: + - keep-alive + Content-Length: + - '119' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + User-Agent: + - python-requests/2.32.3 + method: POST + uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets + response: + body: + string: '{"data":{"id":"6890ba6e-8023-414c-bdb1-662c63f9f489","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-07-22T23:10:06.524865Z","current_version":0,"description":"not + a real csv dataset","name":"test-dataset-bad-csv","updated_at":"2025-07-22T23:10:06.524865Z"}}}' + headers: + content-length: + - '317' + content-security-policy: + - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com + content-type: + - application/vnd.api+json + date: + - Wed, 23 Jul 2025 02:50:47 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + x-frame-options: + - SAMEORIGIN + status: + code: 200 + message: OK +version: 1 diff --git a/tests/llmobs/static_files/empty.csv b/tests/llmobs/static_files/empty.csv new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/llmobs/static_files/good_dataset.csv b/tests/llmobs/static_files/good_dataset.csv new file mode 100644 index 00000000000..be4f03d552c --- /dev/null +++ b/tests/llmobs/static_files/good_dataset.csv @@ -0,0 +1,3 @@ +in0,in1,in2,out0,out1,m0 +r0v1,r0v2,r0v3,r0v4,r0v5,r0v6 +r1v1,r1v2,r1v3,r1v4,r1v5,r1v6 diff --git a/tests/llmobs/static_files/good_dataset_pipe_separated.csv b/tests/llmobs/static_files/good_dataset_pipe_separated.csv new file mode 100644 index 00000000000..7a5a698efe1 --- /dev/null +++ b/tests/llmobs/static_files/good_dataset_pipe_separated.csv @@ -0,0 +1,3 @@ +in0|in1|in2|out0|out1|m0 +r0v1|r0v2|r0v3|r0v4|r0v5|r0v6 +r1v1|r1v2|r1v3|r1v4|r1v5|r1v6 diff --git a/tests/llmobs/test_experiments.py b/tests/llmobs/test_experiments.py index 8a2747bcd41..57409feae5e 100644 --- a/tests/llmobs/test_experiments.py +++ b/tests/llmobs/test_experiments.py @@ -28,11 +28,11 @@ def wait_for_backend(): time.sleep(2) -def dummy_task(input_data): +def dummy_task(input_data, config): return input_data -def faulty_task(input_data): +def faulty_task(input_data, config): raise ValueError("This is a test error") @@ -86,6 +86,177 @@ def test_dataset_create_delete(llmobs): llmobs._delete_dataset(dataset_id=dataset._id) +def test_dataset_as_dataframe(llmobs, test_dataset_one_record): + dataset = test_dataset_one_record + df = dataset.as_dataframe() + assert len(df.columns) == 2 + assert df.size == 2 # size is num elements in a series + + +def test_csv_dataset_as_dataframe(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/good_dataset.csv") + dataset_id = None + try: + dataset = llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-good-csv", + description="A good csv dataset", + input_data_columns=["in0", "in1", "in2"], + expected_output_columns=["out0", "out1"], + metadata_columns=["m0"], + ) + dataset_id = dataset._id + assert len(dataset) == 2 + + df = dataset.as_dataframe() + assert len(df.columns) == 6 + assert sorted(df.columns) == [ + ("expected_output", "out0"), + ("expected_output", "out1"), + ("input_data", "in0"), + ("input_data", "in1"), + ("input_data", "in2"), + ("metadata", "m0"), + ] + finally: + if dataset_id: + llmobs._delete_dataset(dataset_id=dataset_id) + + +def test_dataset_csv_missing_input_col(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/good_dataset.csv") + with pytest.raises(ValueError, match=re.escape("Input columns not found in CSV header: ['in998', 'in999']")): + llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-good-csv", + description="A good csv dataset", + input_data_columns=["in998", "in999"], + expected_output_columns=["out0", "out1"], + ) + + +def test_dataset_csv_missing_output_col(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/good_dataset.csv") + with pytest.raises(ValueError, match=re.escape("Expected output columns not found in CSV header: ['out999']")): + llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-good-csv", + description="A good csv dataset", + input_data_columns=["in0", "in1", "in2"], + expected_output_columns=["out999"], + ) + + +def test_dataset_csv_empty_csv(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/empty.csv") + with pytest.raises(ValueError, match=re.escape("CSV file appears to be empty or header is missing.")): + llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-bad-csv", + description="not a real csv dataset", + input_data_columns=["in0", "in1", "in2"], + expected_output_columns=["out0"], + ) + + +def test_dataset_csv(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/good_dataset.csv") + dataset_id = None + try: + dataset = llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-good-csv", + description="A good csv dataset", + input_data_columns=["in0", "in1", "in2"], + expected_output_columns=["out0", "out1"], + ) + dataset_id = dataset._id + assert len(dataset) == 2 + assert len(dataset[0]["input_data"]) == 3 + assert dataset[0]["input_data"]["in0"] == "r0v1" + assert dataset[0]["input_data"]["in1"] == "r0v2" + assert dataset[0]["input_data"]["in2"] == "r0v3" + assert dataset[1]["input_data"]["in0"] == "r1v1" + assert dataset[1]["input_data"]["in1"] == "r1v2" + assert dataset[1]["input_data"]["in2"] == "r1v3" + + assert len(dataset[0]["expected_output"]) == 2 + assert dataset[0]["expected_output"]["out0"] == "r0v4" + assert dataset[0]["expected_output"]["out1"] == "r0v5" + assert dataset[1]["expected_output"]["out0"] == "r1v4" + assert dataset[1]["expected_output"]["out1"] == "r1v5" + + assert dataset.description == "A good csv dataset" + + assert dataset._id is not None + + wait_for_backend() + ds = llmobs.pull_dataset(name=dataset.name) + + assert len(ds) == len(dataset) + assert ds.name == dataset.name + assert ds.description == dataset.description + assert ds._version == 1 + finally: + if dataset_id: + llmobs._delete_dataset(dataset_id=dataset_id) + + +def test_dataset_csv_pipe_separated(llmobs): + test_path = os.path.dirname(__file__) + csv_path = os.path.join(test_path, "static_files/good_dataset_pipe_separated.csv") + dataset_id = None + try: + dataset = llmobs.create_dataset_from_csv( + csv_path=csv_path, + dataset_name="test-dataset-good-csv-pipe", + description="A good pipe separated csv dataset", + input_data_columns=["in0", "in1", "in2"], + expected_output_columns=["out0", "out1"], + metadata_columns=["m0"], + csv_delimiter="|", + ) + dataset_id = dataset._id + assert len(dataset) == 2 + assert len(dataset[0]["input_data"]) == 3 + assert dataset[0]["input_data"]["in0"] == "r0v1" + assert dataset[0]["input_data"]["in1"] == "r0v2" + assert dataset[0]["input_data"]["in2"] == "r0v3" + assert dataset[1]["input_data"]["in0"] == "r1v1" + assert dataset[1]["input_data"]["in1"] == "r1v2" + assert dataset[1]["input_data"]["in2"] == "r1v3" + + assert len(dataset[0]["expected_output"]) == 2 + assert dataset[0]["expected_output"]["out0"] == "r0v4" + assert dataset[0]["expected_output"]["out1"] == "r0v5" + assert dataset[1]["expected_output"]["out0"] == "r1v4" + assert dataset[1]["expected_output"]["out1"] == "r1v5" + + assert len(dataset[0]["metadata"]) == 1 + assert dataset[0]["metadata"]["m0"] == "r0v6" + assert dataset[1]["metadata"]["m0"] == "r1v6" + + assert dataset.description == "A good pipe separated csv dataset" + + assert dataset._id is not None + + wait_for_backend() + ds = llmobs.pull_dataset(name=dataset.name) + + assert len(ds) == len(dataset) + assert ds.name == dataset.name + assert ds.description == dataset.description + assert ds._version == 1 + finally: + if dataset_id: + llmobs._delete_dataset(dataset_id=dataset._id) + + def test_dataset_pull_non_existent(llmobs): with pytest.raises(ValueError): llmobs.pull_dataset(name="test-dataset-non-existent") @@ -218,12 +389,18 @@ def test_experiment_invalid_task_type_raises(llmobs, test_dataset_one_record): def test_experiment_invalid_task_signature_raises(llmobs, test_dataset_one_record): - with pytest.raises(TypeError, match="Task function must accept 'input_data' parameters."): + with pytest.raises(TypeError, match="Task function must have 'input_data' and 'config' parameters."): def my_task(not_input): pass llmobs.experiment("test_experiment", my_task, test_dataset_one_record, [dummy_evaluator]) + with pytest.raises(TypeError, match="Task function must have 'input_data' and 'config' parameters."): + + def my_task(input_data, not_config): + pass + + llmobs.experiment("test_experiment", my_task, test_dataset_one_record, [dummy_evaluator]) def test_experiment_invalid_dataset_raises(llmobs):