diff --git a/integrations/cometapi/pyproject.toml b/integrations/cometapi/pyproject.toml index 2b31e2288f..03289b72da 100644 --- a/integrations/cometapi/pyproject.toml +++ b/integrations/cometapi/pyproject.toml @@ -85,7 +85,7 @@ module = [ ignore_missing_imports = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/cometapi/src/haystack_integrations/components/generators/cometapi/chat/chat_generator.py b/integrations/cometapi/src/haystack_integrations/components/generators/cometapi/chat/chat_generator.py index cc389302e8..5586eb928f 100644 --- a/integrations/cometapi/src/haystack_integrations/components/generators/cometapi/chat/chat_generator.py +++ b/integrations/cometapi/src/haystack_integrations/components/generators/cometapi/chat/chat_generator.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import StreamingCallbackT @@ -35,12 +35,12 @@ def __init__( api_key: Secret = Secret.from_env_var("COMET_API_KEY"), model: str = "gpt-4o-mini", streaming_callback: Optional[StreamingCallbackT] = None, - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: Optional[int] = None, max_retries: Optional[int] = None, - tools: Optional[Union[List[Union[Tool, Toolset]], Toolset]] = None, + tools: Optional[Union[list[Union[Tool, Toolset]], Toolset]] = None, tools_strict: bool = False, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): api_base_url = "https://api.cometapi.com/v1" diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index b1a337e939..7716242fa1 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -83,7 +83,7 @@ disallow_incomplete_defs = true allow-direct-references = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py index 8576924c4a..d61409fadf 100644 --- a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +++ b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -47,7 +47,7 @@ def __init__( self, *, document_store: ElasticsearchDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, scale_score: bool = False, @@ -79,7 +79,7 @@ def __init__( self._scale_score = scale_score self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -97,7 +97,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchBM25Retriever": + def from_dict(cls, data: dict[str, Any]) -> "ElasticsearchBM25Retriever": """ Deserializes the component from a dictionary. @@ -115,10 +115,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchBM25Retriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( - self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> Dict[str, List[Document]]: + self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> dict[str, list[Document]]: """ Retrieve documents using the BM25 keyword-based algorithm. @@ -140,10 +140,10 @@ def run( ) return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( - self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> Dict[str, List[Document]]: + self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents using the BM25 keyword-based algorithm. diff --git a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py index c9cb2cbe41..913d861230 100644 --- a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +++ b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -49,7 +49,7 @@ def __init__( self, *, document_store: ElasticsearchDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, num_candidates: Optional[int] = None, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, @@ -78,7 +78,7 @@ def __init__( self._num_candidates = num_candidates self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -95,7 +95,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchEmbeddingRetriever": + def from_dict(cls, data: dict[str, Any]) -> "ElasticsearchEmbeddingRetriever": """ Deserializes the component from a dictionary. @@ -113,10 +113,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchEmbeddingRetriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( - self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> Dict[str, List[Document]]: + self, query_embedding: list[float], filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> dict[str, list[Document]]: """ Retrieve documents using a vector similarity metric. @@ -138,10 +138,10 @@ def run( ) return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( - self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> Dict[str, List[Document]]: + self, query_embedding: list[float], filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents using a vector similarity metric. diff --git a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py index 0e1dd76b7f..800b103704 100644 --- a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py +++ b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py @@ -8,7 +8,7 @@ from collections.abc import Mapping -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Union import numpy as np from elastic_transport import NodeConfig @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]]] +Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]], NodeConfig]]] # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to # True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor @@ -71,7 +71,7 @@ def __init__( self, *, hosts: Optional[Hosts] = None, - custom_mapping: Optional[Dict[str, Any]] = None, + custom_mapping: Optional[dict[str, Any]] = None, index: str = "default", api_key: Secret = Secret.from_env_var("ELASTIC_API_KEY", strict=False), api_key_id: Secret = Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False), @@ -120,7 +120,7 @@ def __init__( self._kwargs = kwargs self._initialized = False - if self._custom_mapping and not isinstance(self._custom_mapping, Dict): + if self._custom_mapping and not isinstance(self._custom_mapping, dict): msg = "custom_mapping must be a dictionary" raise ValueError(msg) @@ -186,7 +186,7 @@ def _ensure_initialized(self): self._initialized = True - def _handle_auth(self) -> Optional[Union[str, Tuple[str, str]]]: + def _handle_auth(self) -> Optional[Union[str, tuple[str, str]]]: """ Handles authentication for the Elasticsearch client. @@ -206,7 +206,7 @@ def _handle_auth(self) -> Optional[Union[str, Tuple[str, str]]]: """ - api_key: Optional[Union[str, Tuple[str, str]]] # make the type checker happy + api_key: Optional[Union[str, tuple[str, str]]] # make the type checker happy api_key_resolved = self._api_key.resolve_value() api_key_id_resolved = self._api_key_id.resolve_value() @@ -247,7 +247,7 @@ def async_client(self) -> AsyncElasticsearch: assert self._async_client is not None return self._async_client - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -269,7 +269,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchDocumentStore": + def from_dict(cls, data: dict[str, Any]) -> "ElasticsearchDocumentStore": """ Deserializes the component from a dictionary. @@ -300,7 +300,7 @@ async def count_documents_async(self) -> int: result = await self._async_client.count(index=self._index) # type: ignore return result["count"] - def _search_documents(self, **kwargs: Any) -> List[Document]: + def _search_documents(self, **kwargs: Any) -> list[Document]: """ Calls the Elasticsearch client's search method and handles pagination. """ @@ -308,7 +308,7 @@ def _search_documents(self, **kwargs: Any) -> List[Document]: if top_k is None and "knn" in kwargs and "k" in kwargs["knn"]: top_k = kwargs["knn"]["k"] - documents: List[Document] = [] + documents: list[Document] = [] from_ = 0 # Handle pagination while True: @@ -327,7 +327,7 @@ def _search_documents(self, **kwargs: Any) -> List[Document]: break return documents - async def _search_documents_async(self, **kwargs: Any) -> List[Document]: + async def _search_documents_async(self, **kwargs: Any) -> list[Document]: """ Asynchronously calls the Elasticsearch client's search method and handles pagination. """ @@ -335,7 +335,7 @@ async def _search_documents_async(self, **kwargs: Any) -> List[Document]: if top_k is None and "knn" in kwargs and "k" in kwargs["knn"]: top_k = kwargs["knn"]["k"] - documents: List[Document] = [] + documents: list[Document] = [] from_ = 0 # handle pagination @@ -352,7 +352,7 @@ async def _search_documents_async(self, **kwargs: Any) -> List[Document]: return documents - def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ The main query method for the document store. It retrieves all documents that match the filters. @@ -370,7 +370,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc documents = self._search_documents(query=query) return documents - async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Asynchronously retrieves all documents that match the filters. @@ -389,7 +389,7 @@ async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) return documents @staticmethod - def _deserialize_document(hit: Dict[str, Any]) -> Document: + def _deserialize_document(hit: dict[str, Any]) -> Document: """ Creates a `Document` from the search hit provided. This is mostly useful in self.filter_documents(). @@ -404,7 +404,7 @@ def _deserialize_document(hit: Dict[str, Any]) -> Document: return Document.from_dict(data) - def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: + def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: """ Writes `Document`s to Elasticsearch. @@ -482,7 +482,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D return documents_written async def write_documents_async( - self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE + self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE ) -> int: """ Asynchronously writes `Document`s to Elasticsearch. @@ -550,7 +550,7 @@ async def write_documents_async( msg = f"Failed to write documents to Elasticsearch: {e!s}" raise DocumentStoreError(msg) from e - def delete_documents(self, document_ids: List[str]) -> None: + def delete_documents(self, document_ids: list[str]) -> None: """ Deletes all documents with a matching document_ids from the document store. @@ -564,7 +564,7 @@ def delete_documents(self, document_ids: List[str]) -> None: raise_on_error=False, ) - def _prepare_delete_all_request(self, *, is_async: bool) -> Dict[str, Any]: + def _prepare_delete_all_request(self, *, is_async: bool) -> dict[str, Any]: return { "index": self._index, "body": {"query": {"match_all": {}}}, # Delete all documents @@ -572,7 +572,7 @@ def _prepare_delete_all_request(self, *, is_async: bool) -> Dict[str, Any]: "refresh": True, # Ensure changes are visible immediately } - async def delete_documents_async(self, document_ids: List[str]) -> None: + async def delete_documents_async(self, document_ids: list[str]) -> None: """ Asynchronously deletes all documents with a matching document_ids from the document store. @@ -681,11 +681,11 @@ def _bm25_retrieval( self, query: str, *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, scale_score: bool = False, - ) -> List[Document]: + ) -> list[Document]: """ Retrieves documents using BM25 retrieval. @@ -700,7 +700,7 @@ def _bm25_retrieval( msg = "query must be a non empty string" raise ValueError(msg) - body: Dict[str, Any] = { + body: dict[str, Any] = { "size": top_k, "query": { "bool": { @@ -735,11 +735,11 @@ async def _bm25_retrieval_async( self, query: str, *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, scale_score: bool = False, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously retrieves documents using BM25 retrieval. @@ -789,12 +789,12 @@ async def _bm25_retrieval_async( def _embedding_retrieval( self, - query_embedding: List[float], + query_embedding: list[float], *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, num_candidates: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Retrieves documents using dense vector similarity search. @@ -811,7 +811,7 @@ def _embedding_retrieval( if not num_candidates: num_candidates = top_k * 10 - body: Dict[str, Any] = { + body: dict[str, Any] = { "knn": { "field": "embedding", "query_vector": query_embedding, @@ -828,12 +828,12 @@ def _embedding_retrieval( async def _embedding_retrieval_async( self, - query_embedding: List[float], + query_embedding: list[float], *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, num_candidates: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously retrieves documents using dense vector similarity search. diff --git a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/filters.py b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/filters.py index 1e2c2f29f9..32fa3b52f9 100644 --- a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/filters.py +++ b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/filters.py @@ -2,12 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 from datetime import datetime -from typing import Any, Dict, List +from typing import Any from haystack.errors import FilterError -def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: +def _normalize_filters(filters: dict[str, Any]) -> dict[str, Any]: """ Converts Haystack filters in ElasticSearch compatible filters. """ @@ -20,7 +20,7 @@ def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: return _parse_logical_condition(filters) -def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]: if "operator" not in condition: msg = f"'operator' key missing in {condition}" raise FilterError(msg) @@ -43,7 +43,7 @@ def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: raise FilterError(msg) -def _equal(field: str, value: Any) -> Dict[str, Any]: +def _equal(field: str, value: Any) -> dict[str, Any]: if value is None: return {"bool": {"must_not": {"exists": {"field": field}}}} @@ -62,7 +62,7 @@ def _equal(field: str, value: Any) -> Dict[str, Any]: return {"term": {field: value}} -def _not_equal(field: str, value: Any) -> Dict[str, Any]: +def _not_equal(field: str, value: Any) -> dict[str, Any]: if value is None: return {"exists": {"field": field}} @@ -75,7 +75,7 @@ def _not_equal(field: str, value: Any) -> Dict[str, Any]: return {"bool": {"must_not": {"term": {field: value}}}} -def _greater_than(field: str, value: Any) -> Dict[str, Any]: +def _greater_than(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '>' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -97,7 +97,7 @@ def _greater_than(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"gt": value}}} -def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _greater_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '>=' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -119,7 +119,7 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"gte": value}}} -def _less_than(field: str, value: Any) -> Dict[str, Any]: +def _less_than(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '<' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -141,7 +141,7 @@ def _less_than(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"lt": value}}} -def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _less_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '<=' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -163,14 +163,14 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"lte": value}}} -def _in(field: str, value: Any) -> Dict[str, Any]: +def _in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" raise FilterError(msg) return {"terms": {field: value}} -def _not_in(field: str, value: Any) -> Dict[str, Any]: +def _not_in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" raise FilterError(msg) @@ -189,7 +189,7 @@ def _not_in(field: str, value: Any) -> Dict[str, Any]: } -def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_comparison_condition(condition: dict[str, Any]) -> dict[str, Any]: if "field" not in condition: # 'field' key is only found in comparison dictionaries. # We assume this is a logic dictionary since it's not present. @@ -215,7 +215,7 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: return COMPARISON_OPERATORS[operator](field, value) -def _normalize_ranges(conditions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def _normalize_ranges(conditions: list[dict[str, Any]]) -> list[dict[str, Any]]: """ Merges range conditions acting on a same field. @@ -235,7 +235,7 @@ def _normalize_ranges(conditions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: range_conditions = [next(iter(c["range"].items())) for c in conditions if "range" in c] if range_conditions: conditions = [c for c in conditions if "range" not in c] - range_conditions_dict: Dict[str, Any] = {} + range_conditions_dict: dict[str, Any] = {} for field_name, comparison in range_conditions: if field_name not in range_conditions_dict: range_conditions_dict[field_name] = {} diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index 81dd81d054..2eab29f8ce 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -4,7 +4,6 @@ import random import time -from typing import List from unittest.mock import Mock, patch import pytest @@ -245,7 +244,7 @@ def document_store(self, request): store.client.options(ignore_status=[400, 404]).indices.delete(index=index) store.client.close() - def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): + def assert_documents_are_equal(self, received: list[Document], expected: list[Document]): """ The ElasticSearchDocumentStore.filter_documents() method returns a Documents with their score set. We don't want to compare the score, so we set it to None before comparing the documents. diff --git a/integrations/fastembed/pyproject.toml b/integrations/fastembed/pyproject.toml index c13bc6dac8..af322fa8b3 100644 --- a/integrations/fastembed/pyproject.toml +++ b/integrations/fastembed/pyproject.toml @@ -75,7 +75,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py index 65b0ea296d..214c9715d6 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py @@ -1,4 +1,4 @@ -from typing import Any, ClassVar, Dict, List, Optional +from typing import Any, ClassVar, Optional from haystack.dataclasses.sparse_embedding import SparseEmbedding from tqdm import tqdm @@ -12,7 +12,7 @@ class _FastembedEmbeddingBackendFactory: Factory class to create instances of fastembed embedding backends. """ - _instances: ClassVar[Dict[str, "_FastembedEmbeddingBackend"]] = {} + _instances: ClassVar[dict[str, "_FastembedEmbeddingBackend"]] = {} @staticmethod def get_embedding_backend( @@ -49,7 +49,7 @@ def __init__( model_name=model_name, cache_dir=cache_dir, threads=threads, local_files_only=local_files_only ) - def embed(self, data: List[str], progress_bar: bool = True, **kwargs: Any) -> List[List[float]]: + def embed(self, data: list[str], progress_bar: bool = True, **kwargs: Any) -> list[list[float]]: # the embed method returns a Iterable[np.ndarray], so we convert it to a list of lists embeddings = [] embeddings_iterable = self.model.embed(data, **kwargs) @@ -65,7 +65,7 @@ class _FastembedSparseEmbeddingBackendFactory: Factory class to create instances of fastembed sparse embedding backends. """ - _instances: ClassVar[Dict[str, "_FastembedSparseEmbeddingBackend"]] = {} + _instances: ClassVar[dict[str, "_FastembedSparseEmbeddingBackend"]] = {} @staticmethod def get_embedding_backend( @@ -73,7 +73,7 @@ def get_embedding_backend( cache_dir: Optional[str] = None, threads: Optional[int] = None, local_files_only: bool = False, - model_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, ) -> "_FastembedSparseEmbeddingBackend": embedding_backend_id = f"{model_name}{cache_dir}{threads}{local_files_only}{model_kwargs}" @@ -102,7 +102,7 @@ def __init__( cache_dir: Optional[str] = None, threads: Optional[int] = None, local_files_only: bool = False, - model_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, ): model_kwargs = model_kwargs or {} @@ -114,7 +114,7 @@ def __init__( **model_kwargs, ) - def embed(self, data: List[str], progress_bar: bool = True, **kwargs: Any) -> List[SparseEmbedding]: + def embed(self, data: list[str], progress_bar: bool = True, **kwargs: Any) -> list[SparseEmbedding]: # The embed method returns a Iterable[SparseEmbedding], so we convert to Haystack SparseEmbedding type. # Each SparseEmbedding contains an `indices` key containing a list of int and # an `values` key containing a list of floats. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py index 1424a506fa..bcd1a6111d 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import Document, component, default_to_dict @@ -66,7 +66,7 @@ def __init__( progress_bar: bool = True, parallel: Optional[int] = None, local_files_only: bool = False, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", ): """ @@ -103,7 +103,7 @@ def __init__( self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: @@ -136,7 +136,7 @@ def warm_up(self): local_files_only=self.local_files_only, ) - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: texts_to_embed = [] for doc in documents: meta_values_to_embed = [ @@ -149,8 +149,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: texts_to_embed.append(text_to_embed) return texts_to_embed - @component.output_types(documents=List[Document]) - def run(self, documents: List[Document]) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]) -> dict[str, list[Document]]: """ Embeds a list of Documents. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 242cef70a8..40137b4ea9 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import Document, component, default_to_dict @@ -60,9 +60,9 @@ def __init__( progress_bar: bool = True, parallel: Optional[int] = None, local_files_only: bool = False, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", - model_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, ): """ Create an FastembedDocumentEmbedder component. @@ -96,7 +96,7 @@ def __init__( self.embedding_separator = embedding_separator self.model_kwargs = model_kwargs - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: @@ -129,7 +129,7 @@ def warm_up(self): model_kwargs=self.model_kwargs, ) - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: texts_to_embed = [] for doc in documents: meta_values_to_embed = [ @@ -140,8 +140,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: texts_to_embed.append(text_to_embed) return texts_to_embed - @component.output_types(documents=List[Document]) - def run(self, documents: List[Document]) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]) -> dict[str, list[Document]]: """ Embeds a list of Documents. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py index 71342565be..cac95f697a 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict from haystack.dataclasses.sparse_embedding import SparseEmbedding @@ -35,7 +35,7 @@ def __init__( progress_bar: bool = True, parallel: Optional[int] = None, local_files_only: bool = False, - model_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, ): """ Create a FastembedSparseTextEmbedder component. @@ -62,7 +62,7 @@ def __init__( self.local_files_only = local_files_only self.model_kwargs = model_kwargs - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -94,7 +94,7 @@ def warm_up(self): ) @component.output_types(sparse_embedding=SparseEmbedding) - def run(self, text: str) -> Dict[str, SparseEmbedding]: + def run(self, text: str) -> dict[str, SparseEmbedding]: """ Embeds text using the Fastembed model. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py index a9fbbcb5fe..0c6bb646f3 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import component, default_to_dict @@ -64,7 +64,7 @@ def __init__( self.parallel = parallel self.local_files_only = local_files_only - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -95,8 +95,8 @@ def warm_up(self): local_files_only=self.local_files_only, ) - @component.output_types(embedding=List[float]) - def run(self, text: str) -> Dict[str, List[float]]: + @component.output_types(embedding=list[float]) + def run(self, text: str) -> dict[str, list[float]]: """ Embeds text using the Fastembed model. diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py index 3006135a00..9b55fbb4e8 100644 --- a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py +++ b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -40,7 +40,7 @@ def __init__( batch_size: int = 64, parallel: Optional[int] = None, local_files_only: bool = False, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, meta_data_separator: str = "\n", ): """ @@ -78,7 +78,7 @@ def __init__( self.meta_data_separator = meta_data_separator self._model: Optional[TextCrossEncoder] = None - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -99,7 +99,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "FastembedRanker": + def from_dict(cls, data: dict[str, Any]) -> "FastembedRanker": """ Deserializes the component from a dictionary. @@ -122,7 +122,7 @@ def warm_up(self): local_files_only=self.local_files_only, ) - def _prepare_fastembed_input_docs(self, documents: List[Document]) -> List[str]: + def _prepare_fastembed_input_docs(self, documents: list[Document]) -> list[str]: """ Prepare the input by concatenating the document text with the metadata fields specified. :param documents: The list of Document objects. @@ -139,8 +139,8 @@ def _prepare_fastembed_input_docs(self, documents: List[Document]) -> List[str]: return concatenated_input_list - @component.output_types(documents=List[Document]) - def run(self, query: str, documents: List[Document], top_k: Optional[int] = None) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, query: str, documents: list[Document], top_k: Optional[int] = None) -> dict[str, list[Document]]: """ Returns a list of documents ranked by their similarity to the given query, using FastEmbed. diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml index c9834638f2..fd7d9eb9aa 100644 --- a/integrations/github/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -77,7 +77,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index fcbe080787..edbfdad8dd 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from base64 import b64decode, b64encode from enum import Enum -from typing import Any, Dict, Optional, Union +from typing import Any, Optional, Union import requests from haystack import component, default_from_dict, default_to_dict, logging @@ -145,7 +145,7 @@ def _update_file(self, owner: str, repo: str, path: str, content: str, message: def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: """Check if last commit was made by the current token user.""" url = f"https://api.github.com/repos/{owner}/{repo}/commits" - params: Dict[str, Union[str, int]] = {"per_page": 1, "sha": branch} + params: dict[str, Union[str, int]] = {"per_page": 1, "sha": branch} response = requests.get(url, headers=self._get_request_headers(), params=params, timeout=10) response.raise_for_status() last_commit = response.json()[0] @@ -158,7 +158,7 @@ def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: return commit_author == current_user - def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: + def _edit_file(self, owner: str, repo: str, payload: dict[str, str], branch: str) -> str: """Handle file editing.""" try: content, sha = self._get_file_content(owner, repo, payload["path"], branch) @@ -180,7 +180,7 @@ def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str raise return f"Error: {e!s}" - def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: str) -> str: + def _undo_changes(self, owner: str, repo: str, payload: dict[str, Any], branch: str) -> str: """Handle undoing changes.""" try: if not self._check_last_commit(owner, repo, branch): @@ -191,7 +191,7 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits" # Get the previous commit SHA - params: Dict[str, Union[str, int]] = {"per_page": 2, "sha": branch} + params: dict[str, Union[str, int]] = {"per_page": 2, "sha": branch} commits = requests.get(commits_url, headers=self._get_request_headers(), params=params, timeout=10).json() previous_sha = commits[1]["sha"] @@ -207,7 +207,7 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: raise return f"Error: {e!s}" - def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: + def _create_file(self, owner: str, repo: str, payload: dict[str, str], branch: str) -> str: """Handle file creation.""" try: url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}" @@ -224,7 +224,7 @@ def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s raise return f"Error: {e!s}" - def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: + def _delete_file(self, owner: str, repo: str, payload: dict[str, str], branch: str) -> str: """Handle file deletion.""" try: _, sha = self._get_file_content(owner, repo, payload["path"], branch) @@ -245,10 +245,10 @@ def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s def run( self, command: Union[Command, str], - payload: Dict[str, Any], + payload: dict[str, Any], repo: Optional[str] = None, branch: Optional[str] = None, - ) -> Dict[str, str]: + ) -> dict[str, str]: """ Process GitHub file operations. @@ -287,7 +287,7 @@ def run( result = command_handlers[command](owner, repo_name, payload, working_branch) return {"result": result} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """Serialize the component to a dictionary.""" return default_to_dict( self, @@ -298,7 +298,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditor": + def from_dict(cls, data: dict[str, Any]) -> "GitHubFileEditor": """Deserialize the component from a dictionary.""" init_params = data["init_parameters"] deserialize_secrets_inplace(init_params, keys=["github_token"]) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index 78a770c9d2..9f3eaca6ab 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import re -from typing import Any, Dict +from typing import Any import requests from haystack import component, default_from_dict, default_to_dict, logging @@ -115,7 +115,7 @@ def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) return False - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize the component to a dictionary. @@ -129,7 +129,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenter": + def from_dict(cls, data: dict[str, Any]) -> "GitHubIssueCommenter": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index a148bd4969..2182400594 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import re -from typing import Any, Dict, List, Optional +from typing import Any, Optional import requests from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -154,7 +154,7 @@ def _create_comment_document(self, comment_data: dict, issue_number: int) -> Doc }, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize the component to a dictionary. @@ -168,7 +168,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewer": + def from_dict(cls, data: dict[str, Any]) -> "GitHubIssueViewer": """ Deserialize the component from a dictionary. @@ -179,7 +179,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewer": deserialize_secrets_inplace(init_params, keys=["github_token"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run(self, url: str) -> dict: """ Process a GitHub issue URL and return documents. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index 6b36087634..72193a63dd 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import re -from typing import Any, Dict, Optional +from typing import Any, Optional import requests from haystack import component, default_from_dict, default_to_dict, logging @@ -215,7 +215,7 @@ def _create_pull_request( @component.output_types(result=str) def run( self, issue_url: str, title: str, branch: str, base: str, body: str = "", draft: bool = False - ) -> Dict[str, str]: + ) -> dict[str, str]: """ Create a new pull request from your fork to the original repository, linked to the specified issue. @@ -263,7 +263,7 @@ def run( raise return {"result": f"Error: {e!s}"} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """Serialize the component to a dictionary.""" return default_to_dict( self, @@ -272,7 +272,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreator": + def from_dict(cls, data: dict[str, Any]) -> "GitHubPRCreator": """Deserialize the component from a dictionary.""" init_params = data["init_parameters"] deserialize_secrets_inplace(init_params, keys=["github_token"]) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py index e0b97281f6..021c60dfeb 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import re import time -from typing import Any, Dict, Optional +from typing import Any, Optional import requests from haystack import component, default_from_dict, default_to_dict, logging @@ -221,7 +221,7 @@ def _create_fork(self, owner: str, repo: str) -> str: fork_data = response.json() return f"{fork_data['owner']['login']}/{fork_data['name']}" - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize the component to a dictionary. @@ -239,7 +239,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForker": + def from_dict(cls, data: dict[str, Any]) -> "GitHubRepoForker": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 70856a1817..692c823a5a 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import base64 from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any, Optional import requests from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -114,7 +114,7 @@ def _get_request_headers(self) -> dict: headers["Authorization"] = f"Bearer {token_value}" return headers - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize the component to a dictionary. @@ -130,7 +130,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewer": + def from_dict(cls, data: dict[str, Any]) -> "GitHubRepoViewer": """ Deserialize the component from a dictionary. @@ -181,7 +181,7 @@ def _create_file_document(self, item: GitHubItem) -> Document: }, ) - def _create_directory_documents(self, items: List[GitHubItem]) -> List[Document]: + def _create_directory_documents(self, items: list[GitHubItem]) -> list[Document]: """Create a list of Documents from directory contents""" return [ Document( @@ -206,8 +206,8 @@ def _create_error_document(self, error: Exception, path: str) -> Document: }, ) - @component.output_types(documents=List[Document]) - def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = None) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = None) -> dict[str, list[Document]]: """ Process a GitHub repository path and return documents. diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index eea05433bd..59c18530d6 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,14 +22,14 @@ def __init__( *, name: Optional[str] = "file_editor", description: Optional[str] = FILE_EDITOR_PROMPT, - parameters: Optional[Dict[str, Any]] = FILE_EDITOR_SCHEMA, + parameters: Optional[dict[str, Any]] = FILE_EDITOR_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), repo: Optional[str] = None, branch: str = "main", raise_on_failure: bool = True, - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub file editor tool. @@ -86,7 +86,7 @@ def __init__( outputs_to_state=outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -110,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubFileEditorTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py index 4408b10877..00afc4233c 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,13 +22,13 @@ def __init__( *, name: Optional[str] = "issue_commenter", description: Optional[str] = ISSUE_COMMENTER_PROMPT, - parameters: Optional[Dict[str, Any]] = ISSUE_COMMENTER_SCHEMA, + parameters: Optional[dict[str, Any]] = ISSUE_COMMENTER_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, retry_attempts: int = 2, - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub issue commenter tool. @@ -82,7 +82,7 @@ def __init__( outputs_to_state=outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -105,7 +105,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubIssueCommenterTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py index faa22e9d2d..171fdf99ce 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,13 +22,13 @@ def __init__( *, name: Optional[str] = "issue_viewer", description: Optional[str] = ISSUE_VIEWER_PROMPT, - parameters: Optional[Dict[str, Any]] = ISSUE_VIEWER_SCHEMA, + parameters: Optional[dict[str, Any]] = ISSUE_VIEWER_SCHEMA, github_token: Optional[Secret] = None, raise_on_failure: bool = True, retry_attempts: int = 2, - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub issue viewer tool. @@ -82,7 +82,7 @@ def __init__( outputs_to_state=outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -105,7 +105,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubIssueViewerTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index 955f1ebbac..984a34880d 100644 --- a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,12 +22,12 @@ def __init__( *, name: Optional[str] = "pr_creator", description: Optional[str] = PR_CREATOR_PROMPT, - parameters: Optional[Dict[str, Any]] = PR_CREATOR_SCHEMA, + parameters: Optional[dict[str, Any]] = PR_CREATOR_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub PR creator tool. @@ -78,7 +78,7 @@ def __init__( outputs_to_state=outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -99,7 +99,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubPRCreatorTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py index e38f4ef1df..a662eb33c3 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,12 +22,12 @@ def __init__( *, name: Optional[str] = "repo_forker", description: Optional[str] = REPO_FORKER_PROMPT, - parameters: Optional[Dict[str, Any]] = REPO_FORKER_SCHEMA, + parameters: Optional[dict[str, Any]] = REPO_FORKER_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub Repo Forker tool. @@ -79,7 +79,7 @@ def __init__( outputs_to_state=self.outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -101,7 +101,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForkerTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubRepoForkerTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index bb24a1f1b4..be6eb153dc 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Optional, Union from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool @@ -22,15 +22,15 @@ def __init__( *, name: Optional[str] = "repo_viewer", description: Optional[str] = REPO_VIEWER_PROMPT, - parameters: Optional[Dict[str, Any]] = REPO_VIEWER_SCHEMA, + parameters: Optional[dict[str, Any]] = REPO_VIEWER_SCHEMA, github_token: Optional[Secret] = None, repo: Optional[str] = None, branch: str = "main", raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[dict[str, str]] = None, + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None, ): """ Initialize the GitHub repository viewer tool. @@ -95,7 +95,7 @@ def __init__( outputs_to_state=self.outputs_to_state, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the tool to a dictionary. @@ -120,7 +120,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": + def from_dict(cls, data: dict[str, Any]) -> "GitHubRepoViewerTool": """ Deserializes the tool from a dictionary. diff --git a/integrations/github/src/haystack_integrations/tools/github/utils.py b/integrations/github/src/haystack_integrations/tools/github/utils.py index e4a57aab6b..4321a41347 100644 --- a/integrations/github/src/haystack_integrations/tools/github/utils.py +++ b/integrations/github/src/haystack_integrations/tools/github/utils.py @@ -2,13 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Optional, Union from haystack import Document from haystack.utils.callable_serialization import deserialize_callable, serialize_callable -def message_handler(documents: List[Document], max_length: int = 150_000) -> str: +def message_handler(documents: list[Document], max_length: int = 150_000) -> str: """ Handles the tool output before conversion to ChatMessage. @@ -32,9 +32,9 @@ def message_handler(documents: List[Document], max_length: int = 150_000) -> str def serialize_handlers( - serialized: Dict[str, Any], - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]], - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]], + serialized: dict[str, Any], + outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]], + outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]], ) -> None: """ Serializes callable handlers in outputs_to_state and outputs_to_string. @@ -64,7 +64,7 @@ def serialize_handlers( serialized["outputs_to_string"] = serialized_string -def deserialize_handlers(data: Dict[str, Any]) -> None: +def deserialize_handlers(data: dict[str, Any]) -> None: """ Deserializes callable handlers in outputs_to_state and outputs_to_string. diff --git a/integrations/google_genai/pyproject.toml b/integrations/google_genai/pyproject.toml index 63e9d49cab..7b632efb28 100644 --- a/integrations/google_genai/pyproject.toml +++ b/integrations/google_genai/pyproject.toml @@ -87,7 +87,7 @@ module = [ ignore_missing_imports = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py b/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py index 0208280275..c7ab540fa1 100644 --- a/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py +++ b/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Union from google.genai import types from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -82,9 +82,9 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", - config: Optional[Dict[str, Any]] = None, + config: Optional[dict[str, Any]] = None, ) -> None: """ Creates an GoogleGenAIDocumentEmbedder component. @@ -138,7 +138,7 @@ def __init__( vertex_ai_location=vertex_ai_location, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -162,7 +162,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GoogleGenAIDocumentEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "GoogleGenAIDocumentEmbedder": """ Deserializes the component from a dictionary. @@ -174,11 +174,11 @@ def from_dict(cls, data: Dict[str, Any]) -> "GoogleGenAIDocumentEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: """ Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. """ - texts_to_embed: List[str] = [] + texts_to_embed: list[str] = [] for doc in documents: meta_values_to_embed = [ str(doc.meta[key]) @@ -194,19 +194,19 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: return texts_to_embed def _embed_batch( - self, texts_to_embed: List[str], batch_size: int - ) -> Tuple[List[Optional[List[float]]], Dict[str, Any]]: + self, texts_to_embed: list[str], batch_size: int + ) -> tuple[list[Optional[list[float]]], dict[str, Any]]: """ Embed a list of texts in batches. """ resolved_config = types.EmbedContentConfig(**self._config) if self._config else None all_embeddings = [] - meta: Dict[str, Any] = {} + meta: dict[str, Any] = {} for batch in tqdm( batched(texts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings" ): - args: Dict[str, Any] = {"model": self._model, "contents": [b[1] for b in batch]} + args: dict[str, Any] = {"model": self._model, "contents": [b[1] for b in batch]} if resolved_config: args["config"] = resolved_config @@ -226,18 +226,18 @@ def _embed_batch( return all_embeddings, meta async def _embed_batch_async( - self, texts_to_embed: List[str], batch_size: int - ) -> Tuple[List[Optional[List[float]]], Dict[str, Any]]: + self, texts_to_embed: list[str], batch_size: int + ) -> tuple[list[Optional[list[float]]], dict[str, Any]]: """ Embed a list of texts in batches asynchronously. """ all_embeddings = [] - meta: Dict[str, Any] = {} + meta: dict[str, Any] = {} for batch in tqdm( batched(texts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings" ): - args: Dict[str, Any] = {"model": self._model, "contents": [b[1] for b in batch]} + args: dict[str, Any] = {"model": self._model, "contents": [b[1] for b in batch]} if self._config: args["config"] = types.EmbedContentConfig(**self._config) if self._config else None @@ -256,8 +256,8 @@ async def _embed_batch_async( return all_embeddings, meta - @component.output_types(documents=List[Document], meta=Dict[str, Any]) - def run(self, documents: List[Document]) -> Union[Dict[str, List[Document]], Dict[str, Any]]: + @component.output_types(documents=list[Document], meta=dict[str, Any]) + def run(self, documents: list[Document]) -> Union[dict[str, list[Document]], dict[str, Any]]: """ Embeds a list of documents. @@ -278,7 +278,7 @@ def run(self, documents: List[Document]) -> Union[Dict[str, List[Document]], Dic texts_to_embed = self._prepare_texts_to_embed(documents=documents) - meta: Dict[str, Any] + meta: dict[str, Any] embeddings, meta = self._embed_batch(texts_to_embed=texts_to_embed, batch_size=self._batch_size) for doc, emb in zip(documents, embeddings): @@ -286,8 +286,8 @@ def run(self, documents: List[Document]) -> Union[Dict[str, List[Document]], Dic return {"documents": documents, "meta": meta} - @component.output_types(documents=List[Document], meta=Dict[str, Any]) - async def run_async(self, documents: List[Document]) -> Union[Dict[str, List[Document]], Dict[str, Any]]: + @component.output_types(documents=list[Document], meta=dict[str, Any]) + async def run_async(self, documents: list[Document]) -> Union[dict[str, list[Document]], dict[str, Any]]: """ Embeds a list of documents asynchronously. diff --git a/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/text_embedder.py b/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/text_embedder.py index 18e0ad1cb3..1582d6f161 100644 --- a/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/text_embedder.py +++ b/integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/text_embedder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from google.genai import types from haystack import component, default_from_dict, default_to_dict, logging @@ -81,7 +81,7 @@ def __init__( model: str = "text-embedding-004", prefix: str = "", suffix: str = "", - config: Optional[Dict[str, Any]] = None, + config: Optional[dict[str, Any]] = None, ) -> None: """ Creates an GoogleGenAITextEmbedder component. @@ -123,7 +123,7 @@ def __init__( vertex_ai_location=vertex_ai_location, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -143,7 +143,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GoogleGenAITextEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "GoogleGenAITextEmbedder": """ Deserializes the component from a dictionary. @@ -155,7 +155,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GoogleGenAITextEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _prepare_input(self, text: str) -> Dict[str, Any]: + def _prepare_input(self, text: str) -> dict[str, Any]: if not isinstance(text, str): error_message_text = ( "GoogleGenAITextEmbedder expects a string as an input. " @@ -166,18 +166,18 @@ def _prepare_input(self, text: str) -> Dict[str, Any]: text_to_embed = self._prefix + text + self._suffix - kwargs: Dict[str, Any] = {"model": self._model_name, "contents": text_to_embed} + kwargs: dict[str, Any] = {"model": self._model_name, "contents": text_to_embed} if self._config: kwargs["config"] = types.EmbedContentConfig(**self._config) return kwargs - def _prepare_output(self, result: types.EmbedContentResponse) -> Dict[str, Any]: + def _prepare_output(self, result: types.EmbedContentResponse) -> dict[str, Any]: embedding = result.embeddings[0].values if result.embeddings else [] return {"embedding": embedding, "meta": {"model": self._model_name}} - @component.output_types(embedding=List[float], meta=Dict[str, Any]) - def run(self, text: str) -> Union[Dict[str, List[float]], Dict[str, Any]]: + @component.output_types(embedding=list[float], meta=dict[str, Any]) + def run(self, text: str) -> Union[dict[str, list[float]], dict[str, Any]]: """ Embeds a single string. @@ -193,8 +193,8 @@ def run(self, text: str) -> Union[Dict[str, List[float]], Dict[str, Any]]: response = self._client.models.embed_content(**create_kwargs) return self._prepare_output(result=response) - @component.output_types(embedding=List[float], meta=Dict[str, Any]) - async def run_async(self, text: str) -> Union[Dict[str, List[float]], Dict[str, Any]]: + @component.output_types(embedding=list[float], meta=dict[str, Any]) + async def run_async(self, text: str) -> Union[dict[str, list[float]], dict[str, Any]]: """ Asynchronously embed a single string. diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py index 346e1a2d4b..5a1b836782 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py @@ -4,8 +4,9 @@ import base64 import json +from collections.abc import AsyncIterator, Iterator from datetime import datetime, timezone -from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Optional +from typing import Any, Literal, Optional from google.genai import types from haystack import logging @@ -40,7 +41,7 @@ from haystack_integrations.components.generators.google_genai.chat.utils import remove_key_from_schema # Mapping from Google GenAI finish reasons to Haystack FinishReason values -FINISH_REASON_MAPPING: Dict[str, FinishReason] = { +FINISH_REASON_MAPPING: dict[str, FinishReason] = { "STOP": "stop", "MAX_TOKENS": "length", "SAFETY": "content_filter", @@ -91,7 +92,7 @@ def _convert_message_to_google_genai_format(message: ChatMessage) -> types.Conte # Reconstruct parts with their original thought signatures for sig_info in thought_signatures: - part_dict: Dict[str, Any] = {} + part_dict: dict[str, Any] = {} # Check what type of content this part had if sig_info.get("has_text"): @@ -207,7 +208,7 @@ def _convert_message_to_google_genai_format(message: ChatMessage) -> types.Conte return types.Content(role=role, parts=parts) -def _sanitize_tool_schema(tool_schema: Dict[str, Any]) -> Dict[str, Any]: +def _sanitize_tool_schema(tool_schema: dict[str, Any]) -> dict[str, Any]: """ Sanitizes a tool schema to remove any keys that are not supported by Google Gen AI. @@ -231,7 +232,7 @@ def _sanitize_tool_schema(tool_schema: Dict[str, Any]) -> Dict[str, Any]: return final_schema -def _convert_tools_to_google_genai_format(tools: ToolsType) -> List[types.Tool]: +def _convert_tools_to_google_genai_format(tools: ToolsType) -> list[types.Tool]: """ Converts a list of Haystack Tools, Toolsets, or a mix to Google Gen AI Tool format. @@ -241,7 +242,7 @@ def _convert_tools_to_google_genai_format(tools: ToolsType) -> List[types.Tool]: # Flatten Tools and Toolsets into a single list of Tools flattened_tools = flatten_tools_or_toolsets(tools) - function_declarations: List[types.FunctionDeclaration] = [] + function_declarations: list[types.FunctionDeclaration] = [] for tool in flattened_tools: parameters = _sanitize_tool_schema(tool.parameters) function_declarations.append( @@ -321,7 +322,7 @@ def _convert_google_genai_response_to_chatmessage(response: types.GenerateConten usage["thoughts_token_count"] = usage_metadata.thoughts_token_count # Create meta with reasoning content and thought signatures if available - meta: Dict[str, Any] = { + meta: dict[str, Any] = { "model": model, "finish_reason": FINISH_REASON_MAPPING.get(finish_reason or ""), "usage": usage, @@ -458,8 +459,8 @@ def __init__( vertex_ai_project: Optional[str] = None, vertex_ai_location: Optional[str] = None, model: str = "gemini-2.0-flash", - generation_kwargs: Optional[Dict[str, Any]] = None, - safety_settings: Optional[List[Dict[str, Any]]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, + safety_settings: Optional[list[dict[str, Any]]] = None, streaming_callback: Optional[StreamingCallbackT] = None, tools: Optional[ToolsType] = None, ): @@ -506,7 +507,7 @@ def __init__( self._streaming_callback = streaming_callback self._tools = tools - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -528,7 +529,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GoogleGenAIChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "GoogleGenAIChatGenerator": """ Deserializes the component from a dictionary. @@ -556,10 +557,10 @@ def _convert_google_chunk_to_streaming_chunk( :returns: A StreamingChunk object. """ content = "" - tool_calls: List[ToolCallDelta] = [] + tool_calls: list[ToolCallDelta] = [] finish_reason = None - reasoning_deltas: List[Dict[str, str]] = [] - thought_signature_deltas: List[Dict[str, Any]] = [] # Track thought signatures in streaming + reasoning_deltas: list[dict[str, str]] = [] + thought_signature_deltas: list[dict[str, Any]] = [] # Track thought signatures in streaming if chunk.candidates: candidate = chunk.candidates[0] @@ -620,7 +621,7 @@ def _convert_google_chunk_to_streaming_chunk( start = index == 0 or len(tool_calls) > 0 # Create meta with reasoning deltas and thought signatures if available - meta: Dict[str, Any] = { + meta: dict[str, Any] = { "received_at": datetime.now(timezone.utc).isoformat(), "model": self._model, "usage": usage, @@ -644,7 +645,7 @@ def _convert_google_chunk_to_streaming_chunk( meta=meta, ) - def _aggregate_streaming_chunks_with_reasoning(self, chunks: List[StreamingChunk]) -> ChatMessage: + def _aggregate_streaming_chunks_with_reasoning(self, chunks: list[StreamingChunk]) -> ChatMessage: """ Aggregate streaming chunks into a final ChatMessage with reasoning content and thought signatures. @@ -660,7 +661,7 @@ def _aggregate_streaming_chunks_with_reasoning(self, chunks: List[StreamingChunk # Now enhance with Google-specific features: reasoning content, thinking token usage, and thought signatures reasoning_text_parts: list[str] = [] - thought_signatures: List[Dict[str, Any]] = [] + thought_signatures: list[dict[str, Any]] = [] thoughts_token_count = None for chunk in chunks: @@ -708,7 +709,7 @@ def _aggregate_streaming_chunks_with_reasoning(self, chunks: List[StreamingChunk def _handle_streaming_response( self, response_stream: Iterator[types.GenerateContentResponse], streaming_callback: StreamingCallbackT - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Handle streaming response from Google Gen AI generate_content_stream. :param response_stream: The streaming response from generate_content_stream. @@ -740,7 +741,7 @@ def _handle_streaming_response( async def _handle_streaming_response_async( self, response_stream: AsyncIterator[types.GenerateContentResponse], streaming_callback: AsyncStreamingCallbackT - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Handle async streaming response from Google Gen AI generate_content_stream. :param response_stream: The async streaming response from generate_content_stream. @@ -773,7 +774,7 @@ async def _handle_streaming_response_async( msg = f"Error in async streaming response: {e}" raise RuntimeError(msg) from e - def _process_thinking_config(self, generation_kwargs: Dict[str, Any]) -> Dict[str, Any]: + def _process_thinking_config(self, generation_kwargs: dict[str, Any]) -> dict[str, Any]: """ Process thinking configuration from generation_kwargs. @@ -796,15 +797,15 @@ def _process_thinking_config(self, generation_kwargs: Dict[str, Any]) -> Dict[st return generation_kwargs - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, - safety_settings: Optional[List[Dict[str, Any]]] = None, + messages: list[ChatMessage], + generation_kwargs: Optional[dict[str, Any]] = None, + safety_settings: Optional[list[dict[str, Any]]] = None, streaming_callback: Optional[StreamingCallbackT] = None, tools: Optional[ToolsType] = None, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Run the Google Gen AI chat generator on the given input data. @@ -851,7 +852,7 @@ def run( chat_messages = messages[1:] # Convert messages to Google Gen AI Content format - contents: List[types.ContentUnionDict] = [] + contents: list[types.ContentUnionDict] = [] for msg in chat_messages: contents.append(_convert_message_to_google_genai_format(msg)) @@ -904,15 +905,15 @@ def run( error_msg = f"Error in Google Gen AI chat generation: {e}" raise RuntimeError(error_msg) from e - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) async def run_async( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, - safety_settings: Optional[List[Dict[str, Any]]] = None, + messages: list[ChatMessage], + generation_kwargs: Optional[dict[str, Any]] = None, + safety_settings: Optional[list[dict[str, Any]]] = None, streaming_callback: Optional[StreamingCallbackT] = None, tools: Optional[ToolsType] = None, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Async version of the run method. Run the Google Gen AI chat generator on the given input data. @@ -960,7 +961,7 @@ async def run_async( chat_messages = messages[1:] # Convert messages to Google Gen AI Content format - contents: List[types.ContentUnion] = [] + contents: list[types.ContentUnion] = [] for msg in chat_messages: contents.append(_convert_message_to_google_genai_format(msg)) diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index 9cecb6a85e..038f531d1d 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -1,9 +1,13 @@ -from typing import Any, Dict, List, Union +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Union def remove_key_from_schema( - schema: Union[Dict[str, Any], List[Any], Any], target_key: str -) -> Union[Dict[str, Any], List[Any], Any]: + schema: Union[dict[str, Any], list[Any], Any], target_key: str +) -> Union[dict[str, Any], list[Any], Any]: """ Recursively traverse a schema and remove all occurrences of the target key. diff --git a/integrations/google_genai/tests/test_document_embedder.py b/integrations/google_genai/tests/test_document_embedder.py index 6abbf89011..2579801bc6 100644 --- a/integrations/google_genai/tests/test_document_embedder.py +++ b/integrations/google_genai/tests/test_document_embedder.py @@ -4,7 +4,6 @@ import os import random -from typing import List import pytest from haystack import Document @@ -13,7 +12,7 @@ from haystack_integrations.components.embedders.google_genai import GoogleGenAIDocumentEmbedder -def mock_google_response(contents: List[str], model: str = "text-embedding-004", **kwargs) -> dict: +def mock_google_response(contents: list[str], model: str = "text-embedding-004", **kwargs) -> dict: secure_random = random.SystemRandom() dict_response = { "embedding": [[secure_random.random() for _ in range(768)] for _ in contents],