diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 7325737e..ff9d60e6 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -6,4 +6,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: psf/black@stable \ No newline at end of file + - uses: chartboost/ruff-action@v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 365d8a2c..34d137d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,8 +6,12 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.4.2 + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.6.7 hooks: - - id: black - language_version: python3.12 + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 835c5d0a..a3ab358d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing +# Contributing ## Installing development versions @@ -17,45 +17,45 @@ poetry add git+https://github.com/pinecone-io/pinecone-python-client.git@44fc7ed ``` -## Developing locally with Poetry +## Developing locally with Poetry [Poetry](https://python-poetry.org/) is a tool that combines [virtualenv](https://virtualenv.pypa.io/en/latest/) usage with dependency management, to provide a consistent experience for project maintainers and contributors who need to develop the pinecone-python-client -as a library. +as a library. -A common need when making changes to the Pinecone client is to test your changes against existing Python code or Jupyter Notebooks that `pip install` the Pinecone Python client as a library. +A common need when making changes to the Pinecone client is to test your changes against existing Python code or Jupyter Notebooks that `pip install` the Pinecone Python client as a library. -Developers want to be able to see their changes to the library immediately reflected in their main application code, as well as to track all changes they make in git, so that they can be contributed back in the form of a pull request. +Developers want to be able to see their changes to the library immediately reflected in their main application code, as well as to track all changes they make in git, so that they can be contributed back in the form of a pull request. -The Pinecone Python client therefore supports Poetry as its primary means of enabling a consistent local development experience. This guide will walk you through the setup process so that you can: +The Pinecone Python client therefore supports Poetry as its primary means of enabling a consistent local development experience. This guide will walk you through the setup process so that you can: 1. Make local changes to the Pinecone Python client that are separated from your system's Python installation 2. Make local changes to the Pinecone Python client that are immediately reflected in other local code that imports the pinecone client 3. Track all your local changes to the Pinecone Python client so that you can contribute your fixes and feature additions back via GitHub pull requests ### Step 1. 
Fork the Pinecone python client repository -On the [GitHub repository page](https://github.com/pinecone-io/pinecone-python-client) page, click the fork button at the top of the screen and create a personal fork of the repository: +On the [GitHub repository page](https://github.com/pinecone-io/pinecone-python-client) page, click the fork button at the top of the screen and create a personal fork of the repository: ![Create a GitHub fork of the Pinecone Python client](./docs/pinecone-python-client-fork.png) -It will take a few seconds for your fork to be ready. When it's ready, **clone your fork** of the Pinecone python client repository to your machine. +It will take a few seconds for your fork to be ready. When it's ready, **clone your fork** of the Pinecone python client repository to your machine. -Change directory into the repository, as we'll be setting up a virtualenv from within the root of the repository. +Change directory into the repository, as we'll be setting up a virtualenv from within the root of the repository. -### Step 1. Install Poetry +### Step 1. Install Poetry -Visit [the Poetry site](https://python-poetry.org/) for installation instructions. +Visit [the Poetry site](https://python-poetry.org/) for installation instructions. -### Step 2. Install dependencies +### Step 2. Install dependencies -Run `poetry install` from the root of the project. +Run `poetry install` from the root of the project. ### Step 3. Activate the Poetry virtual environment and verify success -Run `poetry shell` from the root of the project. At this point, you now have a virtualenv set up in this directory, which you can verify by running: +Run `poetry shell` from the root of the project. At this point, you now have a virtualenv set up in this directory, which you can verify by running: `poetry env info` -You should see something similar to the following output: +You should see something similar to the following output: ```bash Virtualenv @@ -73,17 +73,61 @@ Path: /home/linuxbrew/.linuxbrew/opt/python@3.9 ``` If you want to extract only the path to your new virtualenv, you can run `poetry env info --path` -## Loading your virtualenv in another shell +### Step 4. Enable pre-commit hooks. -It's a common need when developing against this client to load it as part of some other application or Jupyter Notebook code, modify -it directly, see your changes reflected immediately and also have your changes tracked in git so you can contribute them back. +Run `poetry run pre-commit install` to enable checks to run when you commit so you don't have to find out during your CI run that minor lint issues need to be addressed. -It's important to understand that, by default, if you open a new shell or terminal window, or, for example, a new pane in a tmux session, -your new shell will not yet reference the new virtualenv you created in the previous step. +## Common tasks + +### Running tests + +- Unit tests: `make test-unit` +- Integration tests: `PINECONE_API_KEY="YOUR API KEY" make test-integration` +- Run the tests in a single file: `poetry run pytest tests/unit/data/test_bulk_import.py -s -vv` + +### Running the ruff linter / formatter + +These should automatically trigger if you have enabled pre-commit hooks with `poetry run pre-commit install`. But in case you want to trigger these yourself, you can run them like this: + +``` +poetry run ruff check --fix # lint rules +poetry run ruff format # formatting +``` + +If you want to adjust the behavior of ruff, configurations are in `pyproject.toml`. 
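+
+For reference, the key `ruff` settings this project uses look like the following excerpt; see the `[tool.ruff]` sections of `pyproject.toml` for the complete configuration:
+
+```toml
+[tool.ruff]
+line-length = 100
+target-version = "py38"
+
+[tool.ruff.lint]
+select = ["E4", "E7", "E9", "F"]
+```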
+
+
+### Consuming API version upgrades
+
+These instructions can only be followed by Pinecone employees with access to our private APIs repository.
+
+Prerequisites:
+- You must be an employee with access to private Pinecone repositories
+- You must have [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed and running. Our code generation script uses a dockerized version of the OpenAPI CLI.
+- You must have initialized the git submodules under codegen
+
+```sh
+git submodule update --init --recursive
+```
+
+To regenerate the generated portions of the client with the latest version of the API specifications, you need to have Docker Desktop running on your local machine.
+
+```sh
+./codegen/build-oas.sh
+```
+
+
+## Loading your virtualenv in another shell
+
+It's a common need when developing against this client to load it as part of some other application or Jupyter Notebook code, modify
+it directly, see your changes reflected immediately and also have your changes tracked in git so you can contribute them back.
+
+It's important to understand that, by default, if you open a new shell or terminal window, or, for example, a new pane in a tmux session,
+your new shell will not yet reference the new virtualenv you created in the previous step.

### Step 1. Get the path to your virtualenv

-We're going to first get the path to the virtualenv we just created, by running: 
+We're going to first get the path to the virtualenv we just created, by running:

```bash
poetry env info --path
```
@@ -93,34 +137,34 @@ You'll get a path similar to this one: `/home/youruser/.cache/pypoetry/virtuale

### Step 2. Load your existing virtualenv in your new shell

-Within this path is a shell script that lives at `/bin/activate`. Importantly, you cannot simply run this script, but you
-must instead source it like so: 
+Within this path is a shell script that lives at `bin/activate`. Importantly, you cannot simply run this script, but you
+must instead source it like so:

```bash
source /home/youruser/.cache/pypoetry/virtualenvs/pinecone-fWu70vbC-py3.9/bin/activate
```
In the above example, ensure you're using your own virtualenv path as returned by `poetry env info --path`.

-### Step 3. Test out your virtualenv 
+### Step 3. Test out your virtualenv

-Now, we can test that our virtualenv is working properly by adding a new test module and function to the `pinecone` client within our virtualenv
-and running it from the second shell. 
+Now, we can test that our virtualenv is working properly by adding a new test module and function to the `pinecone` client within our virtualenv
+and running it from the second shell.

#### Create a new test file in pinecone-python-client

-In the root of your working directory of the `pinecone-python-client` where you first ran `poetry shell`, add a new file named `hello_virtualenv.py` under the `pinecone` folder. 
+In the root of your working directory of the `pinecone-python-client` where you first ran `poetry shell`, add a new file named `hello_virtualenv.py` under the `pinecone` folder.

-In that file write the following: 
+In that file write the following:

```python
def hello():
    print("Hello, from your virtualenv!")
```

-Save the file. 
-#### Create a new test file in your second shell -This step demonstrates how you can immediately test your latest Pinecone client code from any local Python application or Jupyter Notebook: +#### Create a new test file in your second shell +This step demonstrates how you can immediately test your latest Pinecone client code from any local Python application or Jupyter Notebook: -In your second shell, where you ran `source` to load your virtualenv, create a python file named `test.py` and write the following: +In your second shell, where you ran `source` to load your virtualenv, create a python file named `test.py` and write the following: ```python from pinecone import hello_virtualenv @@ -128,13 +172,13 @@ from pinecone import hello_virtualenv hello_virtualenv.hello() ``` -Save the file. Run it with your Python binary. Depending on your system, this may either be `python` or `python3`: +Save the file. Run it with your Python binary. Depending on your system, this may either be `python` or `python3`: ```bash python3 test.py ``` -You should see the following output: +You should see the following output: ```bash ❯ python3 test.py @@ -142,26 +186,3 @@ Hello, from your virtualenv! ``` If you experience any issues please [file a new issue](https://github.com/pinecone-io/pinecone-python-client/issues/new). - - -## Consuming API version upgrades - -These instructions can only be followed by Pinecone employees with access to our private APIs repository. - -Prerequisites: -- You must be an employee with access to private Pinecone repositories -- You must have [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed and running. Our code generation script uses a dockerized version of the OpenAPI CLI. -- You must have initialized the git submodules under codegen - -```sh -git submodule -``` - - -To regenerate the generated portions of the client with the latest version of the API specifications, you need to have Docker Desktop running on your local machine. 
- - - -```sh -./codegen/ -``` \ No newline at end of file diff --git a/codegen/build-oas.sh b/codegen/build-oas.sh index 3b995499..232ab1dd 100755 --- a/codegen/build-oas.sh +++ b/codegen/build-oas.sh @@ -71,7 +71,7 @@ generate_client() { oas_file="codegen/apis/_build/${version}/${module_name}_${version}.oas.yaml" package_name="pinecone.${py_module_name}.openapi.${module_name}" - + verify_file_exists $oas_file verify_directory_exists $template_dir @@ -106,9 +106,9 @@ extract_shared_classes() { # Define the list of shared source files sharedFiles=( "api_client" - "configuration" - "exceptions" - "model_utils" + "configuration" + "exceptions" + "model_utils" "rest" ) @@ -127,7 +127,7 @@ extract_shared_classes() { done done - # Remove the docstring headers that aren't really correct in the + # Remove the docstring headers that aren't really correct in the # context of this new shared package structure find "$target_directory" -name "*.py" -print0 | xargs -0 -I {} sh -c 'sed -i "" "/^\"\"\"/,/^\"\"\"/d" "{}"' @@ -166,4 +166,4 @@ done extract_shared_classes # Format generated files -poetry run black "${destination}" +poetry run ruff format "${destination}" diff --git a/pinecone/config/__init__.py b/pinecone/config/__init__.py index a13a602a..23626082 100644 --- a/pinecone/config/__init__.py +++ b/pinecone/config/__init__.py @@ -4,5 +4,5 @@ from .config import ConfigBuilder, Config from .pinecone_config import PineconeConfig -if os.getenv("PINECONE_DEBUG") != None: +if os.getenv("PINECONE_DEBUG") is not None: logging.basicConfig(level=logging.DEBUG) diff --git a/pinecone/config/config.py b/pinecone/config/config.py index 2180d3e5..a3b28725 100644 --- a/pinecone/config/config.py +++ b/pinecone/config/config.py @@ -3,9 +3,7 @@ from pinecone.exceptions.exceptions import PineconeConfigurationError from pinecone.config.openapi import OpenApiConfigFactory -from pinecone.core.openapi.shared.configuration import ( - Configuration as OpenApiConfiguration, -) +from pinecone.core.openapi.shared.configuration import Configuration as OpenApiConfiguration from pinecone.utils import normalize_host from pinecone.utils.constants import SOURCE_TAG @@ -72,15 +70,11 @@ def build( @staticmethod def build_openapi_config( - config: Config, - openapi_config: Optional[OpenApiConfiguration] = None, - **kwargs, + config: Config, openapi_config: Optional[OpenApiConfiguration] = None, **kwargs ) -> OpenApiConfiguration: if openapi_config: openapi_config = OpenApiConfigFactory.copy( - openapi_config=openapi_config, - api_key=config.api_key, - host=config.host, + openapi_config=openapi_config, api_key=config.api_key, host=config.host ) elif openapi_config is None: openapi_config = OpenApiConfigFactory.build(api_key=config.api_key, host=config.host) @@ -95,7 +89,7 @@ def build_openapi_config( openapi_config.proxy_headers = config.proxy_headers if config.ssl_ca_certs: openapi_config.ssl_ca_cert = config.ssl_ca_certs - if config.ssl_verify != None: + if config.ssl_verify is not None: openapi_config.verify_ssl = config.ssl_verify return openapi_config diff --git a/pinecone/config/openapi.py b/pinecone/config/openapi.py index dc85fc43..d48bd88c 100644 --- a/pinecone/config/openapi.py +++ b/pinecone/config/openapi.py @@ -7,9 +7,7 @@ from urllib3.connection import HTTPConnection -from pinecone.core.openapi.shared.configuration import ( - Configuration as OpenApiConfiguration, -) +from pinecone.core.openapi.shared.configuration import Configuration as OpenApiConfiguration TCP_KEEPINTVL = 60 # Sec TCP_KEEPIDLE = 300 # Sec @@ -29,7 
+27,9 @@ def build(cls, api_key: str, host: Optional[str] = None, **kwargs): return openapi_config @classmethod - def copy(cls, openapi_config: OpenApiConfiguration, api_key: str, host: str) -> OpenApiConfiguration: + def copy( + cls, openapi_config: OpenApiConfiguration, api_key: str, host: str + ) -> OpenApiConfiguration: """ Copy a user-supplied openapi configuration and update it with the user's api key and host. If they have not specified other socket configuration, we will use the default values. @@ -88,13 +88,7 @@ def _get_socket_options( and hasattr(socket, "TCP_KEEPCNT") ): socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, keep_alive_idle_sec)] - socket_params += [ - ( - socket.IPPROTO_TCP, - socket.TCP_KEEPINTVL, - keep_alive_interval_sec, - ) - ] + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, keep_alive_interval_sec)] socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, keep_alive_tries)] # TCP Keep Alive Probes for Windows OS diff --git a/pinecone/config/pinecone_config.py b/pinecone/config/pinecone_config.py index c447b222..403687e9 100644 --- a/pinecone/config/pinecone_config.py +++ b/pinecone/config/pinecone_config.py @@ -17,7 +17,12 @@ def build( additional_headers: Optional[Dict[str, str]] = {}, **kwargs, ) -> Config: - host = host or kwargs.get("host") or os.getenv("PINECONE_CONTROLLER_HOST") or DEFAULT_CONTROLLER_HOST + host = ( + host + or kwargs.get("host") + or os.getenv("PINECONE_CONTROLLER_HOST") + or DEFAULT_CONTROLLER_HOST + ) headers_json = os.getenv("PINECONE_ADDITIONAL_HEADERS") if headers_json: try: @@ -27,8 +32,5 @@ def build( logger.warn(f"Ignoring PINECONE_ADDITIONAL_HEADERS: {e}") return ConfigBuilder.build( - api_key=api_key, - host=host, - additional_headers=additional_headers, - **kwargs, + api_key=api_key, host=host, additional_headers=additional_headers, **kwargs ) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index 7d6ce02e..7e245ec3 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -1,5 +1,4 @@ import time -import warnings import logging from typing import Optional, Dict, Any, Union, List, Tuple, Literal @@ -7,16 +6,11 @@ from pinecone.config import PineconeConfig, Config, ConfigBuilder -from pinecone.core.openapi.control.api.manage_indexes_api import ( - ManageIndexesApi, -) +from pinecone.core.openapi.control.api.manage_indexes_api import ManageIndexesApi from pinecone.core.openapi.shared.api_client import ApiClient -from pinecone.utils import ( - normalize_host, - setup_openapi_client, - build_plugin_setup_client, -) + +from pinecone.utils import normalize_host, setup_openapi_client, build_plugin_setup_client from pinecone.core.openapi.control.models import ( CreateCollectionRequest, CreateIndexRequest, @@ -215,7 +209,7 @@ def __init__( if kwargs.get("openapi_config", None): raise Exception( - "Passing openapi_config is no longer supported. Please pass settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at https://github.com/pinecone-io/pinecone-python-client for examples.", + "Passing openapi_config is no longer supported. Please pass settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at https://github.com/pinecone-io/pinecone-python-client for examples." 
) self.openapi_config = ConfigBuilder.build_openapi_config(self.config, **kwargs) @@ -354,10 +348,7 @@ def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: raise ValueError("spec must contain either 'serverless' or 'pod' key") elif isinstance(spec, ServerlessSpec): index_spec = IndexSpec( - serverless=ServerlessSpecModel( - cloud=spec.cloud, - region=spec.region, - ) + serverless=ServerlessSpecModel(cloud=spec.cloud, region=spec.region) ) elif isinstance(spec, PodSpec): args_dict = _parse_non_empty_args( @@ -369,14 +360,12 @@ def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: ] ) if spec.metadata_config: - args_dict["metadata_config"] = PodSpecMetadataConfig(indexed=spec.metadata_config.get("indexed", None)) + args_dict["metadata_config"] = PodSpecMetadataConfig( + indexed=spec.metadata_config.get("indexed", None) + ) index_spec = IndexSpec( - pod=PodSpecModel( - environment=spec.environment, - pod_type=spec.pod_type, - **args_dict, - ) + pod=PodSpecModel(environment=spec.environment, pod_type=spec.pod_type, **args_dict) ) else: raise TypeError("spec must be of type dict, ServerlessSpec, or PodSpec") @@ -388,7 +377,7 @@ def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: metric=metric, spec=index_spec, deletion_protection=dp, - ), + ) ) def is_ready(): @@ -631,7 +620,9 @@ def create_collection(self, name: str, source: str): :param source: Name of the source index """ api_instance = self.index_api - api_instance.create_collection(create_collection_request=CreateCollectionRequest(name=name, source=source)) + api_instance.create_collection( + create_collection_request=CreateCollectionRequest(name=name, source=source) + ) def list_collections(self) -> CollectionList: """List all collections diff --git a/pinecone/data/errors.py b/pinecone/data/errors.py index 7df0dee0..05daeab7 100644 --- a/pinecone/data/errors.py +++ b/pinecone/data/errors.py @@ -40,5 +40,7 @@ def __init__(self, sparse_values_dict): class MetadataDictionaryExpectedError(ValueError, TypeError): def __init__(self, item): - message = f"Column `metadata` is expected to be a dictionary, found {type(item['metadata'])}" + message = ( + f"Column `metadata` is expected to be a dictionary, found {type(item['metadata'])}" + ) super().__init__(message) diff --git a/pinecone/data/features/bulk_import.py b/pinecone/data/features/bulk_import.py index dddb3ef5..e4888c05 100644 --- a/pinecone/data/features/bulk_import.py +++ b/pinecone/data/features/bulk_import.py @@ -26,10 +26,10 @@ class ImportFeatureMixin: def __init__(self, **kwargs): - config = ConfigBuilder.build( - **kwargs, + config = ConfigBuilder.build(**kwargs) + openapi_config = ConfigBuilder.build_openapi_config( + config, kwargs.get("openapi_config", None) ) - openapi_config = ConfigBuilder.build_openapi_config(config, kwargs.get("openapi_config", None)) if kwargs.get("__import_operations_api", None): self.__import_operations_api = kwargs.get("__import_operations_api") @@ -123,10 +123,7 @@ def list_imports(self, **kwargs) -> Iterator[List[ImportModel]]: done = True def list_imports_paginated( - self, - limit: Optional[int] = None, - pagination_token: Optional[str] = None, - **kwargs, + self, limit: Optional[int] = None, pagination_token: Optional[str] = None, **kwargs ) -> ImportListResponse: """ The list_imports_paginated operation returns information about import operations. 
@@ -158,12 +155,7 @@ def list_imports_paginated( Returns: ImportListResponse object which contains the list of operations as ImportModel objects, pagination information, and usage showing the number of read_units consumed. """ - args_dict = parse_non_empty_args( - [ - ("limit", limit), - ("pagination_token", pagination_token), - ] - ) + args_dict = parse_non_empty_args([("limit", limit), ("pagination_token", pagination_token)]) return self.__import_operations_api.list_imports(**args_dict) def describe_import(self, id: str) -> ImportModel: diff --git a/pinecone/data/index.py b/pinecone/data/index.py index e94734f9..cc1bae6c 100644 --- a/pinecone/data/index.py +++ b/pinecone/data/index.py @@ -1,6 +1,6 @@ from tqdm.autonotebook import tqdm -from typing import Union, List, Tuple, Optional, Dict, Any +from typing import Union, List, Optional, Dict, Any from pinecone.config import ConfigBuilder @@ -17,7 +17,6 @@ DescribeIndexStatsResponse, UpsertRequest, UpsertResponse, - UpdateRequest, Vector, DeleteRequest, UpdateRequest, @@ -91,10 +90,7 @@ def __init__( ) self._config = ConfigBuilder.build( - api_key=api_key, - host=host, - additional_headers=additional_headers, - **kwargs, + api_key=api_key, host=host, additional_headers=additional_headers, **kwargs ) openapi_config = ConfigBuilder.build_openapi_config(self._config, openapi_config) @@ -189,14 +185,12 @@ def upsert( if not isinstance(batch_size, int) or batch_size <= 0: raise ValueError("batch_size must be a positive integer") - pbar = tqdm( - total=len(vectors), - disable=not show_progress, - desc="Upserted vectors", - ) + pbar = tqdm(total=len(vectors), disable=not show_progress, desc="Upserted vectors") total_upserted = 0 for i in range(0, len(vectors), batch_size): - batch_result = self._upsert_batch(vectors[i : i + batch_size], namespace, _check_type, **kwargs) + batch_result = self._upsert_batch( + vectors[i : i + batch_size], namespace, _check_type, **kwargs + ) pbar.update(batch_result.upserted_count) # we can't use here pbar.n for the case show_progress=False total_upserted += batch_result.upserted_count @@ -211,7 +205,9 @@ def _upsert_batch( **kwargs, ) -> UpsertResponse: args_dict = parse_non_empty_args([("namespace", namespace)]) - vec_builder = lambda v: VectorFactory.build(v, check_type=_check_type) + + def vec_builder(v): + return VectorFactory.build(v, check_type=_check_type) return self._vector_api.upsert( UpsertRequest( @@ -230,11 +226,7 @@ def _iter_dataframe(df, batch_size): yield batch def upsert_from_dataframe( - self, - df, - namespace: Optional[str] = None, - batch_size: int = 500, - show_progress: bool = True, + self, df, namespace: Optional[str] = None, batch_size: int = 500, show_progress: bool = True ) -> UpsertResponse: """Upserts a dataframe into the index. @@ -254,11 +246,7 @@ def upsert_from_dataframe( if not isinstance(df, pd.DataFrame): raise ValueError(f"Only pandas dataframes are supported. 
Found: {type(df)}") - pbar = tqdm( - total=len(df), - disable=not show_progress, - desc="sending upsert requests", - ) + pbar = tqdm(total=len(df), disable=not show_progress, desc="sending upsert requests") results = [] for chunk in self._iter_dataframe(df, batch_size=batch_size): res = self.upsert(vectors=chunk, namespace=namespace) @@ -317,18 +305,17 @@ def delete( """ _check_type = kwargs.pop("_check_type", False) args_dict = parse_non_empty_args( - [ - ("ids", ids), - ("delete_all", delete_all), - ("namespace", namespace), - ("filter", filter), - ] + [("ids", ids), ("delete_all", delete_all), ("namespace", namespace), ("filter", filter)] ) return self._vector_api.delete( DeleteRequest( **args_dict, - **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS and v is not None}, + **{ + k: v + for k, v in kwargs.items() + if k not in _OPENAPI_ENDPOINT_PARAMS and v is not None + }, _check_type=_check_type, ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS}, @@ -370,7 +357,9 @@ def query( filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, include_values: Optional[bool] = None, include_metadata: Optional[bool] = None, - sparse_vector: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]] = None, + sparse_vector: Optional[ + Union[SparseValues, Dict[str, Union[List[float], List[int]]]] + ] = None, **kwargs, ) -> QueryResponse: """ @@ -454,13 +443,12 @@ def update( id: str, values: Optional[List[float]] = None, set_metadata: Optional[ - Dict[ - str, - Union[str, float, int, bool, List[int], List[float], List[str]], - ] + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] ] = None, namespace: Optional[str] = None, - sparse_values: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]] = None, + sparse_values: Optional[ + Union[SparseValues, Dict[str, Union[List[float], List[int]]]] + ] = None, **kwargs, ) -> Dict[str, Any]: """ @@ -516,9 +504,7 @@ def update( @validate_and_convert_errors def describe_index_stats( - self, - filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, - **kwargs, + self, filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, **kwargs ) -> DescribeIndexStatsResponse: """ The DescribeIndexStats operation returns statistics about the index's contents. @@ -628,7 +614,7 @@ def list(self, **kwargs): @staticmethod def _parse_sparse_values_arg( - sparse_values: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]] + sparse_values: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]], ) -> Optional[SparseValues]: if sparse_values is None: return None @@ -636,7 +622,11 @@ def _parse_sparse_values_arg( if isinstance(sparse_values, SparseValues): return sparse_values - if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values: + if ( + not isinstance(sparse_values, dict) + or "indices" not in sparse_values + or "values" not in sparse_values + ): raise ValueError( "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}." 
f"Received: {sparse_values}" diff --git a/pinecone/data/sparse_vector_factory.py b/pinecone/data/sparse_vector_factory.py index dcc7320e..b44cc41a 100644 --- a/pinecone/data/sparse_vector_factory.py +++ b/pinecone/data/sparse_vector_factory.py @@ -1,5 +1,3 @@ -import numbers - from collections.abc import Mapping from typing import Union, Dict diff --git a/pinecone/data/vector_factory.py b/pinecone/data/vector_factory.py index 8734633a..9482108c 100644 --- a/pinecone/data/vector_factory.py +++ b/pinecone/data/vector_factory.py @@ -76,5 +76,5 @@ def _dict_to_vector(item, check_type: bool) -> Vector: if not isinstance(item["values"], Iterable) or not isinstance( item["values"].__iter__().__next__(), numbers.Real ): - raise TypeError(f"Column `values` is expected to be a list of floats") + raise TypeError("Column `values` is expected to be a list of floats") raise e diff --git a/pinecone/exceptions/__init__.py b/pinecone/exceptions/__init__.py index 65e1309d..eb0e10fa 100644 --- a/pinecone/exceptions/__init__.py +++ b/pinecone/exceptions/__init__.py @@ -10,11 +10,7 @@ ForbiddenException, ServiceException, ) -from .exceptions import ( - PineconeConfigurationError, - PineconeProtocolError, - ListConversionException, -) +from .exceptions import PineconeConfigurationError, PineconeProtocolError, ListConversionException __all__ = [ "PineconeConfigurationError", diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index bc0a40a0..381f1841 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -1,8 +1,8 @@ """ Connecting to Pinecone with GRPC -The `pinecone.grpc` submodule provides an alternative version of the Pinecone -client that uses gRPC instead of HTTP for data operations. This provides a +The `pinecone.grpc` submodule provides an alternative version of the Pinecone +client that uses gRPC instead of HTTP for data operations. This provides a significant performance boost for data operations. 
### Installing the gRPC client @@ -54,3 +54,13 @@ Vector, SparseValues, ) + +__all__ = [ + "GRPCIndex", + "PineconeGRPC", + "GRPCClientConfig", + "GRPCVector", + "GRPCSparseValues", + "Vector", + "SparseValues", +] diff --git a/pinecone/grpc/base.py b/pinecone/grpc/base.py index d758bae5..db1cabf4 100644 --- a/pinecone/grpc/base.py +++ b/pinecone/grpc/base.py @@ -134,7 +134,9 @@ def channel(self): def grpc_server_on(self) -> bool: try: - grpc.channel_ready_future(self._channel).result(timeout=self.grpc_client_config.conn_timeout) + grpc.channel_ready_future(self._channel).result( + timeout=self.grpc_client_config.conn_timeout + ) return True except grpc.FutureTimeoutError: return False diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index e20d993a..424fb576 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -18,10 +18,7 @@ QueryResponse, DescribeIndexStatsResponse, ) -from pinecone.models.list_response import ( - ListResponse as SimpleListResponse, - Pagination, -) +from pinecone.models.list_response import ListResponse as SimpleListResponse, Pagination from pinecone.core.grpc.protos.vector_service_pb2 import ( Vector as GRPCVector, QueryVector as GRPCQueryVector, @@ -32,7 +29,6 @@ FetchRequest, UpdateRequest, ListRequest, - ListResponse, DescribeIndexStatsRequest, DeleteResponse, UpdateResponse, @@ -146,18 +142,11 @@ def upsert( if not isinstance(batch_size, int) or batch_size <= 0: raise ValueError("batch_size must be a positive integer") - pbar = tqdm( - total=len(vectors), - disable=not show_progress, - desc="Upserted vectors", - ) + pbar = tqdm(total=len(vectors), disable=not show_progress, desc="Upserted vectors") total_upserted = 0 for i in range(0, len(vectors), batch_size): batch_result = self._upsert_batch( - vectors[i : i + batch_size], - namespace, - timeout=timeout, - **kwargs, + vectors[i : i + batch_size], namespace, timeout=timeout, **kwargs ) pbar.update(batch_result.upserted_count) # we can't use here pbar.n for the case show_progress=False @@ -204,18 +193,10 @@ def upsert_from_dataframe( if not isinstance(df, pd.DataFrame): raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}") - pbar = tqdm( - total=len(df), - disable=not show_progress, - desc="sending upsert requests", - ) + pbar = tqdm(total=len(df), disable=not show_progress, desc="sending upsert requests") results = [] for chunk in self._iter_dataframe(df, batch_size=batch_size): - res = self.upsert( - vectors=chunk, - namespace=namespace, - async_req=use_async_requests, - ) + res = self.upsert(vectors=chunk, namespace=namespace, async_req=use_async_requests) pbar.update(len(chunk)) results.append(res) @@ -224,9 +205,7 @@ def upsert_from_dataframe( results = [ async_result.result() for async_result in tqdm( - cast_results, - disable=not show_progress, - desc="collecting async responses", + cast_results, disable=not show_progress, desc="collecting async responses" ) ] @@ -307,10 +286,7 @@ def delete( return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout) def fetch( - self, - ids: Optional[List[str]], - namespace: Optional[str] = None, - **kwargs, + self, ids: Optional[List[str]], namespace: Optional[str] = None, **kwargs ) -> FetchResponse: """ The fetch operation looks up and returns vectors, by ID, from a single namespace. 
@@ -422,10 +398,7 @@ def update( async_req: bool = False, values: Optional[List[float]] = None, set_metadata: Optional[ - Dict[ - str, - Union[str, float, int, bool, List[int], List[float], List[str]], - ] + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] ] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, @@ -534,9 +507,7 @@ def list_paginated( pagination = None return SimpleListResponse( - namespace=response.namespace, - vectors=response.vectors, - pagination=pagination, + namespace=response.namespace, vectors=response.vectors, pagination=pagination ) def list(self, **kwargs): @@ -576,9 +547,7 @@ def list(self, **kwargs): done = True def describe_index_stats( - self, - filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, - **kwargs, + self, filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, **kwargs ) -> DescribeIndexStatsResponse: """ The DescribeIndexStats operation returns statistics about the index's contents. @@ -613,7 +582,7 @@ def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: @staticmethod def _parse_sparse_values_arg( - sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] + sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]], ) -> Optional[GRPCSparseValues]: if sparse_values is None: return None @@ -621,7 +590,11 @@ def _parse_sparse_values_arg( if isinstance(sparse_values, GRPCSparseValues): return sparse_values - if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values: + if ( + not isinstance(sparse_values, dict) + or "indices" not in sparse_values + or "values" not in sparse_values + ): raise ValueError( "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}." 
f"Received: {sparse_values}" diff --git a/pinecone/grpc/retry.py b/pinecone/grpc/retry.py index b2718288..836bce7d 100644 --- a/pinecone/grpc/retry.py +++ b/pinecone/grpc/retry.py @@ -83,5 +83,7 @@ class RetryConfig(NamedTuple): """Config settings related to retry""" max_attempts: int = 4 - sleep_policy: SleepPolicy = ExponentialBackoff(init_backoff_ms=100, max_backoff_ms=1600, multiplier=2) + sleep_policy: SleepPolicy = ExponentialBackoff( + init_backoff_ms=100, max_backoff_ms=1600, multiplier=2 + ) retryable_status: Optional[Tuple[grpc.StatusCode, ...]] = (grpc.StatusCode.UNAVAILABLE,) diff --git a/pinecone/grpc/sparse_values_factory.py b/pinecone/grpc/sparse_values_factory.py index d6e602b8..433feb95 100644 --- a/pinecone/grpc/sparse_values_factory.py +++ b/pinecone/grpc/sparse_values_factory.py @@ -1,5 +1,3 @@ -import numbers - from collections.abc import Mapping from typing import Union, Dict @@ -11,9 +9,7 @@ SparseValuesDictionaryExpectedError, ) -from pinecone.core.grpc.protos.vector_service_pb2 import ( - SparseValues as GRPCSparseValues, -) +from pinecone.core.grpc.protos.vector_service_pb2 import SparseValues as GRPCSparseValues from pinecone import SparseValues as NonGRPCSparseValues diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index b0981a3e..99f45460 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -1,12 +1,6 @@ +from typing import Optional import uuid -from google.protobuf.struct_pb2 import Struct - - -def _generate_request_id() -> str: - return str(uuid.uuid4()) - - from pinecone.core.openapi.data.models import ( Vector as _Vector, Usage, @@ -18,7 +12,11 @@ def _generate_request_id() -> str: NamespaceSummary, ) -from typing import Optional +from google.protobuf.struct_pb2 import Struct + + +def _generate_request_id() -> str: + return str(uuid.uuid4()) def dict_to_proto_struct(d: Optional[dict]) -> "Struct": diff --git a/pinecone/grpc/vector_factory_grpc.py b/pinecone/grpc/vector_factory_grpc.py index b2529bae..db4de016 100644 --- a/pinecone/grpc/vector_factory_grpc.py +++ b/pinecone/grpc/vector_factory_grpc.py @@ -20,10 +20,7 @@ Vector as GRPCVector, SparseValues as GRPCSparseValues, ) -from pinecone import ( - Vector as NonGRPCVector, - SparseValues as NonGRPCSparseValues, -) +from pinecone import Vector as NonGRPCVector, SparseValues as NonGRPCSparseValues class VectorFactoryGRPC: @@ -34,8 +31,7 @@ def build(item: Union[GRPCVector, NonGRPCVector, Tuple, Dict]) -> GRPCVector: elif isinstance(item, NonGRPCVector): if item.sparse_values: sv = GRPCSparseValues( - indices=item.sparse_values.indices, - values=item.sparse_values.values, + indices=item.sparse_values.indices, values=item.sparse_values.values ) return GRPCVector( id=item.id, @@ -67,9 +63,7 @@ def _tuple_to_vector(item) -> GRPCVector: ) else: return GRPCVector( - id=id, - values=convert_to_list(values), - metadata=dict_to_proto_struct(metadata or {}), + id=id, values=convert_to_list(values), metadata=dict_to_proto_struct(metadata or {}) ) @staticmethod @@ -87,10 +81,10 @@ def _dict_to_vector(item) -> GRPCVector: try: item["values"] = convert_to_list(values) except TypeError as e: - raise TypeError(f"Column `values` is expected to be a list of floats") from e + raise TypeError("Column `values` is expected to be a list of floats") from e sparse_values = item.get("sparse_values") - if sparse_values != None and not isinstance(sparse_values, GRPCSparseValues): + if sparse_values is not None and not isinstance(sparse_values, GRPCSparseValues): item["sparse_values"] = 
SparseValuesFactory.build(sparse_values) metadata = item.get("metadata") @@ -115,5 +109,5 @@ def _dict_to_vector(item) -> GRPCVector: if not isinstance(item["values"], Iterable) or not isinstance( item["values"].__iter__().__next__(), numbers.Real ): - raise TypeError(f"Column `values` is expected to be a list of floats") + raise TypeError("Column `values` is expected to be a list of floats") raise e diff --git a/pinecone/models/__init__.py b/pinecone/models/__init__.py index d917323a..3a8f15b5 100644 --- a/pinecone/models/__init__.py +++ b/pinecone/models/__init__.py @@ -1,7 +1,4 @@ -from .index_description import ( - ServerlessSpecDefinition, - PodSpecDefinition, -) +from .index_description import ServerlessSpecDefinition, PodSpecDefinition from .collection_description import CollectionDescription from .serverless_spec import ServerlessSpec from .pod_spec import PodSpec diff --git a/pinecone/models/collection_list.py b/pinecone/models/collection_list.py index 865a55ac..c3a8f578 100644 --- a/pinecone/models/collection_list.py +++ b/pinecone/models/collection_list.py @@ -1,7 +1,5 @@ import json -from pinecone.core.openapi.control.models import ( - CollectionList as OpenAPICollectionList, -) +from pinecone.core.openapi.control.models import CollectionList as OpenAPICollectionList class CollectionList: diff --git a/pinecone/models/index_description.py b/pinecone/models/index_description.py index 1d29b526..8518b925 100644 --- a/pinecone/models/index_description.py +++ b/pinecone/models/index_description.py @@ -1,4 +1,4 @@ -from typing import NamedTuple, Dict, Optional, Union, Literal +from typing import NamedTuple, Dict, Optional, Literal class PodSpecDefinition(NamedTuple): diff --git a/pinecone/utils/__init__.py b/pinecone/utils/__init__.py index 07d27498..999daabf 100644 --- a/pinecone/utils/__init__.py +++ b/pinecone/utils/__init__.py @@ -5,10 +5,22 @@ from .fix_tuple_length import fix_tuple_length from .convert_to_list import convert_to_list from .normalize_host import normalize_host -from .setup_openapi_client import ( - setup_openapi_client, - build_plugin_setup_client, -) +from .setup_openapi_client import setup_openapi_client, build_plugin_setup_client from .parse_args import parse_non_empty_args from .docslinks import docslinks from .repr_overrides import install_json_repr_override + +__all__ = [ + "check_kwargs", + "__version__", + "get_user_agent", + "warn_deprecated", + "fix_tuple_length", + "convert_to_list", + "normalize_host", + "setup_openapi_client", + "build_plugin_setup_client", + "parse_non_empty_args", + "docslinks", + "install_json_repr_override", +] diff --git a/pinecone/utils/constants.py b/pinecone/utils/constants.py index 30cefcb7..b2461a4c 100644 --- a/pinecone/utils/constants.py +++ b/pinecone/utils/constants.py @@ -1,5 +1,4 @@ import os -import enum from .version import __version__ diff --git a/pinecone/utils/convert_to_list.py b/pinecone/utils/convert_to_list.py index 521ee692..6898b029 100644 --- a/pinecone/utils/convert_to_list.py +++ b/pinecone/utils/convert_to_list.py @@ -11,9 +11,13 @@ def convert_to_list(obj): elif obj is None or isinstance(obj, str) or isinstance(obj, dict): # The string and dictionary classes in python can be passed to list() # but they're not going to yield sensible results for our use case. 
- raise ListConversionException(f"Expected a list or list-like data structure, but got: {obj}") + raise ListConversionException( + f"Expected a list or list-like data structure, but got: {obj}" + ) else: try: return list(obj) except Exception as e: - raise ListConversionException(f"Expected a list or list-like data structure, but got: {obj}") from e + raise ListConversionException( + f"Expected a list or list-like data structure, but got: {obj}" + ) from e diff --git a/pinecone/utils/deprecation_notice.py b/pinecone/utils/deprecation_notice.py index 69e33aba..03bc0837 100644 --- a/pinecone/utils/deprecation_notice.py +++ b/pinecone/utils/deprecation_notice.py @@ -1,4 +1,3 @@ -from typing import Optional import warnings diff --git a/pinecone/utils/error_handling.py b/pinecone/utils/error_handling.py index 8d6c0dd7..5cdaaaf4 100644 --- a/pinecone/utils/error_handling.py +++ b/pinecone/utils/error_handling.py @@ -11,11 +11,13 @@ def inner_func(*args, **kwargs): return func(*args, **kwargs) except MaxRetryError as e: if isinstance(e.reason, ProtocolError): - raise ProtocolError(f"Failed to connect to {e.url}; did you specify the correct index name?") from e + raise ProtocolError( + f"Failed to connect to {e.url}; did you specify the correct index name?" + ) from e else: raise except ProtocolError as e: - raise ProtocolError(f"Failed to connect; did you specify the correct index name?") from e + raise ProtocolError("Failed to connect; did you specify the correct index name?") from e # Override signature sig = inspect.signature(func) diff --git a/pinecone/utils/repr_overrides.py b/pinecone/utils/repr_overrides.py index 91ca2d95..e3dfdb66 100644 --- a/pinecone/utils/repr_overrides.py +++ b/pinecone/utils/repr_overrides.py @@ -10,4 +10,6 @@ def custom_serializer(obj): def install_json_repr_override(klass): - klass.__repr__ = lambda self: json.dumps(self.to_dict(), indent=4, sort_keys=False, default=custom_serializer) + klass.__repr__ = lambda self: json.dumps( + self.to_dict(), indent=4, sort_keys=False, default=custom_serializer + ) diff --git a/pinecone/utils/setup_openapi_client.py b/pinecone/utils/setup_openapi_client.py index 8ea1f7b3..2d268cdc 100644 --- a/pinecone/utils/setup_openapi_client.py +++ b/pinecone/utils/setup_openapi_client.py @@ -3,13 +3,7 @@ def setup_openapi_client( - api_client_klass, - api_klass, - config, - openapi_config, - pool_threads, - api_version=None, - **kwargs, + api_client_klass, api_klass, config, openapi_config, pool_threads, api_version=None, **kwargs ): # It is important that we allow the user to pass in a reference to api_client_klass # instead of creating a direct dependency on ApiClient because plugins have their @@ -37,13 +31,7 @@ def setup_openapi_client( def build_plugin_setup_client(config, openapi_config, pool_threads): def setup_plugin_client(api_client_klass, api_klass, api_version, **kwargs): return setup_openapi_client( - api_client_klass, - api_klass, - config, - openapi_config, - pool_threads, - api_version, - **kwargs, + api_client_klass, api_klass, config, openapi_config, pool_threads, api_version, **kwargs ) return setup_plugin_client diff --git a/pinecone/utils/user_agent.py b/pinecone/utils/user_agent.py index a0fc7af3..845a0ab1 100644 --- a/pinecone/utils/user_agent.py +++ b/pinecone/utils/user_agent.py @@ -21,8 +21,7 @@ def _build_source_tag_field(source_tag): def _get_user_agent(client_id, config): user_agent_details = {"urllib3": urllib3.__version__} user_agent = "{} ({})".format( - client_id, - ", ".join([f"{k}:{v}" for k, v in 
user_agent_details.items()]), + client_id, ", ".join([f"{k}:{v}" for k, v in user_agent_details.items()]) ) user_agent += f"; {_build_source_tag_field(config.source_tag)}" if config.source_tag else "" return user_agent diff --git a/poetry.lock b/poetry.lock index 10f3864c..2842c814 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,52 +15,6 @@ files = [ six = ">=1.6.1,<2.0" wheel = ">=0.23.0,<1.0" -[[package]] -name = "black" -version = "24.4.2" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.8" -files = [ - {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, - {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, - {file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"}, - {file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"}, - {file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"}, - {file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"}, - {file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"}, - {file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"}, - {file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"}, - {file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"}, - {file = "black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"}, - {file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"}, - {file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"}, - {file = "black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"}, - {file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"}, - {file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"}, - {file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"}, - {file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"}, - {file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"}, - {file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"}, - {file = "black-24.4.2-py3-none-any.whl", hash = 
"sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"}, - {file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - [[package]] name = "certifi" version = "2024.7.4" @@ -182,20 +136,6 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] -[[package]] -name = "click" -version = "8.1.7" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - [[package]] name = "colorama" version = "0.4.6" @@ -837,17 +777,6 @@ files = [ numpy = {version = ">=1.26.0", markers = "python_version < \"3.13\""} types-pytz = ">=2022.1.1" -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "pdoc" version = "14.5.1" @@ -1383,4 +1312,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "lz4", "protobuf", "prot [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "7fcaf8376166ea98dd704c1ee4a27e09c64bfb2c598e4be8547ba3127a762f82" +content-hash = "e0d4455deaef7d3e7d0334b97c34777f7bb0ed1f36b40bd5338d8395c917b0ac" diff --git a/pyproject.toml b/pyproject.toml index 12591369..aa518299 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,19 +1,3 @@ -[tool.black] -line-length = 120 -target-version = ['py38'] -include = '\.pyi?$' -exclude = ''' -( - /( - \.git # exclude a few common directories in the - | \.mypy_cache # root of the project - | \.pytest_cache - )/ - | foo.py # also separately exclude a file named foo.py in - # the root of the project -) -''' - [tool.poetry] name = "pinecone" version = "5.3.1" @@ -55,7 +39,7 @@ urllib3 = [ { version = ">=1.26.5", python = "^3.12" } ] tqdm = ">=4.64.1" -# certifi does not follow semver. Should always be +# certifi does not follow semver. Should always be # on latest but setting a broad range to have maximum # compatibility with libraries that may pin version. 
certifi = ">=2019.11.17" @@ -100,7 +84,6 @@ pytest-mock = "3.6.1" pytest-timeout = "2.2.0" urllib3_mock = "0.3.3" responses = ">=0.8.1" -black = "^24.4.2" [tool.poetry.extras] grpc = ["grpcio", "googleapis-common-protos", "lz4", "protobuf", "protoc-gen-openapiv2"] @@ -108,3 +91,57 @@ grpc = ["grpcio", "googleapis-common-protos", "lz4", "protobuf", "protoc-gen-ope [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] +exclude = [ + ".eggs", + ".git", + ".ipynb_checkpoints", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + ".vscode", + "_build", + "build", + "dist", + "pinecone/core", + "pinecone/core_ea", +] + +line-length = 100 +indent-width = 4 +target-version = "py38" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = true +line-ending = "auto" +docstring-code-format = false +docstring-code-line-length = "dynamic" + +[tool.ruff.lint.per-file-ignores] +# F403 Allow star imports +# F401 allow imported but unused +"__init__.py" = ["F401", "F403"] + +# E402 Allow module level import not at top of file so +# tqdm warnings can be disabled ahead of loading any code +"pinecone/__init__.py" = ["E402"] + +# E712 Allow == comparison to True/False +"tests/**" = ["E712"] diff --git a/scripts/create-index-legacy.py b/scripts/create-index-legacy.py index 6c85109d..12cb212b 100644 --- a/scripts/create-index-legacy.py +++ b/scripts/create-index-legacy.py @@ -39,7 +39,7 @@ def main(): print(f"Waiting for index {index_name} to be ready...") time.sleep(60) - print(f"Done waiting.") + print("Done waiting.") description = pinecone.describe_index(index_name) print(f"Index description: {description}") @@ -50,11 +50,11 @@ def main(): vector = random_embedding_values(dimension) vecs = [{"id": random_string(10), "values": vector} for i in range(10)] index.upsert(vectors=[vecs]) - print(f"Done upserting.") + print("Done upserting.") print(f"Beginning query of index {index_name}...") index.query(vector=random_embedding_values(dimension)) - print(f"Done querying.") + print("Done querying.") if __name__ == "__main__": diff --git a/scripts/create.py b/scripts/create.py index a8b63493..93b2c5c5 100644 --- a/scripts/create.py +++ b/scripts/create.py @@ -27,12 +27,7 @@ def main(): name=index_name, metric=read_env_var("METRIC"), dimension=int(read_env_var("DIMENSION")), - spec={ - "serverless": { - "cloud": read_env_var("CLOUD"), - "region": read_env_var("REGION"), - } - }, + spec={"serverless": {"cloud": read_env_var("CLOUD"), "region": read_env_var("REGION")}}, ) write_gh_output("index_name", index_name) diff --git a/scripts/generate_usage.py b/scripts/generate_usage.py index ba5791e0..aefc64b4 100755 --- a/scripts/generate_usage.py +++ b/scripts/generate_usage.py @@ -34,12 +34,7 @@ def create_index_if_not_exists(pc, index_name): name=index_name, metric="cosine", dimension=DIMENSION, - spec={ - "serverless": { - "cloud": read_env_var("CLOUD"), - "region": read_env_var("REGION"), - } - }, + spec={"serverless": {"cloud": read_env_var("CLOUD"), "region": 
read_env_var("REGION")}}, ) @@ -78,14 +73,14 @@ def main(): # Fetch some vectors ids_to_fetch = random.sample(upserted_ids, k=random.randint(1, 20)) print("Fetching {} vectors".format(len(ids_to_fetch))) - fetched_vectors = index.fetch(ids=ids_to_fetch) + index.fetch(ids=ids_to_fetch) # Query some vectors print("Querying 10 times") for i in range(10): # Query by vector values query_vector = random_embedding_values(DIMENSION) - query_results = index.query(vector=query_vector, top_k=10) + index.query(vector=query_vector, top_k=10) # Delete some vectors print("Deleting some vectors") diff --git a/tests/integration/control/pod/conftest.py b/tests/integration/control/pod/conftest.py index 35f6613f..4748f28e 100644 --- a/tests/integration/control/pod/conftest.py +++ b/tests/integration/control/pod/conftest.py @@ -9,7 +9,9 @@ @pytest.fixture() def client(): api_key = get_environment_var("PINECONE_API_KEY") - return Pinecone(api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"}) + return Pinecone( + api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"} + ) @pytest.fixture() @@ -74,7 +76,8 @@ def random_string(): @pytest.fixture(scope="session") def reusable_collection(): pc = Pinecone( - api_key=get_environment_var("PINECONE_API_KEY"), additional_headers={"sdk-test-suite": "pinecone-python-client"} + api_key=get_environment_var("PINECONE_API_KEY"), + additional_headers={"sdk-test-suite": "pinecone-python-client"}, ) index_name = "temp-index-" + random_string() dimension = int(get_environment_var("DIMENSION")) @@ -83,15 +86,15 @@ def reusable_collection(): name=index_name, dimension=dimension, metric=get_environment_var("METRIC"), - spec=PodSpec( - environment=get_environment_var("PINECONE_ENVIRONMENT"), - ), + spec=PodSpec(environment=get_environment_var("PINECONE_ENVIRONMENT")), ) print(f"Created index {index_name}. Waiting 10 seconds to make sure it's ready...") time.sleep(10) num_vectors = 10 - vectors = [(str(i), [random.uniform(0, 1) for _ in range(dimension)]) for i in range(num_vectors)] + vectors = [ + (str(i), [random.uniform(0, 1) for _ in range(dimension)]) for i in range(num_vectors) + ] index = pc.Index(index_name) index.upsert(vectors=vectors) @@ -103,7 +106,9 @@ def reusable_collection(): desc = pc.describe_collection(collection_name) collection_ready = desc["status"] while collection_ready.lower() != "ready" and time_waited < 120: - print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...") + print( + f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds..." + ) time.sleep(5) time_waited += 5 desc = pc.describe_collection(collection_name) @@ -127,7 +132,9 @@ def cleanup(client, index_name): time_waited = 0 while index_exists(index_name, client) and time_waited < 120: - print(f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds..") + print( + f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds.." 
+        )
         time_waited += 5
         time.sleep(5)
 
     try:
diff --git a/tests/integration/control/pod/test_collections.py b/tests/integration/control/pod/test_collections.py
index 10eb0499..fd369fef 100644
--- a/tests/integration/control/pod/test_collections.py
+++ b/tests/integration/control/pod/test_collections.py
@@ -28,7 +28,9 @@ def test_index_to_collection_to_index_happy_path(
         time_waited = 0
         collection_ready = desc["status"]
         while collection_ready.lower() != "ready" and time_waited < 120:
-            print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
+            print(
+                f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds..."
+            )
             time.sleep(5)
             time_waited += 5
             desc = client.describe_collection(collection_name)
@@ -45,7 +47,7 @@ def test_index_to_collection_to_index_happy_path(
         assert desc["environment"] == environment
         assert desc["dimension"] == dimension
         assert desc["vector_count"] == num_vectors
-        assert desc["size"] != None
+        assert desc["size"] is not None
         assert desc["size"] > 0
 
         # Create index from collection
diff --git a/tests/integration/control/pod/test_collections_errors.py b/tests/integration/control/pod/test_collections_errors.py
index f91835eb..2e2d7965 100644
--- a/tests/integration/control/pod/test_collections_errors.py
+++ b/tests/integration/control/pod/test_collections_errors.py
@@ -1,7 +1,6 @@
 import string
 import random
 import pytest
-import time
 
 from pinecone import PodSpec
 
@@ -10,7 +9,9 @@ def random_string():
 
 
 class TestCollectionErrorCases:
-    def test_create_index_with_nonexistent_source_collection(self, client, dimension, metric, environment):
+    def test_create_index_with_nonexistent_source_collection(
+        self, client, dimension, metric, environment
+    ):
         with pytest.raises(Exception) as e:
             index_name = "from-nonexistent-coll-" + random_string()
             client.create_index(
@@ -22,7 +23,9 @@ def test_create_index_with_nonexistent_source_collection(self, client, dimension
             client.delete_index(index_name, -1)
         assert "Resource doesnotexist not found" in str(e.value)
 
-    def test_create_index_in_mismatched_environment(self, client, dimension, metric, environment, reusable_collection):
+    def test_create_index_in_mismatched_environment(
+        self, client, dimension, metric, environment, reusable_collection
+    ):
         envs = [
             "eastus-azure",
             "eu-west4-gcp",
@@ -51,7 +54,9 @@ def test_create_index_in_mismatched_environment(self, client, dimension, metric,
         assert "Source collection must be in the same environment as the index" in str(e.value)
 
     @pytest.mark.skip(reason="Bug reported in #global-cps")
-    def test_create_index_with_mismatched_dimension(self, client, dimension, metric, environment, reusable_collection):
+    def test_create_index_with_mismatched_dimension(
+        self, client, dimension, metric, environment, reusable_collection
+    ):
         with pytest.raises(Exception) as e:
             client.create_index(
                 name="from-coll-" + random_string(),
diff --git a/tests/integration/control/pod/test_configure_pod_index.py b/tests/integration/control/pod/test_configure_pod_index.py
index 72246801..7dc7fd4e 100644
--- a/tests/integration/control/pod/test_configure_pod_index.py
+++ b/tests/integration/control/pod/test_configure_pod_index.py
@@ -1,4 +1,3 @@
-import pytest
 import time
 
 
diff --git a/tests/integration/control/pod/test_deletion_protection.py b/tests/integration/control/pod/test_deletion_protection.py
index f1aec19d..2a28aa1f 100644
--- a/tests/integration/control/pod/test_deletion_protection.py
+++ b/tests/integration/control/pod/test_deletion_protection.py
@@ -5,7 +5,10 @@ class TestDeletionProtection:
     def test_deletion_protection(self, client, index_name, environment):
         client.create_index(
-            name=index_name, dimension=2, deletion_protection="enabled", spec=PodSpec(environment=environment)
+            name=index_name,
+            dimension=2,
+            deletion_protection="enabled",
+            spec=PodSpec(environment=environment),
         )
         desc = client.describe_index(index_name)
         print(desc.deletion_protection)
@@ -24,7 +27,10 @@ def test_deletion_protection(self, client, index_name, environment):
 
     def test_configure_index_with_deletion_protection(self, client, index_name, environment):
         client.create_index(
-            name=index_name, dimension=2, deletion_protection="enabled", spec=PodSpec(environment=environment)
+            name=index_name,
+            dimension=2,
+            deletion_protection="enabled",
+            spec=PodSpec(environment=environment),
         )
         desc = client.describe_index(index_name)
         assert desc.deletion_protection == "enabled"
diff --git a/tests/integration/control/serverless/conftest.py b/tests/integration/control/serverless/conftest.py
index 1a6ee0ba..ee1347c0 100644
--- a/tests/integration/control/serverless/conftest.py
+++ b/tests/integration/control/serverless/conftest.py
@@ -9,7 +9,9 @@
 @pytest.fixture()
 def client():
     api_key = get_environment_var("PINECONE_API_KEY")
-    return Pinecone(api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"})
+    return Pinecone(
+        api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"}
+    )
 
 
 @pytest.fixture()
@@ -67,7 +69,14 @@ def notready_pod_index(client, index_name, create_pod_index_params):
 
 
 def delete_with_retry(client, index_name, retries=0, sleep_interval=5):
-    print("Deleting index " + index_name + ", retry " + str(retries) + ", next sleep interval " + str(sleep_interval))
+    print(
+        "Deleting index "
+        + index_name
+        + ", retry "
+        + str(retries)
+        + ", next sleep interval "
+        + str(sleep_interval)
+    )
     try:
         client.delete_index(index_name, -1)
     except NotFoundException:
@@ -94,7 +103,7 @@ def cleanup(client, index_name):
 
     try:
         client.delete_index(index_name, -1)
-    except:
+    except Exception:
         pass
 
 
@@ -108,5 +117,5 @@ def cleanup_all():
         if index.name.startswith(buildNumber):
             try:
                 delete_with_retry(client, index.name)
-            except:
+            except Exception:
                 pass
diff --git a/tests/integration/control/serverless/test_create_index_timeouts.py b/tests/integration/control/serverless/test_create_index_timeouts.py
index 89344029..f01df5d0 100644
--- a/tests/integration/control/serverless/test_create_index_timeouts.py
+++ b/tests/integration/control/serverless/test_create_index_timeouts.py
@@ -10,7 +10,9 @@ def test_create_index_default_timeout(self, client, create_sl_index_params):
         assert desc.status.ready == True
 
     def test_create_index_when_timeout_set(self, client, create_sl_index_params):
-        create_sl_index_params["timeout"] = 1000  # effectively infinite, but different code path from None
+        create_sl_index_params["timeout"] = (
+            1000  # effectively infinite, but different code path from None
+        )
         client.create_index(**create_sl_index_params)
         desc = client.describe_index(create_sl_index_params["name"])
         assert desc.status.ready == True
diff --git a/tests/integration/control/serverless/test_deletion_protection.py b/tests/integration/control/serverless/test_deletion_protection.py
index 0a656e8e..9d6c3c4e 100644
--- a/tests/integration/control/serverless/test_deletion_protection.py
+++ b/tests/integration/control/serverless/test_deletion_protection.py
@@ -19,13 +19,17 @@ def test_deletion_protection(self, client, create_sl_index_params):
         client.delete_index(name)
 
     @pytest.mark.parametrize("deletion_protection", ["invalid", None])
-    def test_deletion_protection_invalid_options(self, client, create_sl_index_params, deletion_protection):
+    def test_deletion_protection_invalid_options(
+        self, client, create_sl_index_params, deletion_protection
+    ):
         with pytest.raises(Exception) as e:
             client.create_index(**create_sl_index_params, deletion_protection=deletion_protection)
         assert "deletion_protection must be either 'enabled' or 'disabled'" in str(e.value)
 
     @pytest.mark.parametrize("deletion_protection", ["invalid"])
-    def test_configure_deletion_protection_invalid_options(self, client, create_sl_index_params, deletion_protection):
+    def test_configure_deletion_protection_invalid_options(
+        self, client, create_sl_index_params, deletion_protection
+    ):
         with pytest.raises(Exception) as e:
             client.create_index(**create_sl_index_params, deletion_protection=deletion_protection)
         assert "deletion_protection must be either 'enabled' or 'disabled'" in str(e.value)
diff --git a/tests/integration/control/serverless/test_describe_index.py b/tests/integration/control/serverless/test_describe_index.py
index 6b3fae4a..3ef1b446 100644
--- a/tests/integration/control/serverless/test_describe_index.py
+++ b/tests/integration/control/serverless/test_describe_index.py
@@ -1,4 +1,3 @@
-import pytest
 from pinecone import IndexModel
 
 
@@ -6,14 +5,20 @@ class TestDescribeIndex:
     def test_describe_index_when_ready(self, client, ready_sl_index, create_sl_index_params):
         description = client.describe_index(ready_sl_index)
-        assert type(description) == IndexModel
+        assert isinstance(description, IndexModel)
         assert description.name == ready_sl_index
         assert description.dimension == create_sl_index_params["dimension"]
         assert description.metric == create_sl_index_params["metric"]
-        assert description.spec.serverless["cloud"] == create_sl_index_params["spec"]["serverless"]["cloud"]
-        assert description.spec.serverless["region"] == create_sl_index_params["spec"]["serverless"]["region"]
-
-        assert type(description.host) == str
+        assert (
+            description.spec.serverless["cloud"]
+            == create_sl_index_params["spec"]["serverless"]["cloud"]
+        )
+        assert (
+            description.spec.serverless["region"]
+            == create_sl_index_params["spec"]["serverless"]["region"]
+        )
+
+        assert isinstance(description.host, str)
         assert description.host != ""
         assert ready_sl_index in description.host
 
@@ -23,14 +28,20 @@ def test_describe_index_when_ready(self, client, ready_sl_index, create_sl_index
     def test_describe_index_when_not_ready(self, client, notready_sl_index, create_sl_index_params):
         description = client.describe_index(notready_sl_index)
-        assert type(description) == IndexModel
+        assert isinstance(description, IndexModel)
         assert description.name == notready_sl_index
         assert description.dimension == create_sl_index_params["dimension"]
         assert description.metric == create_sl_index_params["metric"]
-        assert description.spec.serverless["cloud"] == create_sl_index_params["spec"]["serverless"]["cloud"]
-        assert description.spec.serverless["region"] == create_sl_index_params["spec"]["serverless"]["region"]
-
-        assert type(description.host) == str
+        assert (
+            description.spec.serverless["cloud"]
+            == create_sl_index_params["spec"]["serverless"]["cloud"]
+        )
+        assert (
+            description.spec.serverless["region"]
+            == create_sl_index_params["spec"]["serverless"]["region"]
+        )
+
+        assert isinstance(description.host, str)
         assert description.host != ""
         assert notready_sl_index in description.host
diff --git a/tests/integration/control/serverless/test_list_indexes.py b/tests/integration/control/serverless/test_list_indexes.py
index 29136db5..1e22d4e8 100644
--- a/tests/integration/control/serverless/test_list_indexes.py
+++ b/tests/integration/control/serverless/test_list_indexes.py
@@ -2,12 +2,16 @@
 
 
 class TestListIndexes:
-    def test_list_indexes_includes_ready_indexes(self, client, ready_sl_index, create_sl_index_params):
+    def test_list_indexes_includes_ready_indexes(
+        self, client, ready_sl_index, create_sl_index_params
+    ):
         list_response = client.list_indexes()
         assert len(list_response.indexes) != 0
-        assert type(list_response.indexes[0]) == IndexModel
+        assert isinstance(list_response.indexes[0], IndexModel)
 
-        created_index = [index for index in list_response.indexes if index.name == ready_sl_index][0]
+        created_index = [index for index in list_response.indexes if index.name == ready_sl_index][
+            0
+        ]
         assert created_index.name == ready_sl_index
         assert created_index.dimension == create_sl_index_params["dimension"]
         assert created_index.metric == create_sl_index_params["metric"]
@@ -16,8 +20,10 @@ def test_list_indexes_includes_ready_indexes(self, client, ready_sl_index, creat
     def test_list_indexes_includes_not_ready_indexes(self, client, notready_sl_index):
         list_response = client.list_indexes()
         assert len(list_response.indexes) != 0
-        assert type(list_response.indexes[0]) == IndexModel
+        assert isinstance(list_response.indexes[0], IndexModel)
 
-        created_index = [index for index in list_response.indexes if index.name == notready_sl_index][0]
+        created_index = [
+            index for index in list_response.indexes if index.name == notready_sl_index
+        ][0]
         assert created_index.name == notready_sl_index
         assert notready_sl_index in created_index.name
diff --git a/tests/integration/data/conftest.py b/tests/integration/data/conftest.py
index b1f8d95c..ef0ab62f 100644
--- a/tests/integration/data/conftest.py
+++ b/tests/integration/data/conftest.py
@@ -30,7 +30,9 @@ def build_client():
     else:
         from pinecone import Pinecone
 
-        return Pinecone(api_key=api_key(), additional_headers={"sdk-test-suite": "pinecone-python-client"})
+        return Pinecone(
+            api_key=api_key(), additional_headers={"sdk-test-suite": "pinecone-python-client"}
+        )
 
 
 @pytest.fixture(scope="session")
diff --git a/tests/integration/data/seed.py b/tests/integration/data/seed.py
index 1b3efadf..79d2665f 100644
--- a/tests/integration/data/seed.py
+++ b/tests/integration/data/seed.py
@@ -7,16 +7,24 @@
 def setup_data(idx, target_namespace, wait):
     # Upsert without metadata
     idx.upsert(
-        vectors=[("1", embedding_values(2)), ("2", embedding_values(2)), ("3", embedding_values(2))],
+        vectors=[
+            ("1", embedding_values(2)),
+            ("2", embedding_values(2)),
+            ("3", embedding_values(2)),
+        ],
         namespace=target_namespace,
     )
 
     # Upsert with metadata
     idx.upsert(
         vectors=[
-            Vector(id="4", values=embedding_values(2), metadata={"genre": "action", "runtime": 120}),
+            Vector(
+                id="4", values=embedding_values(2), metadata={"genre": "action", "runtime": 120}
+            ),
             Vector(id="5", values=embedding_values(2), metadata={"genre": "comedy", "runtime": 90}),
-            Vector(id="6", values=embedding_values(2), metadata={"genre": "romance", "runtime": 240}),
+            Vector(
+                id="6", values=embedding_values(2), metadata={"genre": "romance", "runtime": 240}
+            ),
         ],
         namespace=target_namespace,
     )
@@ -40,7 +48,10 @@ def setup_data(idx, target_namespace, wait):
 def setup_list_data(idx, target_namespace, wait):
     # Upsert a bunch more stuff for testing list pagination
     for i in range(0, 1000, 50):
-        idx.upsert(vectors=[(str(i + d), embedding_values(2)) for d in range(50)], namespace=target_namespace)
+        idx.upsert(
+            vectors=[(str(i + d), embedding_values(2)) for d in range(50)],
+            namespace=target_namespace,
+        )
 
     if wait:
         poll_fetch_for_ids_in_namespace(idx, ids=["999"], namespace=target_namespace)
@@ -59,7 +70,26 @@ def weird_invalid_ids():
     ]
     emojis = list("🌲🍦")
     two_byte = list("田中さんにあげて下さい")
-    quotes = ["‘", "’", "“", "”", "„", "‟", "‹", "›", "❛", "❜", "❝", "❞", "❮", "❯", '"', "'", "「", "」"]
+    quotes = [
+        "‘",
+        "’",
+        "“",
+        "”",
+        "„",
+        "‟",
+        "‹",
+        "›",
+        "❛",
+        "❜",
+        "❝",
+        "❞",
+        "❮",
+        "❯",
+        '"',
+        "'",
+        "「",
+        "」",
+    ]
     return invisible + emojis + two_byte + quotes
 
 
@@ -103,15 +133,7 @@ def weird_valid_ids():
     ]
     ids.extend(script_injection)
 
-    unwanted_interpolation = [
-        "$HOME",
-        "$ENV{'HOME'}",
-        "%d",
-        "%s",
-        "%n",
-        "%x",
-        "{0}",
-    ]
+    unwanted_interpolation = ["$HOME", "$ENV{'HOME'}", "%d", "%s", "%n", "%x", "{0}"]
     ids.extend(unwanted_interpolation)
 
     return ids
diff --git a/tests/integration/data/test_fetch.py b/tests/integration/data/test_fetch.py
index a1646f38..c1b26cfc 100644
--- a/tests/integration/data/test_fetch.py
+++ b/tests/integration/data/test_fetch.py
@@ -13,8 +13,8 @@ def test_fetch_multiple_by_id(self, idx, namespace, use_nondefault_namespace):
 
         results = idx.fetch(ids=["1", "2", "4"], namespace=target_namespace)
         assert isinstance(results, FetchResponse) == True
-        assert results.usage != None
-        assert results.usage["read_units"] != None
+        assert results.usage is not None
+        assert results.usage["read_units"] is not None
         assert results.usage["read_units"] > 0
 
         assert results.namespace == target_namespace
@@ -22,13 +22,13 @@ def test_fetch_multiple_by_id(self, idx, namespace, use_nondefault_namespace):
         assert results.vectors["1"].id == "1"
         assert results.vectors["2"].id == "2"
         # Metadata included, if set
-        assert results.vectors["1"].metadata == None
-        assert results.vectors["2"].metadata == None
-        assert results.vectors["4"].metadata != None
+        assert results.vectors["1"].metadata is None
+        assert results.vectors["2"].metadata is None
+        assert results.vectors["4"].metadata is not None
         assert results.vectors["4"].metadata["genre"] == "action"
         assert results.vectors["4"].metadata["runtime"] == 120
         # Values included
-        assert results.vectors["1"].values != None
+        assert results.vectors["1"].values is not None
         assert len(results.vectors["1"].values) == self.expected_dimension
 
     @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
@@ -39,8 +39,8 @@ def test_fetch_single_by_id(self, idx, namespace, use_nondefault_namespace):
         assert results.namespace == target_namespace
         assert len(results.vectors) == 1
         assert results.vectors["1"].id == "1"
-        assert results.vectors["1"].metadata == None
-        assert results.vectors["1"].values != None
+        assert results.vectors["1"].metadata is None
+        assert results.vectors["1"].values is not None
         assert len(results.vectors["1"].values) == self.expected_dimension
 
     @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
@@ -74,5 +74,5 @@ def test_fetch_unspecified_namespace(self, idx):
         results = idx.fetch(ids=["1", "4"])
         assert results.namespace == ""
         assert results.vectors["1"].id == "1"
-        assert results.vectors["1"].values != None
-        assert results.vectors["4"].metadata != None
+        assert results.vectors["1"].values is not None
+        assert results.vectors["4"].metadata is not None
diff --git a/tests/integration/data/test_list.py b/tests/integration/data/test_list.py
index 578afe73..47a18a90 100644
--- a/tests/integration/data/test_list.py
+++ b/tests/integration/data/test_list.py
@@ -4,7 +4,7 @@
 class TestListPaginated:
     def test_list_when_no_results(self, idx):
         results = idx.list_paginated(namespace="no-results")
-        assert results != None
+        assert results is not None
         assert results.namespace == "no-results"
         assert len(results.vectors) == 0
         # assert results.pagination == None
@@ -12,7 +12,7 @@ class TestListPaginated:
 
     def test_list_no_args(self, idx):
         results = idx.list_paginated()
-        assert results != None
+        assert results is not None
         assert len(results.vectors) == 9
         assert results.namespace == ""
         # assert results.pagination == None
@@ -20,11 +20,11 @@ def test_list_no_args(self, idx):
 
     def test_list_when_limit(self, idx, list_namespace):
         results = idx.list_paginated(limit=10, namespace=list_namespace)
-        assert results != None
+        assert results is not None
         assert len(results.vectors) == 10
         assert results.namespace == list_namespace
-        assert results.pagination != None
-        assert results.pagination.next != None
+        assert results.pagination is not None
+        assert results.pagination.next is not None
         assert isinstance(results.pagination.next, str)
         assert results.pagination.next != ""
 
@@ -34,7 +34,10 @@ def test_list_when_using_pagination(self, idx, list_namespace):
             prefix="99", limit=5, namespace=list_namespace, pagination_token=results.pagination.next
         )
         next_next_results = idx.list_paginated(
-            prefix="99", limit=5, namespace=list_namespace, pagination_token=next_results.pagination.next
+            prefix="99",
+            limit=5,
+            namespace=list_namespace,
+            pagination_token=next_results.pagination.next,
         )
 
         assert results.namespace == list_namespace
@@ -54,7 +57,7 @@ def test_list_with_defaults(self, idx):
         page_count = 0
         for ids in idx.list():
             page_count += 1
-            assert ids != None
+            assert ids is not None
             page_sizes.append(len(ids))
             pages.append(ids)
 
@@ -67,9 +70,21 @@ def test_list(self, idx, list_namespace):
         page_count = 0
         for ids in results:
            page_count += 1
-            assert ids != None
+            assert ids is not None
             assert len(ids) == 11
-            assert ids == ["99", "990", "991", "992", "993", "994", "995", "996", "997", "998", "999"]
+            assert ids == [
+                "99",
+                "990",
+                "991",
+                "992",
+                "993",
+                "994",
+                "995",
+                "996",
+                "997",
+                "998",
+                "999",
+            ]
         assert page_count == 1
 
     def test_list_when_no_results_for_prefix(self, idx, list_namespace):
@@ -91,7 +106,7 @@ def test_list_when_multiple_pages(self, idx, list_namespace):
 
         for ids in idx.list(prefix="99", limit=5, namespace=list_namespace):
             page_count += 1
-            assert ids != None
+            assert ids is not None
             page_sizes.append(len(ids))
             pages.append(ids)
 
diff --git a/tests/integration/data/test_list_errors.py b/tests/integration/data/test_list_errors.py
index f0e9e1bf..03939cf2 100644
--- a/tests/integration/data/test_list_errors.py
+++ b/tests/integration/data/test_list_errors.py
@@ -13,5 +13,7 @@ def test_list_change_prefix_while_fetching_next_page(self, idx, list_namespace):
     def test_list_change_namespace_while_fetching_next_page(self, idx, namespace):
         results = idx.list_paginated(limit=5, namespace=namespace)
         with pytest.raises(PineconeException) as e:
-            idx.list_paginated(limit=5, namespace="new-namespace", pagination_token=results.pagination.next)
+            idx.list_paginated(
+                limit=5, namespace="new-namespace", pagination_token=results.pagination.next
+            )
         assert "namespace" in str(e.value)
diff --git a/tests/integration/data/test_query.py b/tests/integration/data/test_query.py
index 960ad920..7ff75f09 100644
--- a/tests/integration/data/test_query.py
+++ b/tests/integration/data/test_query.py
@@ -20,13 +20,13 @@ def test_query_by_id(self, idx, namespace, use_nondefault_namespace):
 
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
-        assert results.usage != None
-        assert results.usage["read_units"] != None
+        assert results.usage is not None
+        assert results.usage["read_units"] is not None
         assert results.usage["read_units"] > 0
 
         # By default, does not include values or metadata
         record_with_metadata = find_by_id(results.matches, "4")
-        assert record_with_metadata.metadata == None
+        assert record_with_metadata.metadata is None
         assert record_with_metadata.values == []
 
     def test_query_by_vector(self, idx, namespace, use_nondefault_namespace):
@@ -39,34 +39,44 @@ def test_query_by_vector(self, idx, namespace, use_nondefault_namespace):
     def test_query_by_vector_include_values(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
 
-        results = idx.query(vector=embedding_values(2), namespace=target_namespace, include_values=True, top_k=10)
+        results = idx.query(
+            vector=embedding_values(2), namespace=target_namespace, include_values=True, top_k=10
+        )
 
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) > 0
-        assert results.matches[0].values != None
+        assert results.matches[0].values is not None
         assert len(results.matches[0].values) == self.expected_dimension
 
     def test_query_by_vector_include_metadata(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
 
-        results = idx.query(vector=embedding_values(2), namespace=target_namespace, include_metadata=True, top_k=10)
+        results = idx.query(
+            vector=embedding_values(2), namespace=target_namespace, include_metadata=True, top_k=10
+        )
 
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
-        matches_with_metadata = [match for match in results.matches if match.metadata != None]
+        matches_with_metadata = [match for match in results.matches if match.metadata is not None]
         assert len(matches_with_metadata) == 3
         assert find_by_id(results.matches, "4").metadata["genre"] == "action"
 
-    def test_query_by_vector_include_values_and_metadata(self, idx, namespace, use_nondefault_namespace):
+    def test_query_by_vector_include_values_and_metadata(
+        self, idx, namespace, use_nondefault_namespace
+    ):
         target_namespace = namespace if use_nondefault_namespace else ""
 
         results = idx.query(
-            vector=embedding_values(2), namespace=target_namespace, include_values=True, include_metadata=True, top_k=10
+            vector=embedding_values(2),
+            namespace=target_namespace,
+            include_values=True,
+            include_metadata=True,
+            top_k=10,
         )
 
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
-        matches_with_metadata = [match for match in results.matches if match.metadata != None]
+        matches_with_metadata = [match for match in results.matches if match.metadata is not None]
         assert len(matches_with_metadata) == 3
         assert find_by_id(results.matches, "4").metadata["genre"] == "action"
         assert len(results.matches[0].values) == self.expected_dimension
@@ -85,7 +95,9 @@ class TestQueryWithFilter:
     def test_query_by_id_with_filter(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
 
-        results = idx.query(id="1", namespace=target_namespace, filter={"genre": "action"}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"genre": "action"}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 1
@@ -97,12 +109,14 @@ def test_query_by_id_with_filter_gt(self, idx, namespace, use_nondefault_namespa
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"runtime": {"$gt": 100}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"runtime": {"$gt": 100}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 2
-        assert find_by_id(results.matches, "4") != None
-        assert find_by_id(results.matches, "6") != None
+        assert find_by_id(results.matches, "4") is not None
+        assert find_by_id(results.matches, "6") is not None
 
     def test_query_by_id_with_filter_gte(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
@@ -110,13 +124,15 @@ def test_query_by_id_with_filter_gte(self, idx, namespace, use_nondefault_namesp
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"runtime": {"$gte": 90}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"runtime": {"$gte": 90}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 3
-        assert find_by_id(results.matches, "4") != None
-        assert find_by_id(results.matches, "5") != None
-        assert find_by_id(results.matches, "6") != None
+        assert find_by_id(results.matches, "4") is not None
+        assert find_by_id(results.matches, "5") is not None
+        assert find_by_id(results.matches, "6") is not None
 
     def test_query_by_id_with_filter_lt(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
@@ -124,11 +140,13 @@ def test_query_by_id_with_filter_lt(self, idx, namespace, use_nondefault_namespa
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"runtime": {"$lt": 100}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"runtime": {"$lt": 100}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 1
-        assert find_by_id(results.matches, "5") != None
+        assert find_by_id(results.matches, "5") is not None
 
     def test_query_by_id_with_filter_lte(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
@@ -136,12 +154,14 @@ def test_query_by_id_with_filter_lte(self, idx, namespace, use_nondefault_namesp
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"runtime": {"$lte": 120}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"runtime": {"$lte": 120}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 2
-        assert find_by_id(results.matches, "4") != None
-        assert find_by_id(results.matches, "5") != None
+        assert find_by_id(results.matches, "4") is not None
+        assert find_by_id(results.matches, "5") is not None
 
     def test_query_by_id_with_filter_in(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
@@ -149,11 +169,13 @@ def test_query_by_id_with_filter_in(self, idx, namespace, use_nondefault_namespa
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"genre": {"$in": ["romance"]}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"genre": {"$in": ["romance"]}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 1
-        assert find_by_id(results.matches, "6") != None
+        assert find_by_id(results.matches, "6") is not None
 
     @pytest.mark.skip(reason="Seems like a bug in the server")
     def test_query_by_id_with_filter_nin(self, idx, namespace, use_nondefault_namespace):
@@ -162,12 +184,14 @@ def test_query_by_id_with_filter_nin(self, idx, namespace, use_nondefault_namesp
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"genre": {"$nin": ["romance"]}}, top_k=10)
+        results = idx.query(
+            id="1", namespace=target_namespace, filter={"genre": {"$nin": ["romance"]}}, top_k=10
+        )
         assert isinstance(results, QueryResponse) == True
         assert results.namespace == target_namespace
         assert len(results.matches) == 2
-        assert find_by_id(results.matches, "4") != None
-        assert find_by_id(results.matches, "5") != None
+        assert find_by_id(results.matches, "4") is not None
+        assert find_by_id(results.matches, "5") is not None
 
     def test_query_by_id_with_filter_eq(self, idx, namespace, use_nondefault_namespace):
         target_namespace = namespace if use_nondefault_namespace else ""
@@ -175,11 +199,13 @@ def test_query_by_id_with_filter_eq(self, idx, namespace, use_nondefault_namespa
         # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }),
         # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }),
         # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 })
-        results = idx.query(id="1", namespace=target_namespace, filter={"genre": {"$eq": "action"}}, top_k=10)
+        results = idx.query(
+ id="1", namespace=target_namespace, filter={"genre": {"$eq": "action"}}, top_k=10 + ) assert isinstance(results, QueryResponse) == True assert results.namespace == target_namespace assert len(results.matches) == 1 - assert find_by_id(results.matches, "4") != None + assert find_by_id(results.matches, "4") is not None @pytest.mark.skip(reason="Seems like a bug in the server") def test_query_by_id_with_filter_ne(self, idx, namespace, use_nondefault_namespace): @@ -188,9 +214,11 @@ def test_query_by_id_with_filter_ne(self, idx, namespace, use_nondefault_namespa # Vector(id='4', values=embedding_values(2), metadata={'genre': 'action', 'runtime': 120 }), # Vector(id='5', values=embedding_values(2), metadata={'genre': 'comedy', 'runtime': 90 }), # Vector(id='6', values=embedding_values(2), metadata={'genre': 'romance', 'runtime': 240 }) - results = idx.query(id="1", namespace=target_namespace, filter={"genre": {"$ne": "action"}}, top_k=10) + results = idx.query( + id="1", namespace=target_namespace, filter={"genre": {"$ne": "action"}}, top_k=10 + ) assert isinstance(results, QueryResponse) == True assert results.namespace == target_namespace assert len(results.matches) == 2 - assert find_by_id(results.matches, "5") != None - assert find_by_id(results.matches, "6") != None + assert find_by_id(results.matches, "5") is not None + assert find_by_id(results.matches, "6") is not None diff --git a/tests/integration/data/test_query_errors.py b/tests/integration/data/test_query_errors.py index 293f1e78..48a8e5f5 100644 --- a/tests/integration/data/test_query_errors.py +++ b/tests/integration/data/test_query_errors.py @@ -15,13 +15,13 @@ def test_query_with_invalid_vector(self, idx, namespace, use_nondefault_namespac def test_query_with_invalid_id(self, idx, namespace, use_nondefault_namespace): target_namespace = namespace if use_nondefault_namespace else "" - with pytest.raises(TypeError) as e: + with pytest.raises(TypeError): idx.query(id=1, namespace=target_namespace, top_k=10) def test_query_with_invalid_top_k(self, idx, namespace, use_nondefault_namespace): target_namespace = namespace if use_nondefault_namespace else "" - with pytest.raises((PineconeException, ValueError)) as e: + with pytest.raises((PineconeException, ValueError)): idx.query(id="1", namespace=target_namespace, top_k=-1) def test_query_with_missing_top_k(self, idx, namespace, use_nondefault_namespace): diff --git a/tests/integration/data/test_upsert.py b/tests/integration/data/test_upsert.py index ae2975ff..cc87dbce 100644 --- a/tests/integration/data/test_upsert.py +++ b/tests/integration/data/test_upsert.py @@ -44,7 +44,9 @@ def test_upsert_to_namespace(idx, namespace, use_nondefault_namespace): @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) -@pytest.mark.skipif(os.getenv("METRIC") != "dotproduct", reason="Only metric=dotprodouct indexes support hybrid") +@pytest.mark.skipif( + os.getenv("METRIC") != "dotproduct", reason="Only metric=dotprodouct indexes support hybrid" +) def test_upsert_to_namespace_with_sparse_embedding_values(idx, namespace, use_nondefault_namespace): target_namespace = namespace if use_nondefault_namespace else "" @@ -52,8 +54,10 @@ def test_upsert_to_namespace_with_sparse_embedding_values(idx, namespace, use_no idx.upsert( vectors=[ Vector( - id="1", values=embedding_values(), sparse_values=SparseValues(indices=[0, 1], values=embedding_values()) - ), + id="1", + values=embedding_values(), + sparse_values=SparseValues(indices=[0, 1], values=embedding_values()), + ) ], 
         namespace=target_namespace,
     )
diff --git a/tests/integration/data/test_upsert_errors.py b/tests/integration/data/test_upsert_errors.py
index de90d4b6..54414fbd 100644
--- a/tests/integration/data/test_upsert_errors.py
+++ b/tests/integration/data/test_upsert_errors.py
@@ -11,24 +11,44 @@ def test_upsert_fails_when_api_key_invalid(self, index_name, index_host):
         with pytest.raises(PineconeException):
             from pinecone import Pinecone
 
-            pc = Pinecone(api_key=fake_api_key(), additional_headers={"sdk-test-suite": "pinecone-python-client"})
+            pc = Pinecone(
+                api_key=fake_api_key(),
+                additional_headers={"sdk-test-suite": "pinecone-python-client"},
+            )
             idx = pc.Index(name=index_name, host=index_host)
-            idx.upsert(vectors=[Vector(id="1", values=embedding_values()), Vector(id="2", values=embedding_values())])
+            idx.upsert(
+                vectors=[
+                    Vector(id="1", values=embedding_values()),
+                    Vector(id="2", values=embedding_values()),
+                ]
+            )
 
-    @pytest.mark.skipif(os.getenv("USE_GRPC") != "true", reason="Only test grpc client when grpc extras")
+    @pytest.mark.skipif(
+        os.getenv("USE_GRPC") != "true", reason="Only test grpc client when grpc extras"
+    )
     def test_upsert_fails_when_api_key_invalid_grpc(self, index_name, index_host):
         with pytest.raises(PineconeException):
             from pinecone.grpc import PineconeGRPC
 
             pc = PineconeGRPC(api_key=fake_api_key())
             idx = pc.Index(name=index_name, host=index_host)
-            idx.upsert(vectors=[Vector(id="1", values=embedding_values()), Vector(id="2", values=embedding_values())])
+            idx.upsert(
+                vectors=[
+                    Vector(id="1", values=embedding_values()),
+                    Vector(id="2", values=embedding_values()),
+                ]
+            )
 
 
 class TestUpsertFailsWhenDimensionMismatch:
     def test_upsert_fails_when_dimension_mismatch_objects(self, idx):
         with pytest.raises(PineconeException):
-            idx.upsert(vectors=[Vector(id="1", values=embedding_values(2)), Vector(id="2", values=embedding_values(3))])
+            idx.upsert(
+                vectors=[
+                    Vector(id="1", values=embedding_values(2)),
+                    Vector(id="2", values=embedding_values(3)),
+                ]
+            )
 
     def test_upsert_fails_when_dimension_mismatch_tuples(self, idx):
         with pytest.raises(PineconeException):
@@ -36,19 +56,38 @@ def test_upsert_fails_when_dimension_mismatch_tuples(self, idx):
 
     def test_upsert_fails_when_dimension_mismatch_dicts(self, idx):
         with pytest.raises(PineconeException):
-            idx.upsert(vectors=[{"id": "1", "values": embedding_values(2)}, {"id": "2", "values": embedding_values(3)}])
+            idx.upsert(
+                vectors=[
+                    {"id": "1", "values": embedding_values(2)},
+                    {"id": "2", "values": embedding_values(3)},
+                ]
+            )
 
 
-@pytest.mark.skipif(os.getenv("METRIC") != "dotproduct", reason="Only metric=dotprodouct indexes support hybrid")
+@pytest.mark.skipif(
+    os.getenv("METRIC") != "dotproduct", reason="Only metric=dotproduct indexes support hybrid"
+)
 class TestUpsertFailsSparseValuesDimensionMismatch:
     def test_upsert_fails_when_sparse_values_indices_values_mismatch_objects(self, idx):
         with pytest.raises(PineconeException):
             idx.upsert(
-                vectors=[Vector(id="1", values=[0.1, 0.1], sparse_values=SparseValues(indices=[0], values=[0.5, 0.5]))]
+                vectors=[
+                    Vector(
+                        id="1",
+                        values=[0.1, 0.1],
+                        sparse_values=SparseValues(indices=[0], values=[0.5, 0.5]),
+                    )
+                ]
             )
         with pytest.raises(PineconeException):
             idx.upsert(
-                vectors=[Vector(id="1", values=[0.1, 0.1], sparse_values=SparseValues(indices=[0, 1], values=[0.5]))]
+                vectors=[
+                    Vector(
+                        id="1",
+                        values=[0.1, 0.1],
+                        sparse_values=SparseValues(indices=[0, 1], values=[0.5]),
+                    )
+                ]
             )
 
     def test_upsert_fails_when_sparse_values_in_tuples(self, idx):
@@ -64,12 +103,22 @@ def test_upsert_fails_when_sparse_values_indices_values_mismatch_dicts(self, idx
         with pytest.raises(PineconeException):
             idx.upsert(
                 vectors=[
-                    {"id": "1", "values": [0.2, 0.2], "sparse_values": SparseValues(indices=[0], values=[0.5, 0.5])}
+                    {
+                        "id": "1",
+                        "values": [0.2, 0.2],
+                        "sparse_values": SparseValues(indices=[0], values=[0.5, 0.5]),
+                    }
                 ]
             )
         with pytest.raises(PineconeException):
             idx.upsert(
-                vectors=[{"id": "1", "values": [0.1, 0.2], "sparse_values": SparseValues(indices=[0, 1], values=[0.5])}]
+                vectors=[
+                    {
+                        "id": "1",
+                        "values": [0.1, 0.2],
+                        "sparse_values": SparseValues(indices=[0, 1], values=[0.5]),
+                    }
+                ]
             )
 
 
@@ -123,7 +172,12 @@ def test_upsert_fails_when_vectors_missing(self, idx):
 class TestUpsertIdMissing:
     def test_upsert_fails_when_id_is_missing_objects(self, idx):
         with pytest.raises(TypeError):
-            idx.upsert(vectors=[Vector(id="1", values=embedding_values()), Vector(values=embedding_values())])
+            idx.upsert(
+                vectors=[
+                    Vector(id="1", values=embedding_values()),
+                    Vector(values=embedding_values()),
+                ]
+            )
 
     def test_upsert_fails_when_id_is_missing_tuples(self, idx):
         with pytest.raises(ValueError):
@@ -131,13 +185,20 @@ def test_upsert_fails_when_id_is_missing_tuples(self, idx):
 
     def test_upsert_fails_when_id_is_missing_dicts(self, idx):
         with pytest.raises(ValueError):
-            idx.upsert(vectors=[{"id": "1", "values": embedding_values()}, {"values": embedding_values()}])
+            idx.upsert(
+                vectors=[{"id": "1", "values": embedding_values()}, {"values": embedding_values()}]
+            )
 
 
 class TestUpsertIdWrongType:
     def test_upsert_fails_when_id_wrong_type_objects(self, idx):
         with pytest.raises(Exception):
-            idx.upsert(vectors=[Vector(id="1", values=embedding_values()), Vector(id=2, values=embedding_values())])
+            idx.upsert(
+                vectors=[
+                    Vector(id="1", values=embedding_values()),
+                    Vector(id=2, values=embedding_values()),
+                ]
+            )
 
     def test_upsert_fails_when_id_wrong_type_tuples(self, idx):
         with pytest.raises(Exception):
@@ -145,4 +206,9 @@ def test_upsert_fails_when_id_wrong_type_tuples(self, idx):
 
     def test_upsert_fails_when_id_wrong_type_dicts(self, idx):
         with pytest.raises(Exception):
-            idx.upsert(vectors=[{"id": "1", "values": embedding_values()}, {"id": 2, "values": embedding_values()}])
+            idx.upsert(
+                vectors=[
+                    {"id": "1", "values": embedding_values()},
+                    {"id": 2, "values": embedding_values()},
+                ]
+            )
diff --git a/tests/integration/data/test_weird_ids.py b/tests/integration/data/test_weird_ids.py
index 6a91487e..6eefaf52 100644
--- a/tests/integration/data/test_weird_ids.py
+++ b/tests/integration/data/test_weird_ids.py
@@ -14,19 +14,21 @@ def test_fetch_weird_ids(self, idx, weird_ids_namespace):
         for id in ids_to_fetch:
             assert id in results.vectors
             assert results.vectors[id].id == id
-            assert results.vectors[id].metadata == None
-            assert results.vectors[id].values != None
+            assert results.vectors[id].metadata is None
+            assert results.vectors[id].values is not None
             assert len(results.vectors[id].values) == 2
 
     @pytest.mark.parametrize("id_to_query", weird_valid_ids())
     def test_query_weird_ids(self, idx, weird_ids_namespace, id_to_query):
-        results = idx.query(id=id_to_query, top_k=10, namespace=weird_ids_namespace, include_values=True)
+        results = idx.query(
+            id=id_to_query, top_k=10, namespace=weird_ids_namespace, include_values=True
+        )
         assert results.usage["read_units"] > 0
         assert len(results.matches) == 10
         assert results.namespace == weird_ids_namespace
-        assert results.matches[0].id != None
-        assert results.matches[0].metadata == None
-        assert results.matches[0].values != None
+        assert results.matches[0].id is not None
+        assert results.matches[0].metadata is None
+        assert results.matches[0].values is not None
         assert len(results.matches[0].values) == 2
 
     def test_list_weird_ids(self, idx, weird_ids_namespace):
diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py
index f5b04b99..7f8ab09d 100644
--- a/tests/integration/helpers/helpers.py
+++ b/tests/integration/helpers/helpers.py
@@ -57,9 +57,14 @@ def poll_stats_for_namespace(
     total_time = 0
     done = False
     while not done:
-        print(f'Waiting for namespace "{namespace}" to have vectors. Total time waited: {total_time} seconds')
+        print(
+            f'Waiting for namespace "{namespace}" to have vectors. Total time waited: {total_time} seconds'
+        )
         stats = idx.describe_index_stats()
-        if namespace in stats.namespaces and stats.namespaces[namespace].vector_count >= expected_count:
+        if (
+            namespace in stats.namespaces
+            and stats.namespaces[namespace].vector_count >= expected_count
+        ):
             done = True
         elif total_time > max_sleep:
             raise TimeoutError(f"Timed out waiting for namespace {namespace} to have vectors")
diff --git a/tests/integration/proxy_config/test_proxy_settings.py b/tests/integration/proxy_config/test_proxy_settings.py
index 91863acd..5da2238b 100644
--- a/tests/integration/proxy_config/test_proxy_settings.py
+++ b/tests/integration/proxy_config/test_proxy_settings.py
@@ -1,6 +1,5 @@
 import os
 import pytest
-from pinecone import Pinecone
 from urllib3 import make_headers
 from urllib3.exceptions import InsecureRequestWarning
 
@@ -19,32 +18,26 @@ def exercise_all_apis(client, index_name):
 
 
 class TestProxyConfig:
-    @pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="gRPC doesn't support 'https://' proxy URLs")
+    @pytest.mark.skipif(
+        os.getenv("USE_GRPC") != "false", reason="gRPC doesn't support 'https://' proxy URLs"
+    )
     def test_https_proxy_with_self_signed_cert(self, client_cls, api_key, index_name, proxy1):
         ssl_ca_certs = os.path.join(proxy1["ssl_ca_certs"], "mitmproxy-ca-cert.pem")
-        pc = client_cls(
-            api_key=api_key,
-            proxy_url=PROXY1_URL_HTTPS,
-            ssl_ca_certs=ssl_ca_certs,
-        )
+        pc = client_cls(api_key=api_key, proxy_url=PROXY1_URL_HTTPS, ssl_ca_certs=ssl_ca_certs)
         exercise_all_apis(pc, index_name)
 
     def test_http_proxy_with_self_signed_cert(self, client_cls, api_key, index_name, proxy1):
         ssl_ca_certs = os.path.join(proxy1["ssl_ca_certs"], "mitmproxy-ca-cert.pem")
-        pc = client_cls(
-            api_key=api_key,
-            proxy_url=PROXY1_URL_HTTP,
-            ssl_ca_certs=ssl_ca_certs,
-        )
+        pc = client_cls(api_key=api_key, proxy_url=PROXY1_URL_HTTP, ssl_ca_certs=ssl_ca_certs)
        exercise_all_apis(pc, index_name)
 
-    @pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="gRPC doesn't support disabling ssl_verify")
-    def test_proxy_with_ssl_verification_disabled_emits_warning(self, client_cls, api_key, index_name):
-        pc = client_cls(
-            api_key=api_key,
-            proxy_url=PROXY1_URL_HTTPS,
-            ssl_verify=False,
-        )
+    @pytest.mark.skipif(
+        os.getenv("USE_GRPC") != "false", reason="gRPC doesn't support disabling ssl_verify"
+    )
+    def test_proxy_with_ssl_verification_disabled_emits_warning(
+        self, client_cls, api_key, index_name
+    ):
+        pc = client_cls(api_key=api_key, proxy_url=PROXY1_URL_HTTPS, ssl_verify=False)
 
         with pytest.warns(InsecureRequestWarning):
             pc.list_indexes()
@@ -52,9 +45,7 @@ def test_proxy_with_ssl_verification_disabled_emits_warning(self, client_cls, ap
     def test_proxy_with_incorrect_cert_path(self, client_cls, api_key):
         with pytest.raises(Exception) as e:
             pc = client_cls(
-                api_key=api_key,
-                proxy_url=PROXY1_URL_HTTPS,
-                ssl_ca_certs="~/incorrect/path",
+                api_key=api_key, proxy_url=PROXY1_URL_HTTPS, ssl_ca_certs="~/incorrect/path"
             )
             pc.list_indexes()
 
@@ -63,11 +54,7 @@ def test_proxy_with_valid_path_to_incorrect_cert(self, client_cls, api_key, prox
         ssl_ca_certs = os.path.join(proxy2["ssl_ca_certs"], "mitmproxy-ca-cert.pem")
         with pytest.raises(Exception) as e:
-            pc = client_cls(
-                api_key=api_key,
-                proxy_url=PROXY1_URL_HTTPS,
-                ssl_ca_certs=ssl_ca_certs,
-            )
+            pc = client_cls(api_key=api_key, proxy_url=PROXY1_URL_HTTPS, ssl_ca_certs=ssl_ca_certs)
             pc.list_indexes()
         assert "CERTIFICATE_VERIFY_FAILED" in str(e.value)
diff --git a/tests/integration/test_upsert.py b/tests/integration/test_upsert.py
index 0acd3d09..b3594160 100644
--- a/tests/integration/test_upsert.py
+++ b/tests/integration/test_upsert.py
@@ -10,7 +10,12 @@ def test_upsert_sanity(self, client, ready_sl_index, random_vector):
         idx.upsert(vectors=[("1", random_vector), ("2", random_vector), ("3", random_vector)])
 
         # Tuples with metadata
-        idx.upsert(vectors=[("4", random_vector, {"key": "value"}), ("5", random_vector, {"key": "value2"})])
+        idx.upsert(
+            vectors=[
+                ("4", random_vector, {"key": "value"}),
+                ("5", random_vector, {"key": "value2"}),
+            ]
+        )
 
         # Vector objects
         idx.upsert(vectors=[Vector(id="6", values=random_vector)])
@@ -33,7 +38,9 @@ def test_upsert_sparse_vectors(self, client, random_vector, create_sl_index_para
         idx.upsert(
             vectors=[
                 Vector(
-                    id="1", values=random_vector, sparse_values=SparseValues(values=[0.1, 0.2, 0.3], indices=[1, 2, 3])
+                    id="1",
+                    values=random_vector,
+                    sparse_values=SparseValues(values=[0.1, 0.2, 0.3], indices=[1, 2, 3]),
                 )
             ]
         )
diff --git a/tests/unit/data/test_bulk_import.py b/tests/unit/data/test_bulk_import.py
index ece0e615..45484e14 100644
--- a/tests/unit/data/test_bulk_import.py
+++ b/tests/unit/data/test_bulk_import.py
@@ -3,7 +3,7 @@
 from urllib3 import BaseHTTPResponse
 
 from pinecone.core_ea.openapi.db_data.api.bulk_operations_api import BulkOperationsApi
-from pinecone.core_ea.openapi.db_data.models import ImportModel, StartImportResponse
+from pinecone.core_ea.openapi.db_data.models import StartImportResponse
 from pinecone.core_ea.openapi.shared.api_client import ApiClient
 from pinecone.core_ea.openapi.shared.exceptions import PineconeApiException
 
@@ -17,13 +17,17 @@ def build_api_w_faked_response(mocker, body: str, status: int = 200) -> BaseHTTP
     response.data = body.encode("utf-8")
 
     api_client = ApiClient()
-    mock_request = mocker.patch.object(api_client.rest_client.pool_manager, "request", return_value=response)
+    mock_request = mocker.patch.object(
+        api_client.rest_client.pool_manager, "request", return_value=response
+    )
     return BulkOperationsApi(api_client=api_client), mock_request
 
 
 def build_client_w_faked_response(mocker, body: str, status: int = 200):
     api_client, mock_req = build_api_w_faked_response(mocker, body, status)
-    return ImportFeatureMixin(__import_operations_api=api_client, api_key="asdf", host="asdf"), mock_req
+    return ImportFeatureMixin(
+        __import_operations_api=api_client, api_key="asdf", host="asdf"
+    ), mock_req
 
 
 class TestBulkImportStartImport:
@@ -54,7 +58,9 @@ def test_start_import_with_kwargs(self, mocker):
         """
         client, mock_req = build_client_w_faked_response(mocker, body)
 
-        my_import = client.start_import(uri="s3://path/to/file.parquet", integration_id="123-456-789")
+        my_import = client.start_import(
+            uri="s3://path/to/file.parquet", integration_id="123-456-789"
+        )
         assert my_import.id == "1"
         assert my_import["id"] == "1"
         assert my_import.to_dict() == {"id": "1"}
@@ -68,13 +74,7 @@ def test_start_import_with_kwargs(self, mocker):
     )
 
     @pytest.mark.parametrize(
-        "error_mode_input",
-        [
-            ImportErrorMode.CONTINUE,
-            "Continue",
-            "continue",
-            "cONTINUE",
-        ],
+        "error_mode_input", [ImportErrorMode.CONTINUE, "Continue", "continue", "cONTINUE"]
     )
     def test_start_import_with_explicit_error_mode(self, mocker, error_mode_input):
         body = """
@@ -84,9 +84,12 @@ def test_start_import_with_explicit_error_mode(self, mocker, error_mode_input):
         """
         client, mock_req = build_client_w_faked_response(mocker, body)
 
-        my_import = client.start_import(uri="s3://path/to/file.parquet", error_mode=error_mode_input)
+        client.start_import(uri="s3://path/to/file.parquet", error_mode=error_mode_input)
         _, call_kwargs = mock_req.call_args
-        assert call_kwargs["body"] == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "continue"}}'
+        assert (
+            call_kwargs["body"]
+            == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "continue"}}'
+        )
 
     def test_start_import_with_abort_error_mode(self, mocker):
         body = """
@@ -96,9 +99,12 @@ def test_start_import_with_abort_error_mode(self, mocker):
         """
         client, mock_req = build_client_w_faked_response(mocker, body)
 
-        my_import = client.start_import(uri="s3://path/to/file.parquet", error_mode=ImportErrorMode.ABORT)
+        client.start_import(uri="s3://path/to/file.parquet", error_mode=ImportErrorMode.ABORT)
         _, call_kwargs = mock_req.call_args
-        assert call_kwargs["body"] == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "abort"}}'
+        assert (
+            call_kwargs["body"]
+            == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "abort"}}'
+        )
 
     def test_start_import_with_unknown_error_mode(self, mocker):
         body = """
@@ -109,7 +115,7 @@ def test_start_import_with_unknown_error_mode(self, mocker):
         client, mock_req = build_client_w_faked_response(mocker, body)
 
         with pytest.raises(ValueError) as e:
-            my_import = client.start_import(uri="s3://path/to/file.parquet", error_mode="unknown")
+            client.start_import(uri="s3://path/to/file.parquet", error_mode="unknown")
 
         assert "Invalid error_mode value: unknown" in str(e.value)
 
@@ -124,11 +130,13 @@ def test_start_invalid_uri(self, mocker):
         client, mock_req = build_client_w_faked_response(mocker, body, 400)
 
         with pytest.raises(PineconeApiException) as e:
-            my_import = client.start_import(uri="invalid path")
+            client.start_import(uri="invalid path")
 
         assert e.value.status == 400
         assert e.value.body == body
-        assert "Bulk import URIs must start with the scheme of a supported storage provider" in str(e.value)
+        assert "Bulk import URIs must start with the scheme of a supported storage provider" in str(
+            e.value
+        )
 
     def test_no_arguments(self, mocker):
         client, mock_req = build_client_w_faked_response(mocker, "")
diff --git a/tests/unit/data/test_datetime_parsing.py b/tests/unit/data/test_datetime_parsing.py
index 82f2e6c2..43e67ae9 100644
--- a/tests/unit/data/test_datetime_parsing.py
+++ b/tests/unit/data/test_datetime_parsing.py
@@ -1,5 +1,4 @@
 from pinecone import Vector, Config
-from datetime import datetime
 
 
 class TestDatetimeConversion:
@@ -12,7 +11,7 @@ def test_datetimes_not_coerced(self):
             _configuration=Config(),
         )
         assert vec.metadata["created_at"] == "7th of January, 2023"
-        assert vec.metadata["created_at"].__class__ == str
+        assert isinstance(vec.metadata["created_at"], str)
 
     def test_dates_not_coerced(self):
         vec = Vector(
@@ -23,4 +22,4 @@ def test_dates_not_coerced(self):
             _configuration=Config(),
         )
         assert vec.metadata["created_at"] == "8/12/2024"
-        assert vec.metadata["created_at"].__class__ == str
+        assert isinstance(vec.metadata["created_at"], str)
diff --git a/tests/unit/data/test_import_datetime_parsing.py b/tests/unit/data/test_import_datetime_parsing.py
index cf6213f9..b2af7a0d 100644
--- a/tests/unit/data/test_import_datetime_parsing.py
+++ b/tests/unit/data/test_import_datetime_parsing.py
@@ -1,12 +1,9 @@
-import pytest
-
 from urllib3 import BaseHTTPResponse, HTTPResponse
 from datetime import datetime, date
 
 from pinecone.core_ea.openapi.db_data.api.bulk_operations_api import BulkOperationsApi
-from pinecone.core_ea.openapi.db_data.model.import_model import ImportModel
-from pinecone.core_ea.openapi.shared.api_client import ApiClient, Endpoint as _Endpoint
+from pinecone.core_ea.openapi.shared.api_client import ApiClient
 from pinecone.core_ea.openapi.shared.rest import RESTResponse
 
 
diff --git a/tests/unit/data/test_vector_factory.py b/tests/unit/data/test_vector_factory.py
index f8f4e09f..e7b7199d 100644
--- a/tests/unit/data/test_vector_factory.py
+++ b/tests/unit/data/test_vector_factory.py
@@ -12,14 +12,18 @@ def test_build_when_returns_vector_unmodified(self):
         assert VectorFactory.build(vec) == vec
         assert VectorFactory.build(vec).__class__ == Vector
 
-    @pytest.mark.parametrize("values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])])
+    @pytest.mark.parametrize(
+        "values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])]
+    )
     def test_build_when_tuple_with_two_values(self, values_array):
         tup = ("1", values_array)
         actual = VectorFactory.build(tup)
         expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata={})
         assert actual == expected
 
-    @pytest.mark.parametrize("values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])])
+    @pytest.mark.parametrize(
+        "values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])]
+    )
     def test_build_when_tuple_with_three_values(self, values_array):
         tup = ("1", values_array, {"genre": "comedy"})
         actual = VectorFactory.build(tup)
@@ -39,8 +43,7 @@ def test_build_when_tuple_with_three_values(self, values_array):
     )
     def test_build_when_tuple_values_must_be_list(self, vector_tup):
         with pytest.raises(
-            ListConversionException,
-            match="Expected a list or list-like data structure",
+            ListConversionException, match="Expected a list or list-like data structure"
         ):
             VectorFactory.build(vector_tup)
 
@@ -54,7 +57,9 @@ def test_build_when_tuple_too_short(self):
             tup = ("1",)
             VectorFactory.build(tup)
 
-    @pytest.mark.parametrize("values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])])
+    @pytest.mark.parametrize(
+        "values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])]
+    )
     def test_build_when_dict(self, values_array):
         d = {"id": "1", "values": values_array, "metadata": {"genre": "comedy"}}
         actual = VectorFactory.build(d)
@@ -68,7 +73,12 @@ def test_build_when_dict_missing_required_fields(self):
 
     def test_build_when_dict_excess_keys(self):
         with pytest.raises(ValueError, match="Found excess keys in the vector dictionary"):
-            d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}, "extra": "field"}
+            d = {
+                "id": "1",
+                "values": [0.1, 0.2, 0.3],
+                "metadata": {"genre": "comedy"},
+                "extra": "field",
+            }
             VectorFactory.build(d)
 
     def test_build_when_dict_sparse_values(self):
@@ -104,12 +114,21 @@ def test_build_when_dict_sparse_values_when_SparseValues(self):
         assert actual == expected
 
     def test_build_when_dict_sparse_values_errors_when_not_dict(self):
-        with pytest.raises(ValueError, match="Column `sparse_values` is expected to be a dictionary"):
-            d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}, "sparse_values": "not a dict"}
+        with pytest.raises(
+            ValueError, match="Column `sparse_values` is expected to be a dictionary"
+        ):
+            d = {
+                "id": "1",
+                "values": [0.1, 0.2, 0.3],
+                "metadata": {"genre": "comedy"},
+                "sparse_values": "not a dict",
+            }
             VectorFactory.build(d)
 
     def test_build_when_dict_sparse_values_errors_when_missing_indices(self):
-        with pytest.raises(ValueError, match="Missing required keys in data in column `sparse_values`"):
+        with pytest.raises(
+            ValueError, match="Missing required keys in data in column `sparse_values`"
+        ):
             d = {
                 "id": "1",
                 "values": [0.1, 0.2, 0.3],
@@ -119,7 +138,9 @@ def test_build_when_dict_sparse_values_errors_when_missing_indices(self):
             VectorFactory.build(d)
 
     def test_build_when_dict_sparse_values_errors_when_missing_values(self):
-        with pytest.raises(ValueError, match="Missing required keys in data in column `sparse_values`"):
+        with pytest.raises(
+            ValueError, match="Missing required keys in data in column `sparse_values`"
+        ):
             d = {
                 "id": "1",
                 "values": [0.1, 0.2, 0.3],
@@ -217,7 +238,12 @@ def test_build_when_errors_when_other_type(self):
             VectorFactory.build(1)
 
     def test_build_when_sparse_values_is_None(self):
-        d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}, "sparse_values": None}
+        d = {
+            "id": "1",
+            "values": [0.1, 0.2, 0.3],
+            "metadata": {"genre": "comedy"},
+            "sparse_values": None,
+        }
         actual = VectorFactory.build(d)
         expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata={"genre": "comedy"})
         assert actual == expected
diff --git a/tests/unit/models/test_collection_list.py b/tests/unit/models/test_collection_list.py
index f62de248..12eaa775 100644
--- a/tests/unit/models/test_collection_list.py
+++ b/tests/unit/models/test_collection_list.py
@@ -1,6 +1,9 @@
 import pytest
 from pinecone import CollectionList
-from pinecone.core.openapi.control.models import CollectionList as OpenApiCollectionList, CollectionModel
+from pinecone.core.openapi.control.models import (
+    CollectionList as OpenApiCollectionList,
+    CollectionModel,
+)
 
 
 @pytest.fixture
@@ -23,7 +26,7 @@ def collection_list_response():
                 record_count=2000,
                 environment="us-west1-gcp",
             ),
-        ],
+        ]
     )
 
 
@@ -44,7 +47,8 @@ def test_collection_list_getitem(self, collection_list_response):
     def test_collection_list_proxies_methods(self, collection_list_response):
         # Forward compatibility, in case we add more attributes to IndexList for pagination
         assert (
-            CollectionList(collection_list_response).collection_list.collections == collection_list_response.collections
+            CollectionList(collection_list_response).collection_list.collections
+            == collection_list_response.collections
         )
 
     def test_when_results_are_empty(self):
diff --git a/tests/unit/models/test_index_list.py b/tests/unit/models/test_index_list.py
index 44d36828..c44e3c57 100644
--- a/tests/unit/models/test_index_list.py
+++ b/tests/unit/models/test_index_list.py
@@ -22,7 +22,9 @@ def index_list_response():
             status=IndexModelStatus(ready=True, state="Ready"),
             deletion_protection=DeletionProtection("enabled"),
             spec=IndexModelSpec(
-                pod=OpenApiPodSpec(environment="us-west1-gcp", pod_type="p1.x1", pods=1, replicas=1, shards=1)
+                pod=OpenApiPodSpec(
+                    environment="us-west1-gcp", pod_type="p1.x1", pods=1, replicas=1, shards=1
+                )
             ),
         ),
         OpenApiIndexModel(
@@ -33,7 +35,9 @@ def index_list_response():
             status=IndexModelStatus(ready=True, state="Ready"),
             deletion_protection=DeletionProtection("disabled"),
             spec=IndexModelSpec(
-                pod=OpenApiPodSpec(environment="us-west1-gcp", pod_type="p1.x1", pods=1, replicas=1, shards=1)
+                pod=OpenApiPodSpec(
+                    environment="us-west1-gcp", pod_type="p1.x1", pods=1, replicas=1, shards=1
+                )
             ),
         ),
     ],
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index 9ef14dfb..8ab7acbe 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -25,7 +25,9 @@ def run_before_and_after_tests(tmpdir):
     ]
     for var in known_env_vars:
         if os.getenv(var):
-            raise ValueError(f"Unexpected env var {var} found in environment. Check for test pollution.")
+            raise ValueError(
+                f"Unexpected env var {var} found in environment. Check for test pollution."
+            )
 
     yield  # this is where the testing happens
 
@@ -62,7 +64,10 @@ def test_init_with_kwargs(self):
         openapi_config = OpenApiConfiguration()
 
         config = PineconeConfig.build(
-            api_key=api_key, host=controller_host, ssl_ca_certs=ssl_ca_cert, openapi_config=openapi_config
+            api_key=api_key,
+            host=controller_host,
+            ssl_ca_certs=ssl_ca_cert,
+            openapi_config=openapi_config,
         )
 
         assert config.api_key == api_key
@@ -82,7 +87,9 @@ def test_resolution_order_kwargs_over_env_vars(self):
         controller_host = "kwargs-controller-host"
         additional_headers = {"header": "value2"}
 
-        config = PineconeConfig.build(api_key=api_key, host=controller_host, additional_headers=additional_headers)
+        config = PineconeConfig.build(
+            api_key=api_key, host=controller_host, additional_headers=additional_headers
+        )
 
         assert config.api_key == api_key
         assert config.host == "https://" + controller_host
diff --git a/tests/unit/test_config_builder.py b/tests/unit/test_config_builder.py
index 0e839960..b3cf1c2a 100644
--- a/tests/unit/test_config_builder.py
+++ b/tests/unit/test_config_builder.py
@@ -14,7 +14,9 @@ def test_build_simple(self):
 
     def test_build_merges_key_and_host_when_openapi_config_provided(self):
         config = ConfigBuilder.build(
-            api_key="my-api-key", host="https://my-controller-host", openapi_config=OpenApiConfiguration()
+            api_key="my-api-key",
+            host="https://my-controller-host",
+            openapi_config=OpenApiConfiguration(),
         )
         assert config.api_key == "my-api-key"
         assert config.host == "https://my-controller-host"
@@ -22,9 +24,7 @@ def test_build_merges_key_and_host_when_openapi_config_provided(self):
 
     def test_build_with_source_tag(self):
         config = ConfigBuilder.build(
-            api_key="my-api-key",
-            host="https://my-controller-host",
-            source_tag="my-source-tag",
+            api_key="my-api-key", host="https://my-controller-host", source_tag="my-source-tag"
         )
         assert config.api_key == "my-api-key"
         assert config.host == "https://my-controller-host"
@@ -61,7 +61,9 @@ def test_build_openapi_config_merges_with_existing_config(self):
         assert openapi_config.proxy == "http://my-proxy:8080"
 
     def test_build_openapi_config_does_not_mutate_input(self):
-        config = ConfigBuilder.build(api_key="my-api-key", host="foo", ssl_ca_certs="path/to/bundle.foo")
+        config = ConfigBuilder.build(
+            api_key="my-api-key", host="foo", ssl_ca_certs="path/to/bundle.foo"
+        )
 
         input_openapi_config = OpenApiConfiguration()
         input_openapi_config.host = "bar"
diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py
index ef45214a..2fb9e6da 100644
--- a/tests/unit/test_control.py
+++ b/tests/unit/test_control.py
@@ -4,7 +4,6 @@
 from pinecone import ConfigBuilder, Pinecone, PodSpec, ServerlessSpec
 from pinecone.core.openapi.control.models import IndexList, IndexModel, DeletionProtection
 from pinecone.core.openapi.control.api.manage_indexes_api import ManageIndexesApi
-from pinecone.core.openapi.shared.configuration import Configuration as OpenApiConfiguration
 
 import time
 
@@ -50,13 +49,15 @@ def index_list_response():
 class TestControl:
     def test_plugins_are_installed(self):
         with patch("pinecone.control.pinecone.install_plugins") as mock_install_plugins:
-            p = Pinecone(api_key="asdf")
+            Pinecone(api_key="asdf")
             mock_install_plugins.assert_called_once()
 
     def test_bad_plugin_doesnt_break_sdk(self):
-        with patch("pinecone.control.pinecone.install_plugins", side_effect=Exception("bad plugin")):
+        with patch(
+            "pinecone.control.pinecone.install_plugins", side_effect=Exception("bad plugin")
+        ):
             try:
-                p = Pinecone(api_key="asdf")
+                Pinecone(api_key="asdf")
             except Exception as e:
                 assert False, f"Unexpected exception: {e}"
 
@@ -91,10 +92,14 @@ def test_overwrite_useragent(self):
 
     def test_set_source_tag_in_useragent(self):
         p = Pinecone(api_key="123-456-789", source_tag="test_source_tag")
-        assert re.search(r"source_tag=test_source_tag", p.index_api.api_client.user_agent) is not None
+        assert (
+            re.search(r"source_tag=test_source_tag", p.index_api.api_client.user_agent) is not None
+        )
 
     def test_set_source_tag_in_useragent_via_config(self):
-        config = ConfigBuilder.build(api_key="YOUR_API_KEY", host="https://my-host", source_tag="my_source_tag")
+        config = ConfigBuilder.build(
+            api_key="YOUR_API_KEY", host="https://my-host", source_tag="my_source_tag"
+        )
         p = Pinecone(config=config)
         assert re.search(r"source_tag=my_source_tag", p.index_api.api_client.user_agent) is not None
 
@@ -104,13 +109,27 @@ def test_set_source_tag_in_useragent_via_config(self):
             # When timeout=None, describe_index is called until ready
             (None, [{"status": {"ready": False}}, {"status": {"ready": True}}], 2, 1),
             # Timeout of 10 seconds, describe_index called 3 times, sleep twice
-            (10, [{"status": {"ready": False}}, {"status": {"ready": False}}, {"status": {"ready": True}}], 3, 2),
+            (
+                10,
+                [
+                    {"status": {"ready": False}},
+                    {"status": {"ready": False}},
+                    {"status": {"ready": True}},
+                ],
+                3,
+                2,
+            ),
             # When timeout=-1, create_index returns immediately without calling describe_index or sleep
             (-1, [{"status": {"ready": False}}], 0, 0),
         ],
     )
     def test_create_index_with_timeout(
-        self, mocker, timeout_value, describe_index_responses, expected_describe_index_calls, expected_sleep_calls
+        self,
+        mocker,
+        timeout_value,
+        describe_index_responses,
+        expected_describe_index_calls,
+        expected_sleep_calls,
     ):
         p = Pinecone(api_key="123-456-789")
         mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses)
@@ -118,7 +137,10 @@ def test_create_index_with_timeout(
         mocker.patch("time.sleep")
 
         p.create_index(
-            name="my-index", dimension=10, spec=ServerlessSpec(cloud="aws", region="us-west1"), timeout=timeout_value
+            name="my-index",
+            dimension=10,
+            spec=ServerlessSpec(cloud="aws", region="us-west1"),
+            timeout=timeout_value,
         )
 
         assert p.index_api.create_index.call_count == 1
@@ -172,13 +194,27 @@ def test_create_index_with_spec_dictionary(self, mocker, index_spec):
             # When timeout=None, describe_index is called until ready
             (None, [{"status": {"ready": False}}, {"status": {"ready": True}}], 2, 1),
             # Timeout of 10 seconds, describe_index called 3 times, sleep twice
-            (10, [{"status": {"ready": False}}, {"status": {"ready": False}}, {"status": {"ready": True}}], 3, 2),
+            (
+ 10, + [ + {"status": {"ready": False}}, + {"status": {"ready": False}}, + {"status": {"ready": True}}, + ], + 3, + 2, + ), # When timeout=-1, create_index returns immediately without calling describe_index or sleep (-1, [{"status": {"ready": False}}], 0, 0), ], ) def test_create_index_from_source_collection( - self, mocker, timeout_value, describe_index_responses, expected_describe_index_calls, expected_sleep_calls + self, + mocker, + timeout_value, + describe_index_responses, + expected_describe_index_calls, + expected_sleep_calls, ): p = Pinecone(api_key="123-456-789") mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses) @@ -205,7 +241,9 @@ def test_create_index_when_timeout_exceeded(self, mocker): mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_response) mocker.patch("time.sleep") - p.create_index(name="my-index", dimension=10, timeout=10, spec=PodSpec(environment="us-west1-gcp")) + p.create_index( + name="my-index", dimension=10, timeout=10, spec=PodSpec(environment="us-west1-gcp") + ) def test_list_indexes_returns_iterable(self, mocker, index_list_response): p = Pinecone(api_key="123-456-789") diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 4bae22ff..e60c89c8 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,11 +1,16 @@ import pandas as pd -import numpy as np import pytest import pinecone from pinecone import Index from pinecone import UpsertRequest, Vector -from pinecone import DescribeIndexStatsRequest, ScoredVector, QueryResponse, UpsertResponse, SparseValues +from pinecone import ( + DescribeIndexStatsRequest, + ScoredVector, + QueryResponse, + UpsertResponse, + SparseValues, +) class TestRestIndex: @@ -21,10 +26,7 @@ def setup_method(self): self.filter2 = {"year": {"$eq": 2020}} self.svi1 = [1, 3, 5] self.svv1 = [0.1, 0.2, 0.3] - self.sv1 = { - "indices": self.svi1, - "values": self.svv1, - } + self.sv1 = {"indices": self.svi1, "values": self.svv1} self.svi2 = [2, 4, 6] self.svv2 = [0.1, 0.2, 0.3] self.sv2 = {"indices": self.svi2, "values": self.svv2} @@ -77,9 +79,13 @@ def test_upsert_dictOfIdVecMD_UpsertVectorsWithMD(self, mocker): def test_upsert_dictOfIdVecMD_UpsertVectorsWithoutMD(self, mocker): mocker.patch.object(self.index._vector_api, "upsert", autospec=True) - self.index.upsert([{"id": self.id1, "values": self.vals1}, {"id": self.id2, "values": self.vals2}]) + self.index.upsert( + [{"id": self.id1, "values": self.vals1}, {"id": self.id2, "values": self.vals2}] + ) self.index._vector_api.upsert.assert_called_once_with( - UpsertRequest(vectors=[Vector(id="vec1", values=self.vals1), Vector(id="vec2", values=self.vals2)]) + UpsertRequest( + vectors=[Vector(id="vec1", values=self.vals1), Vector(id="vec2", values=self.vals2)] + ) ) def test_upsert_dictOfIdVecMD_UpsertVectorsWithSparseValues(self, mocker): @@ -128,16 +134,15 @@ def test_upsert_parallelUpsert_callUpsertParallel(self, mocker): # Send requests in parallel async_results = [ - index.upsert(vectors=ids_vectors_chunk, namespace="ns", async_req=True) for ids_vectors_chunk in chunks + index.upsert(vectors=ids_vectors_chunk, namespace="ns", async_req=True) + for ids_vectors_chunk in chunks ] # Wait for and retrieve responses (this raises in case of error) [async_result.get() for async_result in async_results] index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec1", values=self.vals1, metadata=self.md1), - ], + vectors=[Vector(id="vec1", values=self.vals1, 
metadata=self.md1)], namespace="ns", ), async_req=True, @@ -145,9 +150,7 @@ def test_upsert_parallelUpsert_callUpsertParallel(self, mocker): index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec2", values=self.vals2, metadata=self.md2), - ], + vectors=[Vector(id="vec2", values=self.vals2, metadata=self.md2)], namespace="ns", ), async_req=True, @@ -158,7 +161,9 @@ def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, m self.index._vector_api, "upsert", autospec=True, - side_effect=lambda upsert_request: UpsertResponse(upserted_count=len(upsert_request.vectors)), + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors) + ), ) result = self.index.upsert( @@ -173,19 +178,13 @@ def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, m self.index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec1", values=self.vals1, metadata=self.md1), - ], - namespace="ns", + vectors=[Vector(id="vec1", values=self.vals1, metadata=self.md1)], namespace="ns" ) ) self.index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec2", values=self.vals2, metadata=self.md2), - ], - namespace="ns", + vectors=[Vector(id="vec2", values=self.vals2, metadata=self.md2)], namespace="ns" ) ) @@ -196,7 +195,9 @@ def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, self.index._vector_api, "upsert", autospec=True, - side_effect=lambda upsert_request: UpsertResponse(upserted_count=len(upsert_request.vectors)), + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors) + ), ) result = self.index.upsert( @@ -221,10 +222,7 @@ def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, self.index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec3", values=self.vals1, metadata=self.md1), - ], - namespace="ns", + vectors=[Vector(id="vec3", values=self.vals1, metadata=self.md1)], namespace="ns" ) ) @@ -235,7 +233,9 @@ def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches(self, mo self.index._vector_api, "upsert", autospec=True, - side_effect=lambda upsert_request: UpsertResponse(upserted_count=len(upsert_request.vectors)), + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors) + ), ) result = self.index.upsert( @@ -266,11 +266,17 @@ def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): self.index._vector_api, "upsert", autospec=True, - side_effect=lambda upsert_request: UpsertResponse(upserted_count=len(upsert_request.vectors)), + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors) + ), ) result = self.index.upsert( - vectors=[("vec1", self.vals1, self.md1), ("vec2", self.vals2, self.md2), ("vec3", self.vals1, self.md1)], + vectors=[ + ("vec1", self.vals1, self.md1), + ("vec2", self.vals2, self.md2), + ("vec3", self.vals1, self.md1), + ], namespace="ns", batch_size=2, ) @@ -287,10 +293,7 @@ def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): self.index._vector_api.upsert.assert_any_call( UpsertRequest( - vectors=[ - Vector(id="vec3", values=self.vals1, metadata=self.md1), - ], - namespace="ns", + vectors=[Vector(id="vec3", values=self.vals1, metadata=self.md1)], namespace="ns" ) ) @@ -298,7 +301,10 @@ def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): def test_upsert_dataframe(self, mocker): 
mocker.patch.object( - self.index._vector_api, "upsert", autospec=True, return_value=UpsertResponse(upserted_count=2) + self.index._vector_api, + "upsert", + autospec=True, + return_value=UpsertResponse(upserted_count=2), ) df = pd.DataFrame( [ @@ -320,7 +326,9 @@ def test_upsert_dataframe(self, mocker): def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): with pytest.raises(ValueError, match="batch_size must be a positive integer"): self.index.upsert( - vectors=[Vector(id="vec1", values=self.vals1, metadata=self.md1)], namespace="ns", batch_size=0 + vectors=[Vector(id="vec1", values=self.vals1, metadata=self.md1)], + namespace="ns", + batch_size=0, ) with pytest.raises(ValueError, match="batch_size must be a positive integer"): @@ -331,7 +339,9 @@ def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): ) def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): - with pytest.raises(ValueError, match="async_req is not supported when batch_size is provided."): + with pytest.raises( + ValueError, match="async_req is not supported when batch_size is provided." + ): self.index.upsert( vectors=[Vector(id="vec1", values=self.vals1, metadata=self.md1)], namespace="ns", @@ -345,15 +355,20 @@ def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker): response = QueryResponse( - results=[], matches=[ScoredVector(id="1", score=0.9, values=[0.0], metadata={"a": 2})], namespace="test" + results=[], + matches=[ScoredVector(id="1", score=0.9, values=[0.0], metadata={"a": 2})], + namespace="test", ) mocker.patch.object(self.index._vector_api, "query", autospec=True, return_value=response) actual = self.index.query(top_k=10, vector=self.vals1) - self.index._vector_api.query.assert_called_once_with(pinecone.QueryRequest(top_k=10, vector=self.vals1)) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, vector=self.vals1) + ) expected = QueryResponse( - matches=[ScoredVector(id="1", score=0.9, values=[0.0], metadata={"a": 2})], namespace="test" + matches=[ScoredVector(id="1", score=0.9, values=[0.0], metadata={"a": 2})], + namespace="test", ) assert expected.to_dict() == actual.to_dict() @@ -390,7 +405,9 @@ def test_query_with_positional_args(self, mocker): def test_delete_byIds_deleteByIds(self, mocker): mocker.patch.object(self.index._vector_api, "delete", autospec=True) self.index.delete(ids=["vec1", "vec2"]) - self.index._vector_api.delete.assert_called_once_with(pinecone.DeleteRequest(ids=["vec1", "vec2"])) + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(ids=["vec1", "vec2"]) + ) def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): mocker.patch.object(self.index._vector_api, "delete", autospec=True) @@ -402,7 +419,9 @@ def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): def test_delete_deleteAllNoFilter_deleteNoFilter(self, mocker): mocker.patch.object(self.index._vector_api, "delete", autospec=True) self.index.delete(delete_all=True) - self.index._vector_api.delete.assert_called_once_with(pinecone.DeleteRequest(delete_all=True)) + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(delete_all=True) + ) # endregion @@ -443,7 +462,9 @@ def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, moc def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): mocker.patch.object(self.index._vector_api, "describe_index_stats", autospec=True) 
self.index.describe_index_stats() - self.index._vector_api.describe_index_stats.assert_called_once_with(DescribeIndexStatsRequest()) + self.index._vector_api.describe_index_stats.assert_called_once_with( + DescribeIndexStatsRequest() + ) def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker): mocker.patch.object(self.index._vector_api, "describe_index_stats", autospec=True) diff --git a/tests/unit/test_index_initialization.py b/tests/unit/test_index_initialization.py index d477decb..8a06c4dd 100644 --- a/tests/unit/test_index_initialization.py +++ b/tests/unit/test_index_initialization.py @@ -24,7 +24,11 @@ def test_additional_headers_one_additional(self): def test_multiple_additional_headers(self): pc = Pinecone(api_key="YOUR_API_KEY") index = pc.Index( - host="myhost", additional_headers={"test-header": "test-header-value", "test-header2": "test-header-value2"} + host="myhost", + additional_headers={ + "test-header": "test-header-value", + "test-header2": "test-header-value2", + }, ) assert "test-header" in index._vector_api.api_client.default_headers assert "test-header2" in index._vector_api.api_client.default_headers @@ -42,11 +46,15 @@ def test_overwrite_useragent(self): def test_set_source_tag(self): pc = Pinecone(api_key="123-456-789", source_tag="test_source_tag") - index = pc.Index(host="myhost") - assert re.search(r"source_tag=test_source_tag", pc.index_api.api_client.user_agent) is not None + assert ( + re.search(r"source_tag=test_source_tag", pc.index_api.api_client.user_agent) is not None + ) def test_set_source_tag_via_config(self): - config = ConfigBuilder.build(api_key="YOUR_API_KEY", host="https://my-host", source_tag="my_source_tag") + config = ConfigBuilder.build( + api_key="YOUR_API_KEY", host="https://my-host", source_tag="my_source_tag" + ) pc = Pinecone(config=config) - index = pc.Index(host="myhost") - assert re.search(r"source_tag=my_source_tag", pc.index_api.api_client.user_agent) is not None + assert ( + re.search(r"source_tag=my_source_tag", pc.index_api.api_client.user_agent) is not None + ) diff --git a/tests/unit/utils/test_convert_to_list.py b/tests/unit/utils/test_convert_to_list.py index 6699493f..69580fd0 100644 --- a/tests/unit/utils/test_convert_to_list.py +++ b/tests/unit/utils/test_convert_to_list.py @@ -1,6 +1,5 @@ import pytest from pinecone.utils import convert_to_list -from pinecone import SparseValues import numpy as np import pandas as pd diff --git a/tests/unit/utils/test_normalize_host.py b/tests/unit/utils/test_normalize_host.py index d6ea9b31..f48b78de 100644 --- a/tests/unit/utils/test_normalize_host.py +++ b/tests/unit/utils/test_normalize_host.py @@ -6,17 +6,26 @@ def test_when_url_is_none(): def test_when_url_is_https(): - assert normalize_host("https://index-name-abcdef.svc.pinecone.io") == "https://index-name-abcdef.svc.pinecone.io" + assert ( + normalize_host("https://index-name-abcdef.svc.pinecone.io") + == "https://index-name-abcdef.svc.pinecone.io" + ) def test_when_url_is_http(): # This should not occur in prod, but if it does, we will leave it alone. # Could be useful when testing with local proxies. 
- assert normalize_host("http://index-name-abcdef.svc.pinecone.io") == "http://index-name-abcdef.svc.pinecone.io" + assert ( + normalize_host("http://index-name-abcdef.svc.pinecone.io") + == "http://index-name-abcdef.svc.pinecone.io" + ) def test_when_url_is_host_without_protocol(): - assert normalize_host("index-name-abcdef.svc.pinecone.io") == "https://index-name-abcdef.svc.pinecone.io" + assert ( + normalize_host("index-name-abcdef.svc.pinecone.io") + == "https://index-name-abcdef.svc.pinecone.io" + ) def test_can_be_called_multiple_times(): diff --git a/tests/unit/utils/test_setup_openapi_client.py b/tests/unit/utils/test_setup_openapi_client.py index e5bafb69..446aaaf5 100644 --- a/tests/unit/utils/test_setup_openapi_client.py +++ b/tests/unit/utils/test_setup_openapi_client.py @@ -13,17 +13,20 @@ def test_setup_openapi_client(self): assert openapi_config.host == "https://my-controller-host" control_plane_client = setup_openapi_client( - ApiClient, ManageIndexesApi, config=config, openapi_config=openapi_config, pool_threads=2 + ApiClient, + ManageIndexesApi, + config=config, + openapi_config=openapi_config, + pool_threads=2, ) user_agent_regex = re.compile(r"python-client-\d+\.\d+\.\d+ \(urllib3\:\d+\.\d+\.\d+\)") assert re.match(user_agent_regex, control_plane_client.api_client.user_agent) - assert re.match(user_agent_regex, control_plane_client.api_client.default_headers["User-Agent"]) + assert re.match( + user_agent_regex, control_plane_client.api_client.default_headers["User-Agent"] + ) def test_setup_openapi_client_with_api_version(self): - config = ConfigBuilder.build( - api_key="my-api-key", - host="https://my-controller-host", - ) + config = ConfigBuilder.build(api_key="my-api-key", host="https://my-controller-host") openapi_config = ConfigBuilder.build_openapi_config(config) assert openapi_config.host == "https://my-controller-host" @@ -37,12 +40,18 @@ def test_setup_openapi_client_with_api_version(self): ) user_agent_regex = re.compile(r"python-client-\d+\.\d+\.\d+ \(urllib3\:\d+\.\d+\.\d+\)") assert re.match(user_agent_regex, control_plane_client.api_client.user_agent) - assert re.match(user_agent_regex, control_plane_client.api_client.default_headers["User-Agent"]) - assert control_plane_client.api_client.default_headers["X-Pinecone-API-Version"] == "2024-04" + assert re.match( + user_agent_regex, control_plane_client.api_client.default_headers["User-Agent"] + ) + assert ( + control_plane_client.api_client.default_headers["X-Pinecone-API-Version"] == "2024-04" + ) class TestBuildPluginSetupClient: - @pytest.mark.parametrize("plugin_api_version,plugin_host", [(None, None), ("2024-07", "https://my-plugin-host")]) + @pytest.mark.parametrize( + "plugin_api_version,plugin_host", [(None, None), ("2024-07", "https://my-plugin-host")] + ) def test_setup_openapi_client_with_host_override(self, plugin_api_version, plugin_host): # These configurations represent the configurations that the core sdk # (e.g. Pinecone class) will have built prior to invoking the plugin setup. @@ -67,7 +76,9 @@ def test_setup_openapi_client_with_host_override(self, plugin_api_version, plugi # proxy settings, etc) while allowing the plugin to pass the parts of the # configuration that are relevant to it such as api version, base url if # served from somewhere besides api.pinecone.io, etc. 
- client_builder = build_plugin_setup_client(config=config, openapi_config=openapi_config, pool_threads=2) + client_builder = build_plugin_setup_client( + config=config, openapi_config=openapi_config, pool_threads=2 + ) # The plugin machinery in pinecone_plugin_interface will be the one to call # this client_builder function using classes and other config it discovers inside the @@ -81,7 +92,10 @@ def test_setup_openapi_client_with_host_override(self, plugin_api_version, plugi # class generated off the openapi spec. plugin_api = ManageIndexesApi plugin_client = client_builder( - api_client_klass=ApiClient, api_klass=plugin_api, api_version=plugin_api_version, host=plugin_host + api_client_klass=ApiClient, + api_klass=plugin_api, + api_version=plugin_api_version, + host=plugin_host, ) # Returned client is an instance of the input class @@ -100,7 +114,10 @@ def test_setup_openapi_client_with_host_override(self, plugin_api_version, plugi assert plugin_client.api_client.configuration.ssl_ca_cert == "path/to/bundle.pem" # Plugins need to be able to pass their own API version (optionally) - assert plugin_client.api_client.default_headers.get("X-Pinecone-API-Version") == plugin_api_version + assert ( + plugin_client.api_client.default_headers.get("X-Pinecone-API-Version") + == plugin_api_version + ) # Plugins need to be able to override the host (optionally) if plugin_host: diff --git a/tests/unit/utils/test_user_agent.py b/tests/unit/utils/test_user_agent.py index 9f17d680..58cdfbbf 100644 --- a/tests/unit/utils/test_user_agent.py +++ b/tests/unit/utils/test_user_agent.py @@ -33,13 +33,17 @@ def test_source_tag_is_normalized(self): assert re.search(r"source_tag=my_source_tag", useragent) is not None config = ConfigBuilder.build( - api_key="my-api-key", host="https://my-controller-host", source_tag=" My Source Tag 123 " + api_key="my-api-key", + host="https://my-controller-host", + source_tag=" My Source Tag 123 ", ) useragent = get_user_agent(config) assert re.search(r"source_tag=my_source_tag_123", useragent) is not None config = ConfigBuilder.build( - api_key="my-api-key", host="https://my-controller-host", source_tag=" My Source Tag 123 #### !! " + api_key="my-api-key", + host="https://my-controller-host", + source_tag=" My Source Tag 123 #### !! 
", ) useragent = get_user_agent(config) assert re.search(r"source_tag=my_source_tag_123", useragent) is not None diff --git a/tests/unit_grpc/test_grpc_index_describe_index_stats.py b/tests/unit_grpc/test_grpc_index_describe_index_stats.py index fc779e9f..0b7388f4 100644 --- a/tests/unit_grpc/test_grpc_index_describe_index_stats.py +++ b/tests/unit_grpc/test_grpc_index_describe_index_stats.py @@ -1,30 +1,21 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd -import pytest - from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone import DescribeIndexStatsRequest -from pinecone.core.grpc.protos.vector_service_pb2 import ( - DescribeIndexStatsRequest, -) +from pinecone.core.grpc.protos.vector_service_pb2 import DescribeIndexStatsRequest from pinecone.grpc.utils import dict_to_proto_struct class TestGrpcIndexDescribeIndexStats: def setup_method(self): self.config = Config(api_key="test-api-key", host="foo") - self.index = GRPCIndex(config=self.config, index_name="example-name", _endpoint_override="test-endpoint") + self.index = GRPCIndex( + config=self.config, index_name="example-name", _endpoint_override="test-endpoint" + ) def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.describe_index_stats() self.index._wrap_grpc_call.assert_called_once_with( - self.index.stub.DescribeIndexStats, - DescribeIndexStatsRequest(), - timeout=None, + self.index.stub.DescribeIndexStats, DescribeIndexStatsRequest(), timeout=None ) def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker, filter1): diff --git a/tests/unit_grpc/test_grpc_index_fetch.py b/tests/unit_grpc/test_grpc_index_fetch.py index 1c750c4c..6ccb4199 100644 --- a/tests/unit_grpc/test_grpc_index_fetch.py +++ b/tests/unit_grpc/test_grpc_index_fetch.py @@ -1,35 +1,25 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd -import pytest - from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.vector_service_pb2 import ( - FetchRequest, -) +from pinecone.core.grpc.protos.vector_service_pb2 import FetchRequest class TestGrpcIndexFetch: def setup_method(self): self.config = Config(api_key="test-api-key", host="foo") - self.index = GRPCIndex(config=self.config, index_name="example-name", _endpoint_override="test-endpoint") + self.index = GRPCIndex( + config=self.config, index_name="example-name", _endpoint_override="test-endpoint" + ) def test_fetch_byIds_fetchByIds(self, mocker): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.fetch(["vec1", "vec2"]) self.index._wrap_grpc_call.assert_called_once_with( - self.index.stub.Fetch, - FetchRequest(ids=["vec1", "vec2"]), - timeout=None, + self.index.stub.Fetch, FetchRequest(ids=["vec1", "vec2"]), timeout=None ) def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.fetch(["vec1", "vec2"], namespace="ns", timeout=30) self.index._wrap_grpc_call.assert_called_once_with( - self.index.stub.Fetch, - FetchRequest(ids=["vec1", "vec2"], namespace="ns"), - timeout=30, + self.index.stub.Fetch, FetchRequest(ids=["vec1", "vec2"], namespace="ns"), timeout=30 ) diff --git a/tests/unit_grpc/test_grpc_index_initialization.py b/tests/unit_grpc/test_grpc_index_initialization.py index d8503a66..c12689ee 100644 --- a/tests/unit_grpc/test_grpc_index_initialization.py +++ 
b/tests/unit_grpc/test_grpc_index_initialization.py @@ -1,6 +1,5 @@ import re from pinecone.grpc import PineconeGRPC, GRPCClientConfig -from pinecone import ConfigBuilder class TestGRPCIndexInitialization: @@ -12,24 +11,26 @@ def test_init_with_default_config(self): assert index.grpc_client_config.timeout == 20 assert index.grpc_client_config.conn_timeout == 1 assert index.grpc_client_config.reuse_channel == True - assert index.grpc_client_config.retry_config == None - assert index.grpc_client_config.grpc_channel_options == None - assert index.grpc_client_config.additional_metadata == None + assert index.grpc_client_config.retry_config is None + assert index.grpc_client_config.grpc_channel_options is None + assert index.grpc_client_config.additional_metadata is None # Default metadata, grpc equivalent to http request headers assert len(index.fixed_metadata) == 3 assert index.fixed_metadata["api-key"] == "YOUR_API_KEY" assert index.fixed_metadata["service-name"] == "my-index" - assert index.fixed_metadata["client-version"] != None + assert index.fixed_metadata["client-version"] is not None def test_init_with_additional_metadata(self): pc = PineconeGRPC(api_key="YOUR_API_KEY") - config = GRPCClientConfig(additional_metadata={"debug-header": "value123", "debug-header2": "value456"}) + config = GRPCClientConfig( + additional_metadata={"debug-header": "value123", "debug-header2": "value456"} + ) index = pc.Index(name="my-index", host="host", grpc_config=config) assert len(index.fixed_metadata) == 5 assert index.fixed_metadata["api-key"] == "YOUR_API_KEY" assert index.fixed_metadata["service-name"] == "my-index" - assert index.fixed_metadata["client-version"] != None + assert index.fixed_metadata["client-version"] is not None assert index.fixed_metadata["debug-header"] == "value123" assert index.fixed_metadata["debug-header2"] == "value456" @@ -105,5 +106,6 @@ def test_config_passed_when_target_by_host_and_port(self): def test_config_passes_source_tag_when_set(self): pc = PineconeGRPC(api_key="YOUR_API_KEY", source_tag="my_source_tag") - index = pc.Index(name="my-index", host="host") - assert re.search(r"source_tag=my_source_tag", pc.index_api.api_client.user_agent) is not None + assert ( + re.search(r"source_tag=my_source_tag", pc.index_api.api_client.user_agent) is not None + ) diff --git a/tests/unit_grpc/test_grpc_index_query.py b/tests/unit_grpc/test_grpc_index_query.py index 561b7331..a871656c 100644 --- a/tests/unit_grpc/test_grpc_index_query.py +++ b/tests/unit_grpc/test_grpc_index_query.py @@ -1,29 +1,23 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd import pytest from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.vector_service_pb2 import ( - QueryRequest, -) +from pinecone.core.grpc.protos.vector_service_pb2 import QueryRequest from pinecone.grpc.utils import dict_to_proto_struct class TestGrpcIndexQuery: def setup_method(self): self.config = Config(api_key="test-api-key", host="foo") - self.index = GRPCIndex(config=self.config, index_name="example-name", _endpoint_override="test-endpoint") + self.index = GRPCIndex( + config=self.config, index_name="example-name", _endpoint_override="test-endpoint" + ) def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker, vals1): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.query(top_k=10, vector=vals1) self.index._wrap_grpc_call.assert_called_once_with( - self.index.stub.Query, - QueryRequest(top_k=10, vector=vals1), - timeout=None, + 
self.index.stub.Query, QueryRequest(top_k=10, vector=vals1), timeout=None ) def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker, vals1, filter1): @@ -31,7 +25,9 @@ def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker, vals1, fil self.index.query(top_k=10, vector=vals1, filter=filter1, namespace="ns", timeout=10) self.index._wrap_grpc_call.assert_called_once_with( self.index.stub.Query, - QueryRequest(top_k=10, vector=vals1, filter=dict_to_proto_struct(filter1), namespace="ns"), + QueryRequest( + top_k=10, vector=vals1, filter=dict_to_proto_struct(filter1), namespace="ns" + ), timeout=10, ) diff --git a/tests/unit_grpc/test_grpc_index_update.py b/tests/unit_grpc/test_grpc_index_update.py index ce07c496..12774195 100644 --- a/tests/unit_grpc/test_grpc_index_update.py +++ b/tests/unit_grpc/test_grpc_index_update.py @@ -1,21 +1,15 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd -import pytest - from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.vector_service_pb2 import ( - UpdateRequest, -) +from pinecone.core.grpc.protos.vector_service_pb2 import UpdateRequest from pinecone.grpc.utils import dict_to_proto_struct class TestGrpcIndexUpdate: def setup_method(self): self.config = Config(api_key="test-api-key", host="foo") - self.index = GRPCIndex(config=self.config, index_name="example-name", _endpoint_override="test-endpoint") + self.index = GRPCIndex( + config=self.config, index_name="example-name", _endpoint_override="test-endpoint" + ) def test_update_byIdAnValues_updateByIdAndValues(self, mocker, vals1): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) @@ -35,7 +29,9 @@ def test_update_byIdAnValuesAsync_updateByIdAndValuesAsync(self, mocker, vals1): timeout=30, ) - def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker, vals1, md1): + def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata( + self, mocker, vals1, md1 + ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.update("vec1", values=vals1, set_metadata=md1) self.index._wrap_grpc_call.assert_called_once_with( diff --git a/tests/unit_grpc/test_grpc_index_upsert.py b/tests/unit_grpc/test_grpc_index_upsert.py index a7d4ff7a..fb5de75a 100644 --- a/tests/unit_grpc/test_grpc_index_upsert.py +++ b/tests/unit_grpc/test_grpc_index_upsert.py @@ -66,7 +66,9 @@ def expected_vec_md_sparse2(vals2, md2, sparse_indices_2, sparse_values_2): class TestGrpcIndexUpsert: def setup_method(self): self.config = Config(api_key="test-api-key", host="foo") - self.index = GRPCIndex(config=self.config, index_name="example-name", _endpoint_override="test-endpoint") + self.index = GRPCIndex( + config=self.config, index_name="example-name", _endpoint_override="test-endpoint" + ) def _assert_called_once(self, vectors, async_call=False): self.index._wrap_grpc_call.assert_called_once_with( @@ -75,7 +77,9 @@ def _assert_called_once(self, vectors, async_call=False): timeout=None, ) - def test_upsert_tuplesOfIdVec_UpserWithoutMD(self, mocker, vals1, vals2, expected_vec1, expected_vec2): + def test_upsert_tuplesOfIdVec_UpserWithoutMD( + self, mocker, vals1, vals2, expected_vec1, expected_vec2 + ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.upsert([("vec1", vals1), ("vec2", vals2)], namespace="ns") self._assert_called_once([expected_vec1, expected_vec2]) @@ -85,16 +89,12 @@ def test_upsert_tuplesOfIdVecMD_UpsertVectorsWithMD( ): 
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.upsert([("vec1", vals1, md1), ("vec2", vals2, md2)], namespace="ns") - self._assert_called_once( - [expected_vec_md1, expected_vec_md2], - ) + self._assert_called_once([expected_vec_md1, expected_vec_md2]) def test_upsert_vectors_upsertInputVectors(self, mocker, expected_vec_md1, expected_vec_md2): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) self.index.upsert([expected_vec_md1, expected_vec_md2], namespace="ns") - self._assert_called_once( - [expected_vec_md1, expected_vec_md2], - ) + self._assert_called_once([expected_vec_md1, expected_vec_md2]) def test_upsert_vectors_upsertInputVectorsSparse( self, @@ -137,7 +137,9 @@ def test_upsert_dict(self, mocker, vals1, vals2, expected_vec1, expected_vec2): self.index.upsert([dict1, dict2], namespace="ns") self._assert_called_once([expected_vec1, expected_vec2]) - def test_upsert_dict_md(self, mocker, vals1, md1, vals2, md2, expected_vec_md1, expected_vec_md2): + def test_upsert_dict_md( + self, mocker, vals1, md1, vals2, md2, expected_vec_md1, expected_vec_md2 + ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) dict1 = {"id": "vec1", "values": vals1, "metadata": md1} dict2 = {"id": "vec2", "values": vals2, "metadata": md2} @@ -145,7 +147,14 @@ def test_upsert_dict_md(self, mocker, vals1, md1, vals2, md2, expected_vec_md1, self._assert_called_once([expected_vec_md1, expected_vec_md2]) def test_upsert_dict_sparse( - self, mocker, vals1, vals2, sparse_indices_1, sparse_values_1, sparse_indices_2, sparse_values_2 + self, + mocker, + vals1, + vals2, + sparse_indices_1, + sparse_values_1, + sparse_indices_2, + sparse_values_2, ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) dict1 = { @@ -177,7 +186,16 @@ def test_upsert_dict_sparse( ) def test_upsert_dict_sparse_md( - self, mocker, vals1, md1, vals2, md2, sparse_indices_1, sparse_values_1, sparse_indices_2, sparse_values_2 + self, + mocker, + vals1, + md1, + vals2, + md2, + sparse_indices_1, + sparse_values_1, + sparse_indices_2, + sparse_values_2, ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) dict1 = { @@ -248,7 +266,9 @@ def test_upsert_dict_negative(self, mocker, vals1, vals2, md2): ("sparse_values", []), ], ) - def test_upsert_dict_with_invalid_values(self, mocker, key, new_val, vals1, md1, sparse_indices_1, sparse_values_1): + def test_upsert_dict_with_invalid_values( + self, mocker, key, new_val, vals1, md1, sparse_indices_1, sparse_values_1 + ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) full_dict1 = { @@ -264,14 +284,10 @@ def test_upsert_dict_with_invalid_values(self, mocker, key, new_val, vals1, md1, self.index.upsert([dict1]) assert key in str(e.value) - @pytest.mark.parametrize( - "key,new_val", - [ - ("id", 4.2), - ("id", ["vec1"]), - ], - ) - def test_upsert_dict_with_invalid_ids(self, mocker, key, new_val, vals1, md1, sparse_indices_1, sparse_values_1): + @pytest.mark.parametrize("key,new_val", [("id", 4.2), ("id", ["vec1"])]) + def test_upsert_dict_with_invalid_ids( + self, mocker, key, new_val, vals1, md1, sparse_indices_1, sparse_values_1 + ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) full_dict1 = { @@ -289,12 +305,7 @@ def test_upsert_dict_with_invalid_ids(self, mocker, key, new_val, vals1, md1, sp @pytest.mark.parametrize( "key,new_val", - [ - ("indices", 3), - ("indices", [1.2, 0.5]), - ("values", ["1", "4.4"]), - ("values", 0.5), - ], + [("indices", 3), ("indices", [1.2, 
0.5]), ("values", ["1", "4.4"]), ("values", 0.5)], ) def test_upsert_dict_with_invalid_sparse_values( self, mocker, key, new_val, vals1, md1, sparse_indices_1, sparse_values_1 @@ -354,7 +365,9 @@ def test_upsert_dataframe( ] ) self.index.upsert_from_dataframe(df, namespace="ns") - self._assert_called_once([expected_vec_md_sparse1, expected_vec_md_sparse2], async_call=True) + self._assert_called_once( + [expected_vec_md_sparse1, expected_vec_md_sparse2], async_call=True + ) def test_upsert_dataframe_sync( self, @@ -395,10 +408,21 @@ def test_upsert_dataframe_sync( ] ) self.index.upsert_from_dataframe(df, namespace="ns", use_async_requests=False) - self._assert_called_once([expected_vec_md_sparse1, expected_vec_md_sparse2], async_call=False) + self._assert_called_once( + [expected_vec_md_sparse1, expected_vec_md_sparse2], async_call=False + ) def test_upsert_dataframe_negative( - self, mocker, vals1, md1, vals2, md2, sparse_indices_1, sparse_values_1, sparse_indices_2, sparse_values_2 + self, + mocker, + vals1, + md1, + vals2, + md2, + sparse_indices_1, + sparse_values_1, + sparse_indices_2, + sparse_values_2, ): mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True) full_dict1 = { @@ -455,13 +479,16 @@ def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches( self.index._wrap_grpc_call.assert_any_call( self.index.stub.Upsert, UpsertRequest( - vectors=[Vector(id="vec1", values=vals1, metadata=dict_to_proto_struct(md1))], namespace="ns" + vectors=[Vector(id="vec1", values=vals1, metadata=dict_to_proto_struct(md1))], + namespace="ns", ), timeout=None, ) self.index._wrap_grpc_call.assert_any_call( - self.index.stub.Upsert, UpsertRequest(vectors=[expected_vec_md2], namespace="ns"), timeout=None + self.index.stub.Upsert, + UpsertRequest(vectors=[expected_vec_md2], namespace="ns"), + timeout=None, ) assert result.upserted_count == 2 @@ -496,7 +523,8 @@ def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches( self.index._wrap_grpc_call.assert_any_call( self.index.stub.Upsert, UpsertRequest( - vectors=[Vector(id="vec3", values=vals1, metadata=dict_to_proto_struct(md1))], namespace="ns" + vectors=[Vector(id="vec3", values=vals1, metadata=dict_to_proto_struct(md1))], + namespace="ns", ), timeout=None, ) @@ -515,10 +543,10 @@ def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches( ), ) - result = self.index.upsert([expected_vec_md1, expected_vec_md2], namespace="ns", batch_size=5) - self._assert_called_once( - [expected_vec_md1, expected_vec_md2], + result = self.index.upsert( + [expected_vec_md1, expected_vec_md2], namespace="ns", batch_size=5 ) + self._assert_called_once([expected_vec_md1, expected_vec_md2]) assert result.upserted_count == 2 @@ -548,7 +576,8 @@ def test_upsert_tuplesList_vectorsUpsertedInBatches( self.index._wrap_grpc_call.assert_any_call( self.index.stub.Upsert, UpsertRequest( - vectors=[Vector(id="vec3", values=vals1, metadata=dict_to_proto_struct(md1))], namespace="ns" + vectors=[Vector(id="vec3", values=vals1, metadata=dict_to_proto_struct(md1))], + namespace="ns", ), timeout=None, ) @@ -571,7 +600,9 @@ def test_upsert_batchSizeIsNotPositive_errorIsRaised(self, vals1, md1): ) def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self, vals1, md1): - with pytest.raises(ValueError, match="async_req is not supported when batch_size is provided."): + with pytest.raises( + ValueError, match="async_req is not supported when batch_size is provided." 
+ ): self.index.upsert( [Vector(id="vec1", values=vals1, metadata=dict_to_proto_struct(md1))], namespace="ns", diff --git a/tests/unit_grpc/test_sparse_values_factory.py b/tests/unit_grpc/test_sparse_values_factory.py index 2938613a..4b3066f5 100644 --- a/tests/unit_grpc/test_sparse_values_factory.py +++ b/tests/unit_grpc/test_sparse_values_factory.py @@ -11,7 +11,7 @@ class TestSparseValuesFactory: def test_build_when_None(self): - assert SparseValuesFactory.build(None) == None + assert SparseValuesFactory.build(None) is None def test_build_when_passed_GRPCSparseValues(self): """ @@ -67,14 +67,12 @@ def test_build_when_special_data_types(self, input): assert actual == expected @pytest.mark.parametrize( - "input", - [ - {"indices": [2], "values": [0.3, 0.3]}, - {"indices": [88, 102], "values": [-0.1]}, - ], + "input", [{"indices": [2], "values": [0.3, 0.3]}, {"indices": [88, 102], "values": [-0.1]}] ) def test_build_when_list_sizes_dont_match(self, input): - with pytest.raises(ValueError, match="Sparse values indices and values must have the same length"): + with pytest.raises( + ValueError, match="Sparse values indices and values must have the same length" + ): SparseValuesFactory.build(input) @pytest.mark.parametrize( diff --git a/tests/unit_grpc/test_vector_factory_grpc.py b/tests/unit_grpc/test_vector_factory_grpc.py index f85824e9..11cfb401 100644 --- a/tests/unit_grpc/test_vector_factory_grpc.py +++ b/tests/unit_grpc/test_vector_factory_grpc.py @@ -2,7 +2,6 @@ import pandas as pd import pytest -from collections.abc import Iterable, Mapping from pinecone.grpc.vector_factory_grpc import VectorFactoryGRPC from pinecone.grpc import Vector, SparseValues @@ -18,7 +17,9 @@ def test_build_when_returns_vector_unmodified(self): def test_build_when_nongrpc_vector_it_converts(self): vec = NonGRPCVector(id="1", values=[0.1, 0.2, 0.3]) - assert VectorFactoryGRPC.build(vec) == Vector(id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({})) + assert VectorFactoryGRPC.build(vec) == Vector( + id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({}) + ) def test_build_when_nongrpc_vector_with_metadata_it_converts(self): vec = NonGRPCVector(id="1", values=[0.1, 0.2, 0.3], metadata={"genre": "comedy"}) @@ -28,7 +29,9 @@ def test_build_when_nongrpc_vector_with_metadata_it_converts(self): def test_build_when_nongrpc_vector_with_sparse_values_it_converts(self): vec = NonGRPCVector( - id="1", values=[0.1, 0.2, 0.3], sparse_values=NonGRPCSparseValues(indices=[0, 2], values=[0.1, 0.3]) + id="1", + values=[0.1, 0.2, 0.3], + sparse_values=NonGRPCSparseValues(indices=[0, 2], values=[0.1, 0.3]), ) assert VectorFactoryGRPC.build(vec) == Vector( id="1", @@ -37,7 +40,9 @@ def test_build_when_nongrpc_vector_with_sparse_values_it_converts(self): sparse_values=SparseValues(indices=[0, 2], values=[0.1, 0.3]), ) - @pytest.mark.parametrize("values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])]) + @pytest.mark.parametrize( + "values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])] + ) def test_build_when_tuple_with_two_values(self, values_array): tup = ("1", values_array) actual = VectorFactoryGRPC.build(tup) @@ -46,20 +51,26 @@ def test_build_when_tuple_with_two_values(self, values_array): @pytest.mark.parametrize( "vector_tup", - [("1", "not an array"), ("1", {}), ("1", "not an array", {"genre": "comedy"}), ("1", {}, {"genre": "comedy"})], + [ + ("1", "not an array"), + ("1", {}), + ("1", "not an array", {"genre": "comedy"}), + ("1", {}, 
{"genre": "comedy"}), + ], ) def test_build_when_tuple_values_must_be_list(self, vector_tup): - with pytest.raises( - TypeError, - match="Expected a list or list-like data structure", - ): + with pytest.raises(TypeError, match="Expected a list or list-like data structure"): VectorFactoryGRPC.build(vector_tup) - @pytest.mark.parametrize("values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])]) + @pytest.mark.parametrize( + "values_array", [[0.1, 0.2, 0.3], np.array([0.1, 0.2, 0.3]), pd.array([0.1, 0.2, 0.3])] + ) def test_build_when_tuple_with_three_values(self, values_array): tup = ("1", values_array, {"genre": "comedy"}) actual = VectorFactoryGRPC.build(tup) - expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"})) + expected = Vector( + id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"}) + ) assert actual == expected @pytest.mark.parametrize("sv_klass", [SparseValues, NonGRPCSparseValues]) @@ -81,11 +92,15 @@ def test_build_when_tuple_too_short(self): tup = ("1",) VectorFactoryGRPC.build(tup) - @pytest.mark.parametrize("metadata", [{"genre": "comedy"}, dict_to_proto_struct({"genre": "comedy"})]) + @pytest.mark.parametrize( + "metadata", [{"genre": "comedy"}, dict_to_proto_struct({"genre": "comedy"})] + ) def test_build_when_dict(self, metadata): d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": metadata} actual = VectorFactoryGRPC.build(d) - expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"})) + expected = Vector( + id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"}) + ) assert actual == expected def test_build_with_dict_no_metadata(self): @@ -131,7 +146,9 @@ def test_build_with_dict_with_sparse_values_object(self, sv_klass): def test_build_when_dict_with_special_values(self, input_values): d = {"id": "1", "values": input_values, "metadata": {"genre": "comedy"}} actual = VectorFactoryGRPC.build(d) - expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"})) + expected = Vector( + id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"}) + ) assert actual == expected def test_build_when_dict_missing_required_fields(self): @@ -141,7 +158,12 @@ def test_build_when_dict_missing_required_fields(self): def test_build_when_dict_excess_keys(self): with pytest.raises(ValueError, match="Found excess keys in the vector dictionary"): - d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}, "extra": "field"} + d = { + "id": "1", + "values": [0.1, 0.2, 0.3], + "metadata": {"genre": "comedy"}, + "extra": "field", + } VectorFactoryGRPC.build(d) @pytest.mark.parametrize( @@ -188,7 +210,9 @@ def test_build_when_dict_sparse_values_when_SparseValues(self): assert actual == expected @pytest.mark.parametrize("bogus_sparse_values", [1, "not an array", [1, 2], {}]) - def test_build_when_dict_sparse_values_errors_when_invalid_sparse_values_values(self, bogus_sparse_values): + def test_build_when_dict_sparse_values_errors_when_invalid_sparse_values_values( + self, bogus_sparse_values + ): with pytest.raises(ValueError, match="Found unexpected data in column `sparse_values`"): d = { "id": "1", @@ -199,7 +223,9 @@ def test_build_when_dict_sparse_values_errors_when_invalid_sparse_values_values( VectorFactoryGRPC.build(d) @pytest.mark.parametrize("bogus_sparse_indices", [1, "not an array", [0.1, 0.2], {}]) - def 
test_build_when_dict_sparse_values_errors_when_indices_not_valid_list(self, bogus_sparse_indices): + def test_build_when_dict_sparse_values_errors_when_indices_not_valid_list( + self, bogus_sparse_indices + ): with pytest.raises(ValueError, match="Found unexpected data in column `sparse_values`"): d = { "id": "1", @@ -213,17 +239,11 @@ def test_build_when_errors_when_other_type(self): with pytest.raises(ValueError, match="Invalid vector value passed: cannot interpret type"): VectorFactoryGRPC.build(1) - @pytest.mark.parametrize( - "bogus_sparse_values", - [ - 1, - "not a dict", - [1, 2, 3], - [], - ], - ) + @pytest.mark.parametrize("bogus_sparse_values", [1, "not a dict", [1, 2, 3], []]) def test_build_when_invalid_sparse_values_type_in_dict(self, bogus_sparse_values): - with pytest.raises(ValueError, match="Column `sparse_values` is expected to be a dictionary"): + with pytest.raises( + ValueError, match="Column `sparse_values` is expected to be a dictionary" + ): d = { "id": "1", "values": [0.1, 0.2, 0.3], @@ -233,15 +253,12 @@ def test_build_when_invalid_sparse_values_type_in_dict(self, bogus_sparse_values VectorFactoryGRPC.build(d) @pytest.mark.parametrize( - "bogus_sparse_values", - [ - {}, - {"indices": [0, 2]}, - {"values": [0.1, 0.3]}, - ], + "bogus_sparse_values", [{}, {"indices": [0, 2]}, {"values": [0.1, 0.3]}] ) def test_build_when_missing_keys_in_sparse_values_dict(self, bogus_sparse_values): - with pytest.raises(ValueError, match="Missing required keys in data in column `sparse_values`"): + with pytest.raises( + ValueError, match="Missing required keys in data in column `sparse_values`" + ): d = { "id": "1", "values": [0.1, 0.2, 0.3], @@ -251,7 +268,14 @@ def test_build_when_missing_keys_in_sparse_values_dict(self, bogus_sparse_values VectorFactoryGRPC.build(d) def test_build_when_sparse_values_is_None(self): - d = {"id": "1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}, "sparse_values": None} + d = { + "id": "1", + "values": [0.1, 0.2, 0.3], + "metadata": {"genre": "comedy"}, + "sparse_values": None, + } actual = VectorFactoryGRPC.build(d) - expected = Vector(id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"})) + expected = Vector( + id="1", values=[0.1, 0.2, 0.3], metadata=dict_to_proto_struct({"genre": "comedy"}) + ) assert actual == expected
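
For readers skimming the reformatted tests above, the `Index.upsert` input shapes they exercise can be summarized in one place. The following is a minimal illustrative sketch, not part of the patch; the API key, host, and vector ids are placeholders, and it only uses call patterns that appear verbatim in the tests (tuple, dict, and object inputs, `namespace`, `batch_size`):

```python
# Illustrative sketch only (not part of the patch). API key, host, and ids are placeholders.
from pinecone import Pinecone, SparseValues, Vector

pc = Pinecone(api_key="YOUR_API_KEY")  # placeholder key
index = pc.Index(host="https://index-name-abcdef.svc.pinecone.io")  # placeholder host

# Tuple form: (id, values) or (id, values, metadata). The parametrized tests
# assert that values may be a plain list, a numpy array, or a pandas array.
index.upsert(vectors=[("vec1", [0.1, 0.2, 0.3], {"genre": "comedy"})], namespace="ns")

# Dict form: "id" and "values" are required; "metadata" and "sparse_values" are
# optional. Sparse "indices" and "values" must have matching lengths, and
# excess keys raise a ValueError, per the factory tests above.
index.upsert(
    vectors=[
        {
            "id": "vec2",
            "values": [0.1, 0.2, 0.3],
            "sparse_values": {"indices": [0, 2], "values": [0.1, 0.3]},
        }
    ],
    namespace="ns",
    batch_size=2,  # must be a positive integer; not combinable with async_req=True
)

# Object form: pass Vector / SparseValues instances directly.
index.upsert(
    vectors=[
        Vector(
            id="vec3",
            values=[0.1, 0.2, 0.3],
            sparse_values=SparseValues(indices=[0, 2], values=[0.1, 0.3]),
        )
    ],
    namespace="ns",
)
```

The three forms appear to be interchangeable because the factory classes under test (`VectorFactory`, `VectorFactoryGRPC`, `SparseValuesFactory`) normalize each input into a `Vector` before the request is sent, which is what the `test_vector_factory*.py` cases above assert.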