diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml index b71e1e384..0c9db92d6 100644 --- a/.github/workflows/hamilton-main.yml +++ b/.github/workflows/hamilton-main.yml @@ -79,15 +79,6 @@ jobs: - name: Test integrations - if: ${{ matrix.python-version == '3.9' }} - run: | - uv sync --group test --extra pandera - uv pip install -r tests/integrations/pandera/requirements.txt - uv pip install dask-expr - uv run pytest tests/integrations - - - name: Test integrations - if: ${{ matrix.python-version != '3.9' }} run: | uv sync --group test --extra pandera uv pip install -r tests/integrations/pandera/requirements.txt @@ -111,15 +102,11 @@ jobs: uv run pytest plugin_tests/h_narwhals - name: Test dask - # Dask supports >= py3.9 - if: ${{ matrix.python-version != '3.8' }} run: | uv sync --group test --extra dask uv run pytest plugin_tests/h_dask - name: Test ray - # Ray supports >= py3.9 - if: ${{ matrix.python-version != '3.8' }} env: RAY_ENABLE_UV_RUN_RUNTIME_ENV: 0 # https://github.com/ray-project/ray/issues/53848 run: | @@ -127,8 +114,7 @@ jobs: uv run pytest plugin_tests/h_ray - name: Test pyspark - # Spark supports >= py3.9 - if: ${{ matrix.python-version != '3.8' && runner.os == 'Linux' }} + if: ${{ runner.os == 'Linux' }} env: PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false pyspark-shell" run: | @@ -139,8 +125,7 @@ jobs: uv run pytest plugin_tests/h_spark - name: Test pyspark - # Spark supports >= py3.9 - if: ${{ matrix.python-version != '3.8' && runner.os != 'Linux' }} + if: ${{ runner.os != 'Linux' }} env: PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false pyspark-shell" run: | @@ -151,7 +136,7 @@ jobs: - name: Test vaex # Vaex supports <= py3.10 and numpy<2 - if: ${{ runner.os == 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }} + if: ${{ runner.os == 'Linux' && matrix.python-version == '3.10' }} run: | sudo apt-get install --no-install-recommends --yes libpcre3-dev cargo uv sync --group test --extra vaex @@ -160,7 +145,7 @@ jobs: - name: Test vaex # Vaex supports <= py3.10 and numpy<2 - if: ${{ runner.os != 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }} + if: ${{ runner.os != 'Linux' && matrix.python-version == '3.10' }} run: | uv sync --group test --extra vaex uv pip install "numpy<2" diff --git a/.github/workflows/hamilton-ui-backend.yml b/.github/workflows/hamilton-ui-backend.yml index 204fc48c3..20942ce5f 100644 --- a/.github/workflows/hamilton-ui-backend.yml +++ b/.github/workflows/hamilton-ui-backend.yml @@ -42,7 +42,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.10' - name: Install dependencies run: | cd ui/backend/server diff --git a/contrib/docs/compile_docs.py b/contrib/docs/compile_docs.py index b5503ec61..dd492d89e 100644 --- a/contrib/docs/compile_docs.py +++ b/contrib/docs/compile_docs.py @@ -344,7 +344,7 @@ def _create_commit_file(df_path, single_df): os.makedirs(commit_path, exist_ok=True) with open(os.path.join(commit_path, "commit.txt"), "w") as f: for commit, ts in zip( - single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"] + single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"], strict=False ): f.write(f"[commit::{commit}][ts::{ts}]\n") diff --git a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py index a038698dc..fed60ce61 100644 --- a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py +++ b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py @@ -409,7 +409,7 @@ def _accuracy_and_se( threshold = threshold_thousandths / 1000 total = 0 correct = 0 - for cs, ls in zip(cosine_similarity, labeled_similarity): + for cs, ls in zip(cosine_similarity, labeled_similarity, strict=False): total += 1 if cs > threshold: prediction = 1 diff --git a/contrib/hamilton/contrib/user/skrawcz/fine_tuning/__init__.py b/contrib/hamilton/contrib/user/skrawcz/fine_tuning/__init__.py index 406a2a3a2..606520b5d 100644 --- a/contrib/hamilton/contrib/user/skrawcz/fine_tuning/__init__.py +++ b/contrib/hamilton/contrib/user/skrawcz/fine_tuning/__init__.py @@ -657,7 +657,7 @@ def hold_out_set_predictions( ) predictions.append(prediction) questions.append(tokenizer.decode(sample["input_ids"], skip_special_tokens=True)) - return list(zip(questions, predictions)) + return list(zip(questions, predictions, strict=False)) if __name__ == "__main__": diff --git a/contrib/setup.py b/contrib/setup.py index 8fda7941d..7662d65c2 100644 --- a/contrib/setup.py +++ b/contrib/setup.py @@ -75,15 +75,14 @@ def load_requirements(): "Natural Language :: English", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ], # Note that this feature requires pep8 >= v9 and a version of setup tools greater than the # default version installed with virtualenv. Make sure to update your tools! - python_requires=">=3.8, <4", + python_requires=">=3.10, <4", # adding this to slim the package down, since these dependencies are only used in certain contexts. extras_require={ "visualization": ["sf-hamilton[visualization]"], diff --git a/dev_tools/language_server/pyproject.toml b/dev_tools/language_server/pyproject.toml index a0fe6b383..7ff509598 100644 --- a/dev_tools/language_server/pyproject.toml +++ b/dev_tools/language_server/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", ] -requires-python = ">=3.8, <4" +requires-python = ">=3.10, <4" dependencies = [ "pygls>=1.3.1", "sf-hamilton[visualization]>=1.56", diff --git a/docs/get-started/install.md b/docs/get-started/install.md index 48810c907..04055a89d 100644 --- a/docs/get-started/install.md +++ b/docs/get-started/install.md @@ -24,7 +24,7 @@ Installing hamilton is easy! Apache Hamilton is a lightweight framework with a variety of extensions/plugins. To get started, you'll need the following: -- ``python >= 3.8`` +- ``python >= 3.10`` - ``pip`` For help with python/pip/managing virtual environments see the [python docs](https://docs.python.org/3/tutorial/venv.html/). diff --git a/examples/LLM_Workflows/modular_llm_stack/lancedb_module.py b/examples/LLM_Workflows/modular_llm_stack/lancedb_module.py index 573db8b68..befb74f70 100644 --- a/examples/LLM_Workflows/modular_llm_stack/lancedb_module.py +++ b/examples/LLM_Workflows/modular_llm_stack/lancedb_module.py @@ -68,7 +68,9 @@ def data_objects( assert len(ids) == len(titles) == len(text_contents) == len(embeddings) return [ dict(squad_id=id_, title=title, context=context, vector=embedding, **metadata) - for id_, title, context, embedding in zip(ids, titles, text_contents, embeddings) + for id_, title, context, embedding in zip( + ids, titles, text_contents, embeddings, strict=False + ) ] diff --git a/examples/LLM_Workflows/modular_llm_stack/marqo_module.py b/examples/LLM_Workflows/modular_llm_stack/marqo_module.py index e186f7819..2c1bea0c6 100644 --- a/examples/LLM_Workflows/modular_llm_stack/marqo_module.py +++ b/examples/LLM_Workflows/modular_llm_stack/marqo_module.py @@ -58,7 +58,7 @@ def data_objects( assert len(ids) == len(titles) == len(text_contents) return [ dict(_id=id, title=title, Description=text_content) - for id, title, text_content in zip(ids, titles, text_contents) + for id, title, text_content in zip(ids, titles, text_contents, strict=False) if id is not None and title is not None or text_content is not None ] diff --git a/examples/LLM_Workflows/modular_llm_stack/pinecone_module.py b/examples/LLM_Workflows/modular_llm_stack/pinecone_module.py index 38f4d3bfb..29f8bacb5 100644 --- a/examples/LLM_Workflows/modular_llm_stack/pinecone_module.py +++ b/examples/LLM_Workflows/modular_llm_stack/pinecone_module.py @@ -62,7 +62,7 @@ def data_objects( assert len(ids) == len(titles) == len(embeddings) properties = [dict(title=title, **metadata) for title in titles] embeddings = [x.tolist() for x in embeddings] - return list(zip(ids, embeddings, properties)) + return list(zip(ids, embeddings, properties, strict=False)) def push_to_vector_db( diff --git a/examples/LLM_Workflows/modular_llm_stack/qdrant_module.py b/examples/LLM_Workflows/modular_llm_stack/qdrant_module.py index 9856cb17e..a05fc26ff 100644 --- a/examples/LLM_Workflows/modular_llm_stack/qdrant_module.py +++ b/examples/LLM_Workflows/modular_llm_stack/qdrant_module.py @@ -59,7 +59,7 @@ def data_objects( ids = list(range(len(ids))) payloads = [ dict(id=_id, text_content=text_content, title=title, **metadata) - for _id, title, text_content in zip(ids, titles, text_contents) + for _id, title, text_content in zip(ids, titles, text_contents, strict=False) ] embeddings = [x.tolist() for x in embeddings] return dict(ids=ids, vectors=embeddings, payload=payloads) diff --git a/examples/LLM_Workflows/modular_llm_stack/weaviate_module.py b/examples/LLM_Workflows/modular_llm_stack/weaviate_module.py index 3a723588e..ed31145e0 100644 --- a/examples/LLM_Workflows/modular_llm_stack/weaviate_module.py +++ b/examples/LLM_Workflows/modular_llm_stack/weaviate_module.py @@ -86,7 +86,7 @@ def data_objects( assert len(ids) == len(titles) == len(text_contents) return [ dict(squad_id=id_, title=title, context=context, **metadata) - for id_, title, context in zip(ids, titles, text_contents) + for id_, title, context in zip(ids, titles, text_contents, strict=False) ] diff --git a/examples/LLM_Workflows/retrieval_augmented_generation/backend/ingestion.py b/examples/LLM_Workflows/retrieval_augmented_generation/backend/ingestion.py index 14bfedd1f..209debc2e 100644 --- a/examples/LLM_Workflows/retrieval_augmented_generation/backend/ingestion.py +++ b/examples/LLM_Workflows/retrieval_augmented_generation/backend/ingestion.py @@ -212,7 +212,9 @@ def store_documents( uuid=document_uuid, ) - chunk_iterator = zip(pdf_obj["chunked_text"], pdf_obj["chunked_embeddings"]) + chunk_iterator = zip( + pdf_obj["chunked_text"], pdf_obj["chunked_embeddings"], strict=False + ) for chunk_idx, (chunk_text, chunk_embedding) in enumerate(chunk_iterator): chunk_object = dict(content=chunk_text, chunk_index=chunk_idx) chunk_uuid = generate_uuid5(chunk_object, "Chunk") diff --git a/examples/due_date_probabilities/probability_estimation.py b/examples/due_date_probabilities/probability_estimation.py index a3878750a..8009cbb8b 100644 --- a/examples/due_date_probabilities/probability_estimation.py +++ b/examples/due_date_probabilities/probability_estimation.py @@ -133,7 +133,8 @@ def raw_probabilities(raw_data: str) -> pd.DataFrame: days = [int(item.split(", ")[1].split()[0]) for item in raw_data] probability = [float(item.split()[5].replace("%", "")) / 100 for item in raw_data] probabilities_data = [ - (week * 7 + day, probability) for week, day, probability in zip(weeks, days, probability) + (week * 7 + day, probability) + for week, day, probability in zip(weeks, days, probability, strict=False) ] probabilities_df = pd.DataFrame(probabilities_data) probabilities_df.columns = ["days", "probability"] diff --git a/hamilton/async_driver.py b/hamilton/async_driver.py index aa6287fbf..95a53a521 100644 --- a/hamilton/async_driver.py +++ b/hamilton/async_driver.py @@ -38,7 +38,7 @@ async def await_dict_of_tasks(task_dict: Dict[str, typing.Awaitable]) -> Dict[st keys = sorted(task_dict.keys()) coroutines = [task_dict[key] for key in keys] coroutines_gathered = await asyncio.gather(*coroutines) - return dict(zip(keys, coroutines_gathered)) + return dict(zip(keys, coroutines_gathered, strict=False)) async def process_value(val: Any) -> Any: diff --git a/hamilton/experimental/decorators/parameterize_frame.py b/hamilton/experimental/decorators/parameterize_frame.py index 30f9b3312..10aee4a53 100644 --- a/hamilton/experimental/decorators/parameterize_frame.py +++ b/hamilton/experimental/decorators/parameterize_frame.py @@ -67,11 +67,11 @@ def _convert_params_from_df(parameterization: pd.DataFrame) -> List[Parameterize for _, column_set in parameterization.iterrows(): parameterization = { arg: dep_type(col_value) - for arg, col_value, dep_type in zip(args, column_set, dep_types_converted) + for arg, col_value, dep_type in zip(args, column_set, dep_types_converted, strict=False) if dep_type is not None } extracted_columns = [ - col for col, dep_type in zip(column_set, dep_types) if dep_type == "out" + col for col, dep_type in zip(column_set, dep_types, strict=False) if dep_type == "out" ] out.append(ParameterizedExtract(tuple(extracted_columns), parameterization)) return out diff --git a/hamilton/function_modifiers/expanders.py b/hamilton/function_modifiers/expanders.py index 9e16a9451..a0074d357 100644 --- a/hamilton/function_modifiers/expanders.py +++ b/hamilton/function_modifiers/expanders.py @@ -1065,7 +1065,9 @@ def tuple_generator(*args, **kwargs): output_nodes = [node_.copy_with(callabl=tuple_generator)] - for idx, (field_name, field_type) in enumerate(zip(self.fields, self.field_types)): + for idx, (field_name, field_type) in enumerate( + zip(self.fields, self.field_types, strict=False) + ): def extractor(field_index: int = idx, **kwargs) -> field_type: # type: ignore # This extractor is constructed to avoid closure issues. diff --git a/hamilton/plugins/h_kedro.py b/hamilton/plugins/h_kedro.py index b5e617230..a20557b8c 100644 --- a/hamilton/plugins/h_kedro.py +++ b/hamilton/plugins/h_kedro.py @@ -35,7 +35,7 @@ def expand_k_node(base_node: HNode, outputs: List[str]) -> List[HNode]: """ def _convert_output_from_tuple_to_dict(node_result: Any, node_kwargs: Dict[str, Any]): - return {out: v for out, v in zip(outputs, node_result)} + return {out: v for out, v in zip(outputs, node_result, strict=False)} # NOTE isinstance(Any, type) is False for Python < 3.11 extractor = extract_fields(fields={out: Any for out in outputs}) @@ -91,7 +91,7 @@ def k_node_to_h_nodes(node: KNode) -> List[HNode]: # remap the function parameters to the node `inputs` and clean Kedro `parameters` name new_params = {} - for param, k_input in zip(params, node.inputs): + for param, k_input in zip(params, node.inputs, strict=False): if k_input.startswith("params:"): k_input = k_input.partition("params:")[-1] diff --git a/hamilton/plugins/h_openlineage.py b/hamilton/plugins/h_openlineage.py index 2554d568b..a186b29ef 100644 --- a/hamilton/plugins/h_openlineage.py +++ b/hamilton/plugins/h_openlineage.py @@ -61,6 +61,7 @@ def extract_schema_facet(metadata): for k, v in zip( metadata["dataframe_metadata"]["column_names"], metadata["dataframe_metadata"]["datatypes"], + strict=False, ) ] schema_facet = facet_v2.schema_dataset.SchemaDatasetFacet( diff --git a/pyproject.toml b/pyproject.toml index bfa3f85e7..481c85b0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,10 +93,9 @@ dev = [ "ruff==0.5.7", # this should match `.pre-commit-config.yaml` ] test = [ - "connectorx<=0.3.2; python_version=='3.8'", - "connectorx; python_version!='3.8'", + "connectorx", "dask[complete]", - "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after py3.10 https://github.com/dask/dask-expr/pull/1150 + "dask-expr>=1.1.14", "datasets>=2.18.0", # huggingface datasets -- https://github.com/huggingface/datasets/issues/6737#issuecomment-2107336816 "diskcache", "dlt", @@ -117,15 +116,13 @@ test = [ "polars", "pyarrow", "pydantic >=2.0", - "pyreadstat<1.2.8; python_version <= '3.9'", # for SPSS data loader - "pyreadstat; python_version > '3.9'", # for SPSS data loader + "pyreadstat", # for SPSS data loader "pytest", "pytest-asyncio", "pytest-cov", "PyYAML", "scikit-learn", - "sqlalchemy==1.4.49; python_version == '3.7.*'", - "sqlalchemy; python_version >= '3.8'", + "sqlalchemy", "typer", "xgboost", "xlsx2csv", # for excel data loader @@ -135,7 +132,7 @@ docs = [ {include-group = "dev"}, "alabaster>=0.7,<0.8,!=0.7.5", # read the docs pins "commonmark==0.9.1", # read the docs pins - "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after py3.10 https://github.com/dask/dask-expr/pull/1150 + "dask-expr>=1.1.14", "dask[distributed]", "ddtrace<3.0", "diskcache", @@ -200,7 +197,7 @@ slack = "https://join.slack.com/t/hamilton-opensource/shared_invite/zt-2niepkra8 [tool.ruff] line-length = 100 -target-version = "py38" # Must include only the earliest supported version +target-version = "py310" # Must include only the earliest supported version [tool.ruff.format] docstring-code-format = false diff --git a/scripts/build_conda.sh b/scripts/build_conda.sh index 2b7a58da8..d56af6992 100644 --- a/scripts/build_conda.sh +++ b/scripts/build_conda.sh @@ -22,7 +22,7 @@ CONDA_HOME=$HOME/anaconda3 # conda activate && anaconda login pkg='sf-hamilton' # adjust the Python versions you would like to build -array=(3.7 3.8 3.9 3.10 3.11 ) +array=(3.10 3.11 3.12 3.13) echo "Building conda package ..." cd ~ # this will create a ~/sf-hamilton directory with metadata to build the package. diff --git a/scripts/update_blogs_in_learning_resources.py b/scripts/update_blogs_in_learning_resources.py index dd6601ca0..3472d8216 100644 --- a/scripts/update_blogs_in_learning_resources.py +++ b/scripts/update_blogs_in_learning_resources.py @@ -105,7 +105,7 @@ def fetch_articles(url, cutoff_date): None, ), ] - for _i, (anchor, time_el) in enumerate(zip(anchors, timeEls)): + for _i, (anchor, time_el) in enumerate(zip(anchors, timeEls, strict=False)): link = anchor["href"] text = anchor.get_text() diff --git a/tests/resources/test_driver_serde_mapper.py b/tests/resources/test_driver_serde_mapper.py index 7d71edf53..7b9f064da 100644 --- a/tests/resources/test_driver_serde_mapper.py +++ b/tests/resources/test_driver_serde_mapper.py @@ -27,7 +27,7 @@ def mapper( ) -> Parallelizable[dict]: if final_vars is None: final_vars = [] - for dr, input_ in zip(drivers, inputs): + for dr, input_ in zip(drivers, inputs, strict=False): yield { "dr": dr, "final_vars": final_vars or dr.list_available_variables(), diff --git a/ui/backend/server/trackingserver_run_tracking/api.py b/ui/backend/server/trackingserver_run_tracking/api.py index c216c3a67..a95e9c962 100644 --- a/ui/backend/server/trackingserver_run_tracking/api.py +++ b/ui/backend/server/trackingserver_run_tracking/api.py @@ -426,7 +426,7 @@ async def get_latest_template_runs( return CatalogZoomResponse( node_runs=[ NodeRunOutWithExtraData.from_orm(node_run, dag_template_id=dag_template_id) - for (node_run, dag_template_id) in zip(node_runs, all_dag_versions) + for (node_run, dag_template_id) in zip(node_runs, all_dag_versions, strict=False) ], node_templates=[ NodeTemplateOut.from_orm(node_template) for node_template in node_templates diff --git a/ui/backend/setup.py b/ui/backend/setup.py index ff76d0541..60aed1a88 100644 --- a/ui/backend/setup.py +++ b/ui/backend/setup.py @@ -54,15 +54,14 @@ def load_requirements(): "Natural Language :: English", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ], # Note that this feature requires pep8 >= v9 and a version of setup tools greater than the # default version installed with virtualenv. Make sure to update your tools! - python_requires=">=3.6, <4", + python_requires=">=3.10, <4", # adding this to slim the package down, since these dependencies are only used in certain contexts. # Relevant project URLs project_urls={ # Optional diff --git a/ui/sdk/pyproject.toml b/ui/sdk/pyproject.toml index 20f0fd83e..695aa83a1 100644 --- a/ui/sdk/pyproject.toml +++ b/ui/sdk/pyproject.toml @@ -17,12 +17,12 @@ classifiers = [ "Natural Language :: English", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11" + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13" ] -requires-python = ">=3.7, <4" +requires-python = ">=3.10, <4" dynamic = ["dependencies", "optional-dependencies", "version"] [project.urls] diff --git a/writeups/developer_setup.md b/writeups/developer_setup.md index 0af38927e..31c60f39a 100644 --- a/writeups/developer_setup.md +++ b/writeups/developer_setup.md @@ -121,7 +121,7 @@ The following values for `TASK` are recognized: Choose a Python version and task. ```shell -PYTHON_VERSION='3.8' +PYTHON_VERSION='3.10' TASK=tests ```