From 9e5dd2e87ac8d42defc79aa4c4d368b898dcebe6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 08:51:24 -0600 Subject: [PATCH 1/7] Stop hard-coding Comet version in docs --- docs/generate-versions.py | 34 ++++++++++++++++--- docs/source/user-guide/latest/iceberg.md | 2 +- docs/source/user-guide/latest/index.rst | 6 ++-- docs/source/user-guide/latest/installation.md | 8 ++--- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/docs/generate-versions.py b/docs/generate-versions.py index e1306f26dd..73a34afad0 100644 --- a/docs/generate-versions.py +++ b/docs/generate-versions.py @@ -23,8 +23,34 @@ # of Comet import os +from pathlib import Path -for version in ["0.8", "0.9"]: - os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{version} comet-{version}") - os.system(f"mkdir temp/user-guide/{version}") - os.system(f"cp -rf comet-{version}/docs/source/user-guide/* temp/user-guide/{version}") \ No newline at end of file +current_version = "0.10.0-SNAPSHOT" +previous_versions = ["0.8", "0.9"] + +def replace_in_files(root: str, filename_pattern: str, search: str, replace: str): + root_path = Path(root) + for md_file in root_path.rglob(filename_pattern): + text = md_file.read_text(encoding="utf-8") + updated = text.replace(search, replace) + if text != updated: + md_file.write_text(updated, encoding="utf-8") + print(f"Replaced {search} with {replace} in {md_file}") + +def generate_docs(): + + # Replace $COMET_VERSION with actual version + for file_pattern in ["*.md", "*.rst"]: + replace_in_files(f"temp/user-guide/latest", file_pattern, "$COMET_VERSION", current_version) + + for version in previous_versions: + os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{version} comet-{version}") + os.system(f"mkdir temp/user-guide/{version}") + os.system(f"cp -rf comet-{version}/docs/source/user-guide/* temp/user-guide/{version}") + # Replace $COMET_VERSION with actual version + for file_pattern in ["*.md", "*.rst"]: + replace_in_files(f"temp/user-guide/{version}", file_pattern, "$COMET_VERSION", current_version) + +if __name__ == "__main__": + print("Generating versioned user guide docs...") + generate_docs() \ No newline at end of file diff --git a/docs/source/user-guide/latest/iceberg.md b/docs/source/user-guide/latest/iceberg.md index aa3caa79b4..acae6ad27f 100644 --- a/docs/source/user-guide/latest/iceberg.md +++ b/docs/source/user-guide/latest/iceberg.md @@ -39,7 +39,7 @@ make release Set `COMET_JAR` env var: ```shell -export COMET_JAR=`pwd`/spark/target/comet-spark-spark3.5_2.12-0.10.0-SNAPSHOT.jar +export COMET_JAR=`pwd`/spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar ``` ## Build Iceberg diff --git a/docs/source/user-guide/latest/index.rst b/docs/source/user-guide/latest/index.rst index c1c676ef3c..d5df4d014a 100644 --- a/docs/source/user-guide/latest/index.rst +++ b/docs/source/user-guide/latest/index.rst @@ -19,13 +19,13 @@ :alt: DataFusion Comet Logo ================================ -Comet 0.10.0-SNAPSHOT User Guide +Comet $COMET_VERSION User Guide ================================ -.. _toc.user-guide-links-latest: +.. _toc.user-guide-links-$COMET_VERSION: .. toctree:: :maxdepth: 1 - :caption: Comet 0.10.0-SNAPSHOT User Guide + :caption: Comet $COMET_VERSION User Guide Installing Comet Building From Source diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index dee8fd74ac..89a8624e3e 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -30,9 +30,7 @@ Make sure the following requirements are met and software installed on your mach ### Supported Spark Versions -Comet currently supports the following versions of Apache Spark. Note that this documentation is for the main -branch. Please refer to the version of this documentation in the appropriate GitHub release branch for released -Comet versions. +Comet $COMET_VERSION supports the following versions of Apache Spark. We recommend only using Comet with Spark versions where we currently have both Comet and Spark tests enabled in CI. Other versions may work well enough for development and evaluation purposes. @@ -84,7 +82,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```shell -export COMET_JAR=spark/target/comet-spark-spark3.5_2.12-0.10.0-SNAPSHOT.jar +export COMET_JAR=spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -140,7 +138,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. For example: ``` ---driver-class-path spark/target/comet-spark-spark3.5_2.12-0.10.0-SNAPSHOT.jar +--driver-class-path spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar ``` Some cluster managers may require additional configuration, see From 80e6256073d3e2f84d728118ac61b2ee2bd01817 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 08:54:37 -0600 Subject: [PATCH 2/7] prep for review --- docs/generate-versions.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/generate-versions.py b/docs/generate-versions.py index 73a34afad0..b3d7df8710 100644 --- a/docs/generate-versions.py +++ b/docs/generate-versions.py @@ -25,19 +25,16 @@ import os from pathlib import Path -current_version = "0.10.0-SNAPSHOT" -previous_versions = ["0.8", "0.9"] - def replace_in_files(root: str, filename_pattern: str, search: str, replace: str): root_path = Path(root) - for md_file in root_path.rglob(filename_pattern): - text = md_file.read_text(encoding="utf-8") + for file in root_path.rglob(filename_pattern): + text = file.read_text(encoding="utf-8") updated = text.replace(search, replace) if text != updated: - md_file.write_text(updated, encoding="utf-8") - print(f"Replaced {search} with {replace} in {md_file}") + file.write_text(updated, encoding="utf-8") + print(f"Replaced {search} with {replace} in {file}") -def generate_docs(): +def generate_docs(current_version: str, previous_versions: list[str]): # Replace $COMET_VERSION with actual version for file_pattern in ["*.md", "*.rst"]: @@ -53,4 +50,6 @@ def generate_docs(): if __name__ == "__main__": print("Generating versioned user guide docs...") - generate_docs() \ No newline at end of file + current_version = "0.10.0-SNAPSHOT" + previous_versions = ["0.8", "0.9"] + generate_docs(current_version, previous_versions) \ No newline at end of file From 4919c2ec32f9b3fe17e5bc8ab201a9446b5a1f29 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 09:15:22 -0600 Subject: [PATCH 3/7] add warning for out-of-date docs --- docs/build.sh | 4 ++-- docs/generate-versions.py | 48 ++++++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/docs/build.sh b/docs/build.sh index d647bf7af3..8a7312121e 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -25,8 +25,8 @@ mkdir temp cp -rf source/* temp/ # Add user guide from published releases -rm -rf comet-0.8 -rm -rf comet-0.9 +#rm -rf comet-0.8 +#rm -rf comet-0.9 python3 generate-versions.py # Remove overview pages (this used to be part of the user guide but is now a top level page) diff --git a/docs/generate-versions.py b/docs/generate-versions.py index b3d7df8710..445336d524 100644 --- a/docs/generate-versions.py +++ b/docs/generate-versions.py @@ -34,22 +34,48 @@ def replace_in_files(root: str, filename_pattern: str, search: str, replace: str file.write_text(updated, encoding="utf-8") print(f"Replaced {search} with {replace} in {file}") -def generate_docs(current_version: str, previous_versions: list[str]): +def insert_warning_after_comment(root: str, warning: str): + root_path = Path(root) + for file in root_path.rglob("*.md"): + lines = file.read_text(encoding="utf-8").splitlines(keepends=True) + new_lines = [] + inserted = False + for line in lines: + new_lines.append(line) + if not inserted and "-->" in line: + new_lines.append(warning + "\n") + inserted = True + file.write_text("".join(new_lines), encoding="utf-8") +def publish_released_version(version: str): + os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{version} comet-{version}") + os.system(f"mkdir temp/user-guide/{version}") + os.system(f"cp -rf comet-{version}/docs/source/user-guide/* temp/user-guide/{version}") # Replace $COMET_VERSION with actual version for file_pattern in ["*.md", "*.rst"]: - replace_in_files(f"temp/user-guide/latest", file_pattern, "$COMET_VERSION", current_version) + replace_in_files(f"temp/user-guide/{version}", file_pattern, "$COMET_VERSION", version) + +def generate_docs(snapshot_version: str, latest_released_version: str, previous_versions: list[str]): + + # Replace $COMET_VERSION with actual version for snapshot version + for file_pattern in ["*.md", "*.rst"]: + replace_in_files(f"temp/user-guide/latest", file_pattern, "$COMET_VERSION", snapshot_version) + + # Add user guide content for latest released versions + publish_released_version(latest_released_version) + # Add user guide content for older released versions for version in previous_versions: - os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{version} comet-{version}") - os.system(f"mkdir temp/user-guide/{version}") - os.system(f"cp -rf comet-{version}/docs/source/user-guide/* temp/user-guide/{version}") - # Replace $COMET_VERSION with actual version - for file_pattern in ["*.md", "*.rst"]: - replace_in_files(f"temp/user-guide/{version}", file_pattern, "$COMET_VERSION", current_version) + publish_released_version(version) + # add warning that this is out-of-date documentation + warning = """```{warning} +You’re viewing **out-of-date** documentation. +```""" + insert_warning_after_comment(f"temp/user-guide/{version}", warning) if __name__ == "__main__": print("Generating versioned user guide docs...") - current_version = "0.10.0-SNAPSHOT" - previous_versions = ["0.8", "0.9"] - generate_docs(current_version, previous_versions) \ No newline at end of file + snapshot_version = "0.10.0-SNAPSHOT" + latest_released_version = "0.9" + previous_versions = ["0.8"] + generate_docs(snapshot_version, latest_released_version, previous_versions) \ No newline at end of file From 862608e2f37a92d335d6b891b2d054c3776e2013 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 09:15:32 -0600 Subject: [PATCH 4/7] add warning for out-of-date docs --- docs/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/build.sh b/docs/build.sh index 8a7312121e..d647bf7af3 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -25,8 +25,8 @@ mkdir temp cp -rf source/* temp/ # Add user guide from published releases -#rm -rf comet-0.8 -#rm -rf comet-0.9 +rm -rf comet-0.8 +rm -rf comet-0.9 python3 generate-versions.py # Remove overview pages (this used to be part of the user guide but is now a top level page) From 69aa55c70d3910aeb9c887ba616f76fb338b1bd7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 16:00:44 -0600 Subject: [PATCH 5/7] fix --- docs/generate-versions.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/generate-versions.py b/docs/generate-versions.py index 445336d524..9e76e656e4 100644 --- a/docs/generate-versions.py +++ b/docs/generate-versions.py @@ -25,6 +25,10 @@ import os from pathlib import Path +def get_major_minor_version(version: str): + parts = version.split('.') + return f"{parts[0]}.{parts[1]}" + def replace_in_files(root: str, filename_pattern: str, search: str, replace: str): root_path = Path(root) for file in root_path.rglob(filename_pattern): @@ -34,7 +38,7 @@ def replace_in_files(root: str, filename_pattern: str, search: str, replace: str file.write_text(updated, encoding="utf-8") print(f"Replaced {search} with {replace} in {file}") -def insert_warning_after_comment(root: str, warning: str): +def insert_warning_after_asf_header(root: str, warning: str): root_path = Path(root) for file in root_path.rglob("*.md"): lines = file.read_text(encoding="utf-8").splitlines(keepends=True) @@ -48,12 +52,13 @@ def insert_warning_after_comment(root: str, warning: str): file.write_text("".join(new_lines), encoding="utf-8") def publish_released_version(version: str): - os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{version} comet-{version}") - os.system(f"mkdir temp/user-guide/{version}") - os.system(f"cp -rf comet-{version}/docs/source/user-guide/* temp/user-guide/{version}") + major_minor = get_major_minor_version(version) + os.system(f"git clone --depth 1 https://github.com/apache/datafusion-comet.git -b branch-{major_minor} comet-{major_minor}") + os.system(f"mkdir temp/user-guide/{major_minor}") + os.system(f"cp -rf comet-{major_minor}/docs/source/user-guide/* temp/user-guide/{major_minor}") # Replace $COMET_VERSION with actual version for file_pattern in ["*.md", "*.rst"]: - replace_in_files(f"temp/user-guide/{version}", file_pattern, "$COMET_VERSION", version) + replace_in_files(f"temp/user-guide/{major_minor}", file_pattern, "$COMET_VERSION", version) def generate_docs(snapshot_version: str, latest_released_version: str, previous_versions: list[str]): @@ -71,11 +76,11 @@ def generate_docs(snapshot_version: str, latest_released_version: str, previous_ warning = """```{warning} You’re viewing **out-of-date** documentation. ```""" - insert_warning_after_comment(f"temp/user-guide/{version}", warning) + insert_warning_after_asf_header(f"temp/user-guide/{version}", warning) if __name__ == "__main__": print("Generating versioned user guide docs...") snapshot_version = "0.10.0-SNAPSHOT" - latest_released_version = "0.9" - previous_versions = ["0.8"] + latest_released_version = "0.9.1" + previous_versions = ["0.8.0"] generate_docs(snapshot_version, latest_released_version, previous_versions) \ No newline at end of file From d97bbc60299594574572bdec8fac0bb8c3106190 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 16:07:32 -0600 Subject: [PATCH 6/7] save --- docs/source/user-guide/0.8/index.rst | 4 ++-- docs/source/user-guide/0.9/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/user-guide/0.8/index.rst b/docs/source/user-guide/0.8/index.rst index b34d5e19d9..1633acff0d 100644 --- a/docs/source/user-guide/0.8/index.rst +++ b/docs/source/user-guide/0.8/index.rst @@ -22,10 +22,10 @@ Comet 0.8.x User Guide ======================= -.. _toc.user-guide-links-08: +.. _toc.user-guide-links-$COMET_VERSION: .. toctree:: :maxdepth: 1 - :caption: Comet 0.8.x User Guide + :caption: Comet $COMET_VERSION User Guide Installing Comet Building From Source diff --git a/docs/source/user-guide/0.9/index.rst b/docs/source/user-guide/0.9/index.rst index 7a93b4c5ca..cdcfe1ce8d 100644 --- a/docs/source/user-guide/0.9/index.rst +++ b/docs/source/user-guide/0.9/index.rst @@ -22,10 +22,10 @@ Comet 0.9.x User Guide ======================= -.. _toc.user-guide-links-09: +.. _toc.user-guide-links-$COMET_VERSION: .. toctree:: :maxdepth: 1 - :caption: Comet 0.9.x User Guide + :caption: Comet $COMET_VERSION User Guide Installing Comet Building From Source From d9452f718d7659a594d82969fb5d704719e109e7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 Sep 2025 16:11:32 -0600 Subject: [PATCH 7/7] fix --- docs/generate-versions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/generate-versions.py b/docs/generate-versions.py index 9e76e656e4..28a343ddc6 100644 --- a/docs/generate-versions.py +++ b/docs/generate-versions.py @@ -73,10 +73,11 @@ def generate_docs(snapshot_version: str, latest_released_version: str, previous_ for version in previous_versions: publish_released_version(version) # add warning that this is out-of-date documentation - warning = """```{warning} -You’re viewing **out-of-date** documentation. + warning = f"""```{{warning}} +This is **out-of-date** documentation. The latest Comet release is version {latest_released_version}. ```""" - insert_warning_after_asf_header(f"temp/user-guide/{version}", warning) + major_minor = get_major_minor_version(version) + insert_warning_after_asf_header(f"temp/user-guide/{major_minor}", warning) if __name__ == "__main__": print("Generating versioned user guide docs...")