From 1e5ece3d815c708837806c8be4c2f93cea010d1d Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 10:27:58 +0100 Subject: [PATCH 01/19] Refactor pipeline functions and update project naming conventions to 'gitguarden' --- .../workflows/run_train_deploy_pipeline.yml | 55 ++++++++++++++++++ train_and_deploy/README.md | 2 +- train_and_deploy/configs/deployer_config.yaml | 9 ++- .../configs/inference_config.yaml | 6 +- train_and_deploy/configs/train_config.yaml | 12 ++-- train_and_deploy/pipelines/__init__.py | 6 +- train_and_deploy/pipelines/batch_inference.py | 2 +- .../{deployment.py => local_deployment.py} | 2 +- train_and_deploy/pipelines/training.py | 6 +- train_and_deploy/requirements.txt | 16 +----- train_and_deploy/run.py | 20 +++---- train_and_deploy/service.py | 6 +- .../steps/deployment/bento_builder.py | 10 +++- .../steps/deployment/deployment_deploy.py | 28 ++++++---- .../hp_tuning/hp_tuning_select_best_model.py | 2 +- .../steps/inference/inference_predict.py | 7 +-- ...ute_performance_metrics_on_current_data.py | 1 - .../promotion/promote_with_metric_compare.py | 1 - .../steps/training/model_evaluator.py | 7 +-- .../steps/training/model_trainer.py | 56 ++++++++----------- 20 files changed, 145 insertions(+), 109 deletions(-) create mode 100644 .github/workflows/run_train_deploy_pipeline.yml rename train_and_deploy/pipelines/{deployment.py => local_deployment.py} (97%) diff --git a/.github/workflows/run_train_deploy_pipeline.yml b/.github/workflows/run_train_deploy_pipeline.yml new file mode 100644 index 000000000..b4884870a --- /dev/null +++ b/.github/workflows/run_train_deploy_pipeline.yml @@ -0,0 +1,55 @@ +name: Staging Trigger Train and Deploy Pipeline +on: + pull_request: + types: [opened, synchronize] + branches: [staging, main] + paths: + - 'train_and_deploy/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + run-staging-workflow: + runs-on: ubuntu-latest + env: + 
ZENML_HOST: ${{ secrets.ZENML_HOST }} + ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }} + ZENML_STAGING_STACK: ef6c474d-b6e7-49a7-a046-0dab39f7969a + ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }} + ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }} + ZENML_DEBUG: true + ZENML_ANALYTICS_OPT_IN: false + ZENML_LOGGING_VERBOSITY: INFO + ZENML_PROJECT_SECRET_NAME: llm-complete + ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True + + steps: + - name: Check out repository code + uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install requirements + working-directory: ./train_and_deploy + run: | + pip3 install -r requirements.txt + zenml integration install bentoml skypilot_kubernetes s3 aws evidently --uv -y + + - name: Connect to ZenML server + working-directory: ./train_and_deploy + run: | + zenml init + + - name: Set stack (Staging) + working-directory: ./train_and_deploy + run: | + zenml stack set ${{ env.ZENML_STAGING_STACK }} + + - name: Run pipeline (Staging) + working-directory: ./train_and_deploy + run: | + python run.py --training \ No newline at end of file diff --git a/train_and_deploy/README.md b/train_and_deploy/README.md index ffa4dad04..1abe6603a 100644 --- a/train_and_deploy/README.md +++ b/train_and_deploy/README.md @@ -7,7 +7,7 @@ classification datasets provided by the scikit-learn library. 
The project was generated from the [E2E Batch ZenML project template](https://github.com/zenml-io/template-e2e-batch) with the following properties: - Project name: ZenML E2E project -- Technical Name: e2e_use_case +- Technical Name: gitguarden - Version: `0.0.1` - Licensed with apache to ZenML GmbH<> - Deployment environment: `staging` diff --git a/train_and_deploy/configs/deployer_config.yaml b/train_and_deploy/configs/deployer_config.yaml index aae54437b..c24231b2b 100644 --- a/train_and_deploy/configs/deployer_config.yaml +++ b/train_and_deploy/configs/deployer_config.yaml @@ -18,14 +18,13 @@ # environment configuration settings: docker: + python_package_installer: uv required_integrations: - aws - - evidently - - mlflow - sklearn - - slack - bentoml + # configuration of steps steps: notify_on_success: @@ -34,8 +33,8 @@ steps: # configuration of the Model Control Plane model: - name: e2e_use_case - version: production + name: gitguarden + version: staging # pipeline level extra configurations extra: diff --git a/train_and_deploy/configs/inference_config.yaml b/train_and_deploy/configs/inference_config.yaml index 5c79f2d71..d6f1be757 100644 --- a/train_and_deploy/configs/inference_config.yaml +++ b/train_and_deploy/configs/inference_config.yaml @@ -18,10 +18,8 @@ # environment configuration settings: docker: + python_package_installer: uv required_integrations: - - gcp - - evidently - - mlflow - sklearn - slack - bentoml @@ -34,7 +32,7 @@ steps: # configuration of the Model Control Plane model: - name: e2e_use_case + name: gitguarden version: staging # pipeline level extra configurations diff --git a/train_and_deploy/configs/train_config.yaml b/train_and_deploy/configs/train_config.yaml index 674bbf665..29d74645e 100644 --- a/train_and_deploy/configs/train_config.yaml +++ b/train_and_deploy/configs/train_config.yaml @@ -18,10 +18,8 @@ # environment configuration settings: docker: + python_package_installer: uv required_integrations: - - gcp - - evidently - - 
mlflow - sklearn - slack - bentoml @@ -30,19 +28,19 @@ settings: steps: model_trainer: parameters: - name: e2e_use_case + name: gitguarden promote_with_metric_compare: parameters: - mlflow_model_name: e2e_use_case + mlflow_model_name: gitguarden notify_on_success: parameters: notify_on_success: False # configuration of the Model Control Plane model: - name: e2e_use_case + name: gitguarden license: apache - description: e2e_use_case E2E Batch Use Case + description: gitguarden E2E Batch Use Case audience: All ZenML users use_cases: | The ZenML E2E project project demonstrates how the most important steps of diff --git a/train_and_deploy/pipelines/__init__.py b/train_and_deploy/pipelines/__init__.py index 634503d19..6123686c1 100644 --- a/train_and_deploy/pipelines/__init__.py +++ b/train_and_deploy/pipelines/__init__.py @@ -16,6 +16,6 @@ # -from .batch_inference import e2e_use_case_batch_inference -from .training import e2e_use_case_training -from .deployment import e2e_use_case_deployment +from .batch_inference import gitguarden_batch_inference +from .training import gitguarden_training +from .local_deployment import gitguarden_local_deployment diff --git a/train_and_deploy/pipelines/batch_inference.py b/train_and_deploy/pipelines/batch_inference.py index fbc772273..3b7f5015d 100644 --- a/train_and_deploy/pipelines/batch_inference.py +++ b/train_and_deploy/pipelines/batch_inference.py @@ -33,7 +33,7 @@ @pipeline(on_failure=notify_on_failure) -def e2e_use_case_batch_inference(): +def gitguarden_batch_inference(): """ Model batch inference pipeline. 
diff --git a/train_and_deploy/pipelines/deployment.py b/train_and_deploy/pipelines/local_deployment.py similarity index 97% rename from train_and_deploy/pipelines/deployment.py rename to train_and_deploy/pipelines/local_deployment.py index 7d0875a6c..0f0fffa65 100644 --- a/train_and_deploy/pipelines/deployment.py +++ b/train_and_deploy/pipelines/local_deployment.py @@ -21,7 +21,7 @@ @pipeline(on_failure=notify_on_failure, enable_cache=False) -def e2e_use_case_deployment(): +def gitguarden_local_deployment(): """ Model deployment pipeline. diff --git a/train_and_deploy/pipelines/training.py b/train_and_deploy/pipelines/training.py index ba9a2f754..9b151eb6d 100644 --- a/train_and_deploy/pipelines/training.py +++ b/train_and_deploy/pipelines/training.py @@ -32,7 +32,6 @@ train_data_preprocessor, train_data_splitter, ) - from zenml import pipeline from zenml.logger import get_logger @@ -40,7 +39,7 @@ @pipeline(on_failure=notify_on_failure) -def e2e_use_case_training( +def gitguarden_training( model_search_space: Dict[str, Any], target_env: str, test_size: float = 0.2, @@ -51,8 +50,7 @@ def e2e_use_case_training( min_test_accuracy: float = 0.0, fail_on_accuracy_quality_gates: bool = False, ): - """ - Model training pipeline. + """Model training pipeline. 
This is a pipeline that loads the data, processes it and splits it into train and test sets, then search for best hyperparameters, diff --git a/train_and_deploy/requirements.txt b/train_and_deploy/requirements.txt index aab392b53..055d50a7b 100644 --- a/train_and_deploy/requirements.txt +++ b/train_and_deploy/requirements.txt @@ -1,16 +1,4 @@ -zenml[server]==0.56.3 +zenml[server]==0.70.0 bentoml>=1.0.10 scikit-learn -mlflow>=2.1.1,<=2.12.1 -mlserver>=1.3.3 -mlserver-mlflow>=1.3.3 -python-rapidjson<1.15 -kfp==1.8.22 -gcsfs -google-cloud-secret-manager -google-cloud-container>=2.21.0 -google-cloud-storage>=2.9.0 -google-cloud-aiplatform>=1.34.0 -google-cloud-build>=3.11.0 -kubernetes -evidently>0.2.6,<0.4.5 \ No newline at end of file +kubernetes \ No newline at end of file diff --git a/train_and_deploy/run.py b/train_and_deploy/run.py index 5a9866c22..5ecc0bab8 100644 --- a/train_and_deploy/run.py +++ b/train_and_deploy/run.py @@ -21,11 +21,10 @@ import click from pipelines import ( - e2e_use_case_batch_inference, - e2e_use_case_deployment, - e2e_use_case_training, + gitguarden_batch_inference, + gitguarden_local_deployment, + gitguarden_training, ) - from zenml.logger import get_logger logger = get_logger(__name__) @@ -168,7 +167,6 @@ def main( not affect the pipeline. only_inference: If `True` only inference pipeline will be triggered. """ - # Run a pipeline with the required parameters. This executes # all steps in the pipeline in the correct order using the orchestrator # stack component that is configured in your active ZenML stack. 
@@ -195,9 +193,9 @@ def main( "train_config.yaml", ) pipeline_args["run_name"] = ( - f"e2e_use_case_training_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"gitguarden_training_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - e2e_use_case_training.with_options(**pipeline_args)(**run_args_train) + gitguarden_training.with_options(**pipeline_args)(**run_args_train) logger.info("Training pipeline finished successfully!") if deployment: @@ -209,9 +207,9 @@ def main( "deployer_config.yaml", ) pipeline_args["run_name"] = ( - f"e2e_use_case_deployment_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"gitguarden_local_deployment_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - e2e_use_case_deployment.with_options(**pipeline_args)(**run_args_inference) + gitguarden_local_deployment.with_options(**pipeline_args)(**run_args_inference) if inference: # Execute Batch Inference Pipeline @@ -222,9 +220,9 @@ def main( "inference_config.yaml", ) pipeline_args["run_name"] = ( - f"e2e_use_case_batch_inference_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"gitguarden_batch_inference_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - e2e_use_case_batch_inference.with_options(**pipeline_args)( + gitguarden_batch_inference.with_options(**pipeline_args)( **run_args_inference ) diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py index 0ac94c253..b43c0d07b 100644 --- a/train_and_deploy/service.py +++ b/train_and_deploy/service.py @@ -3,12 +3,12 @@ import bentoml from bentoml.io import NumpyNdarray -e2e_use_case_runner = bentoml.sklearn.get("e2e_use_case").to_runner() +gitguarden_runner = bentoml.sklearn.get("gitguarden").to_runner() -svc = bentoml.Service(name="e2e_use_case_service", runners=[e2e_use_case_runner]) +svc = bentoml.Service(name="gitguarden_service", runners=[gitguarden_runner]) input_spec = NumpyNdarray(dtype="float", shape=(-1, 30)) @svc.api(input=input_spec, output=NumpyNdarray()) async def predict(input_arr): - return await 
e2e_use_case_runner.predict.async_run(input_arr) + return await gitguarden_runner.predict.async_run(input_arr) diff --git a/train_and_deploy/steps/deployment/bento_builder.py b/train_and_deploy/steps/deployment/bento_builder.py index c53c7b7fa..35541bdc8 100644 --- a/train_and_deploy/steps/deployment/bento_builder.py +++ b/train_and_deploy/steps/deployment/bento_builder.py @@ -35,7 +35,7 @@ def bento_builder() -> ( Annotated[ Optional[bento.Bento], - ArtifactConfig(name="mlflow_deployment", is_model_artifact=True), + ArtifactConfig(name="bentoml_deployment", is_model_artifact=True), ] ): """Predictions step. @@ -72,6 +72,14 @@ def bento_builder() -> ( "bento_uri": os.path.join(get_step_context().get_output_artifact_uri(), DEFAULT_BENTO_FILENAME), }, build_ctx=source_utils.get_source_root(), + python={ + "packages": [ + "scikit-learn", + "pandas", + "numpy", + "zenml" + ], + }, ) else: logger.warning("Skipping deployment as the orchestrator is not local.") diff --git a/train_and_deploy/steps/deployment/deployment_deploy.py b/train_and_deploy/steps/deployment/deployment_deploy.py index 90cc82ed6..5fbe11440 100644 --- a/train_and_deploy/steps/deployment/deployment_deploy.py +++ b/train_and_deploy/steps/deployment/deployment_deploy.py @@ -18,19 +18,25 @@ from typing import Optional +from bentoml._internal.bento import bento from typing_extensions import Annotated - -from zenml import ArtifactConfig, get_step_context, step +from zenml import ( + ArtifactConfig, + Model, + get_step_context, + log_artifact_metadata, + step, +) from zenml.client import Client -from zenml.integrations.bentoml.services.bentoml_deployment import ( - BentoMLDeploymentService, +from zenml.integrations.bentoml.services.bentoml_container_deployment import ( + BentoMLContainerDeploymentService, +) +from zenml.integrations.bentoml.services.deployment_type import ( + BentoMLDeploymentType, ) -from zenml import Model, log_artifact_metadata from zenml.integrations.bentoml.steps import 
bentoml_model_deployer_step from zenml.logger import get_logger -from bentoml._internal.bento import bento - logger = get_logger(__name__) @step @@ -38,7 +44,7 @@ def deployment_deploy( bento: bento.Bento, ) -> ( Annotated[ - Optional[BentoMLDeploymentService], + Optional[BentoMLContainerDeploymentService], ArtifactConfig(name="bentoml_deployment", is_deployment_artifact=True), ] ): @@ -68,9 +74,10 @@ def deployment_deploy( bentoml_deployment = bentoml_model_deployer_step.entrypoint( model_name=model.name, # Name of the model port=3009, # Port to be used by the http server - production=False, # Deploy the model in production mode + production=True, # Deploy the model in production mode timeout=1000, bento=bento, + deployment_type=BentoMLDeploymentType.CONTAINER, ) bentoml_service = Client().get_service(name_id_or_prefix=bentoml_deployment.uuid) @@ -82,7 +89,8 @@ def deployment_deploy( "prediction_url": bentoml_service.prediction_url, "health_check_url": bentoml_service.health_check_url, "model_uri": model.get_artifact(name="model").uri, - "bento" : bentoml_service.config.get("bento"), + "bento_tag" : bentoml_service.config.get("bento_tag"), + "bentoml_model_image": bentoml_service.config.get("image"), } ) else: diff --git a/train_and_deploy/steps/hp_tuning/hp_tuning_select_best_model.py b/train_and_deploy/steps/hp_tuning/hp_tuning_select_best_model.py index 7d5a6bc33..65e524ecd 100644 --- a/train_and_deploy/steps/hp_tuning/hp_tuning_select_best_model.py +++ b/train_and_deploy/steps/hp_tuning/hp_tuning_select_best_model.py @@ -50,7 +50,7 @@ def hp_tuning_select_best_model( hp_output = model.get_data_artifact("hp_result") model_: ClassifierMixin = hp_output.load() # fetch metadata we attached earlier - metric = float(hp_output.run_metadata["metric"].value) + metric = float(hp_output.run_metadata["metric"]) if best_model is None or best_metric < metric: best_model = model_ ### YOUR CODE ENDS HERE ### diff --git a/train_and_deploy/steps/inference/inference_predict.py 
b/train_and_deploy/steps/inference/inference_predict.py index f8242b5c5..99077df1e 100644 --- a/train_and_deploy/steps/inference/inference_predict.py +++ b/train_and_deploy/steps/inference/inference_predict.py @@ -20,10 +20,9 @@ import pandas as pd from typing_extensions import Annotated - from zenml import get_step_context, step -from zenml.integrations.bentoml.services.bentoml_deployment import ( - BentoMLDeploymentService, +from zenml.integrations.bentoml.services.bentoml_container_deployment import ( + BentoMLContainerDeploymentService, ) from zenml.logger import get_logger @@ -56,7 +55,7 @@ def inference_predict( model = get_step_context().model # get predictor - predictor_service: Optional[BentoMLDeploymentService] = model.load_artifact( + predictor_service: Optional[BentoMLContainerDeploymentService] = model.load_artifact( "bentomldeployment" ) if predictor_service is not None: diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 7fabfb304..49974018a 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -53,7 +53,6 @@ def compute_performance_metrics_on_current_data( Returns: Latest version and current version metric values on a test set. """ - ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### X = dataset_tst.drop(columns=["target"]) y = dataset_tst["target"].to_numpy() diff --git a/train_and_deploy/steps/promotion/promote_with_metric_compare.py b/train_and_deploy/steps/promotion/promote_with_metric_compare.py index 48d235945..712d590ee 100644 --- a/train_and_deploy/steps/promotion/promote_with_metric_compare.py +++ b/train_and_deploy/steps/promotion/promote_with_metric_compare.py @@ -49,7 +49,6 @@ def promote_with_metric_compare( latest_metric: Recently trained model metric results. 
current_metric: Previously promoted model metric results. """ - ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### should_promote = True diff --git a/train_and_deploy/steps/training/model_evaluator.py b/train_and_deploy/steps/training/model_evaluator.py index 7a4656e5c..2e61a1726 100644 --- a/train_and_deploy/steps/training/model_evaluator.py +++ b/train_and_deploy/steps/training/model_evaluator.py @@ -16,7 +16,6 @@ # -import mlflow import pandas as pd from sklearn.base import ClassifierMixin @@ -26,10 +25,10 @@ logger = get_logger(__name__) -experiment_tracker = Client().active_stack.experiment_tracker +#experiment_tracker = Client().active_stack.experiment_tracker -@step(experiment_tracker=experiment_tracker.name) +@step#(experiment_tracker=experiment_tracker.name) def model_evaluator( model: ClassifierMixin, dataset_trn: pd.DataFrame, @@ -88,7 +87,7 @@ def model_evaluator( dataset_tst[target], ) logger.info(f"Test accuracy={tst_acc*100:.2f}%") - mlflow.log_metric("testing_accuracy_score", tst_acc) + #mlflow.log_metric("testing_accuracy_score", tst_acc) messages = [] if trn_acc < min_train_accuracy: diff --git a/train_and_deploy/steps/training/model_trainer.py b/train_and_deploy/steps/training/model_trainer.py index 8d2fdb2d9..f369948ae 100644 --- a/train_and_deploy/steps/training/model_trainer.py +++ b/train_and_deploy/steps/training/model_trainer.py @@ -15,37 +15,28 @@ # limitations under the License. 
# -import mlflow import pandas as pd from sklearn.base import ClassifierMixin from typing_extensions import Annotated -from zenml import log_model_metadata -from zenml.metadata.metadata_types import Uri - -from zenml import ArtifactConfig, get_step_context, step +from zenml import ArtifactConfig, get_step_context, log_model_metadata, step from zenml.client import Client -from zenml.integrations.mlflow.experiment_trackers import ( - MLFlowExperimentTracker, -) -from zenml.integrations.mlflow.steps.mlflow_registry import ( - mlflow_register_model_step, -) from zenml.logger import get_logger +from zenml.metadata.metadata_types import Uri logger = get_logger(__name__) -experiment_tracker = Client().active_stack.experiment_tracker +#experiment_tracker = Client().active_stack.experiment_tracker -if not experiment_tracker or not isinstance( - experiment_tracker, MLFlowExperimentTracker -): - raise RuntimeError( - "Your active stack needs to contain a MLFlow experiment tracker for " - "this example to work." - ) +#if not experiment_tracker or not isinstance( +# experiment_tracker, MLFlowExperimentTracker +#): + #raise RuntimeError( + # "Your active stack needs to contain a MLFlow experiment tracker for " + # "this example to work." + #) -@step(experiment_tracker=experiment_tracker.name) +@step#(experiment_tracker=experiment_tracker.name) def model_trainer( dataset_trn: pd.DataFrame, model: ClassifierMixin, @@ -84,27 +75,26 @@ def model_trainer( Returns: The trained model artifact. """ - ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### # Initialize the model with the hyperparameters indicated in the step # parameters and train it on the training set. 
logger.info(f"Training model {model}...") - mlflow.sklearn.autolog() + #mlflow.sklearn.autolog() model.fit( dataset_trn.drop(columns=[target]), dataset_trn[target], ) - log_model_metadata( - metadata={ - "experiment_tracker": { - "experiment_tracker_url": Uri( - experiment_tracker.get_tracking_uri() - ), - "experiment_tracker_run_id": mlflow.last_active_run().info.run_id, - "experiment_tracker_run_name": mlflow.active_run().info.run_name, - "experiment_tracker_experiment_id": mlflow.active_run().info.experiment_id, - }} - ) + #log_model_metadata( + # metadata={ + # "experiment_tracker": { + # "experiment_tracker_url": Uri( + # experiment_tracker.get_tracking_uri() + # ), + # "experiment_tracker_run_id": mlflow.last_active_run().info.run_id, + # "experiment_tracker_run_name": mlflow.active_run().info.run_name, + # "experiment_tracker_experiment_id": mlflow.active_run().info.experiment_id, + # }} + #) return model From 3fe656c675ae9fd9cc310f042415599b51b48496 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 10:32:35 +0100 Subject: [PATCH 02/19] Update GitHub Actions workflow to use new ZenML store environment variables --- .github/workflows/run_train_deploy_pipeline.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_train_deploy_pipeline.yml b/.github/workflows/run_train_deploy_pipeline.yml index b4884870a..ed4e9b97b 100644 --- a/.github/workflows/run_train_deploy_pipeline.yml +++ b/.github/workflows/run_train_deploy_pipeline.yml @@ -14,15 +14,14 @@ jobs: run-staging-workflow: runs-on: ubuntu-latest env: - ZENML_HOST: ${{ secrets.ZENML_HOST }} - ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }} - ZENML_STAGING_STACK: ef6c474d-b6e7-49a7-a046-0dab39f7969a + ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }} + ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }} + ZENML_STAGING_STACK : ef6c474d-b6e7-49a7-a046-0dab39f7969a # Set this to your staging stack ID ZENML_GITHUB_SHA: ${{ 
github.event.pull_request.head.sha }} ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }} ZENML_DEBUG: true ZENML_ANALYTICS_OPT_IN: false ZENML_LOGGING_VERBOSITY: INFO - ZENML_PROJECT_SECRET_NAME: llm-complete ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True steps: From 1adf5a1e5f4e18f6b4a25cfee8fd2b843b8776e8 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 10:44:30 +0100 Subject: [PATCH 03/19] Update train_config.yaml with enhanced model search space and parameters --- train_and_deploy/configs/train_config.yaml | 53 +++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/train_and_deploy/configs/train_config.yaml b/train_and_deploy/configs/train_config.yaml index 29d74645e..02cc297bc 100644 --- a/train_and_deploy/configs/train_config.yaml +++ b/train_and_deploy/configs/train_config.yaml @@ -59,10 +59,10 @@ model: extra: notify_on_failure: True # pipeline level parameters +# Updated train_config.yaml + parameters: target_env: staging - # This set contains all the model configurations that you want - # to evaluate during hyperparameter tuning stage. 
model_search_space: random_forest: model_package: sklearn.ensemble @@ -78,15 +78,20 @@ parameters: - 8 - 10 - 12 + - None # Allow unlimited depth min_samples_leaf: range: start: 1 - end: 10 + end: 15 n_estimators: range: start: 50 - end: 500 - step: 25 + end: 1000 + step: 50 + max_features: + - auto + - sqrt + - log2 decision_tree: model_package: sklearn.tree model_class: DecisionTreeClassifier @@ -101,7 +106,43 @@ parameters: - 8 - 10 - 12 + - None min_samples_leaf: range: start: 1 - end: 10 \ No newline at end of file + end: 15 + gradient_boosting: + model_package: sklearn.ensemble + model_class: GradientBoostingClassifier + search_grid: + learning_rate: + - 0.01 + - 0.1 + - 0.2 + n_estimators: + range: + start: 50 + end: 500 + step: 50 + max_depth: + - 3 + - 5 + - 7 + subsample: + - 0.6 + - 0.8 + - 1.0 + svc: + model_package: sklearn.svm + model_class: SVC + search_grid: + C: + - 0.1 + - 1 + - 10 + kernel: + - linear + - rbf + gamma: + - scale + - auto \ No newline at end of file From 698d145e157d9203b25215ced52640a4d6b8ca71 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 10:55:41 +0100 Subject: [PATCH 04/19] Remove redundant model parameters from train_config.yaml --- train_and_deploy/configs/train_config.yaml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/train_and_deploy/configs/train_config.yaml b/train_and_deploy/configs/train_config.yaml index 02cc297bc..63fc3ac83 100644 --- a/train_and_deploy/configs/train_config.yaml +++ b/train_and_deploy/configs/train_config.yaml @@ -131,18 +131,4 @@ parameters: subsample: - 0.6 - 0.8 - - 1.0 - svc: - model_package: sklearn.svm - model_class: SVC - search_grid: - C: - - 0.1 - - 1 - - 10 - kernel: - - linear - - rbf - gamma: - - scale - - auto \ No newline at end of file + - 1.0 \ No newline at end of file From 943cfcee5207b1dfb6496e5bc35449b87814bbf5 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 11:17:38 +0100 Subject: [PATCH 05/19] 
Handle KeyError when retrieving current model version number --- .../compute_performance_metrics_on_current_data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 49974018a..2c7b81cb6 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -60,10 +60,14 @@ def compute_performance_metrics_on_current_data( # Get model version numbers from Model Control Plane latest_version = get_step_context().model - current_version = Model(name=latest_version.name, version=target_env) latest_version_number = latest_version.number - current_version_number = current_version.number + + current_version = Model(name=latest_version.name, version=target_env) + try: + current_version_number = current_version.number + except KeyError: + current_version_number = None if current_version_number is None: current_version_number = -1 From 74baad49980eefab9abec046e8f79a8c117ffe5c Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 11:25:21 +0100 Subject: [PATCH 06/19] Fix KeyError handling when retrieving current model version number --- .../promotion/compute_performance_metrics_on_current_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 2c7b81cb6..7b4ae7234 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -63,8 +63,8 @@ def compute_performance_metrics_on_current_data( latest_version_number = latest_version.number - current_version = 
Model(name=latest_version.name, version=target_env) try: + current_version = Model(name=latest_version.name, version=target_env) current_version_number = current_version.number except KeyError: current_version_number = None From 692798eb36428a14951ce8ca9c0fab26cf386704 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 11:50:31 +0100 Subject: [PATCH 07/19] Enhance error handling for current model version retrieval --- .../promotion/compute_performance_metrics_on_current_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 7b4ae7234..561d3c27a 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -66,7 +66,7 @@ def compute_performance_metrics_on_current_data( try: current_version = Model(name=latest_version.name, version=target_env) current_version_number = current_version.number - except KeyError: + except (RuntimeError, KeyError): current_version_number = None if current_version_number is None: From 96c11b1cc8d4182084253c40d95df0d3436cdf23 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 12:01:30 +0100 Subject: [PATCH 08/19] Update current model version retrieval to use ModelStages for staging --- .../compute_performance_metrics_on_current_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 561d3c27a..7c66b58d4 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -20,8 +20,8 @@ import 
pandas as pd from sklearn.metrics import accuracy_score from typing_extensions import Annotated - from zenml import Model, get_step_context, step +from zenml.enums import ModelStages from zenml.logger import get_logger logger = get_logger(__name__) @@ -64,8 +64,7 @@ def compute_performance_metrics_on_current_data( latest_version_number = latest_version.number try: - current_version = Model(name=latest_version.name, version=target_env) - current_version_number = current_version.number + current_version = Model(name=latest_version.name, version=ModelStages.STAGING) except (RuntimeError, KeyError): current_version_number = None @@ -73,6 +72,7 @@ def compute_performance_metrics_on_current_data( current_version_number = -1 metrics = {latest_version_number: 1.0, current_version_number: 0.0} else: + current_version_number = current_version.number # Get predictors predictors = { latest_version_number: latest_version.load_artifact("model"), From 4b61d49fe7fee6f690e46c69716db90c0fabc72e Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 14:19:00 +0100 Subject: [PATCH 09/19] Refactor error handling for current version retrieval in performance metrics computation --- .../promotion/compute_performance_metrics_on_current_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 7c66b58d4..76ef8a10a 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -62,11 +62,11 @@ def compute_performance_metrics_on_current_data( latest_version = get_step_context().model latest_version_number = latest_version.number - + current_version_number = None try: current_version = Model(name=latest_version.name, version=ModelStages.STAGING) except (RuntimeError, 
KeyError): - current_version_number = None + pass if current_version_number is None: current_version_number = -1 From 5c29e59340b3fcc8bfc6c415880bfd60d9296dd9 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 14:32:25 +0100 Subject: [PATCH 10/19] Refactor exception handling for current version retrieval in performance metrics computation --- .../promotion/compute_performance_metrics_on_current_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index 76ef8a10a..a502d086e 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -65,7 +65,7 @@ def compute_performance_metrics_on_current_data( current_version_number = None try: current_version = Model(name=latest_version.name, version=ModelStages.STAGING) - except (RuntimeError, KeyError): + except KeyError: pass if current_version_number is None: From 0b09eb69edc0f0d21ee6161b7ac7d35a017febf7 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 14:37:28 +0100 Subject: [PATCH 11/19] Refactor exception handling in performance metrics computation to catch all exceptions --- .../promotion/compute_performance_metrics_on_current_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py index a502d086e..83f089788 100644 --- a/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/train_and_deploy/steps/promotion/compute_performance_metrics_on_current_data.py @@ -65,7 +65,7 @@ def compute_performance_metrics_on_current_data( current_version_number = None try: current_version = 
Model(name=latest_version.name, version=ModelStages.STAGING) - except KeyError: + except Exception: pass if current_version_number is None: From e7fe0528a93e3122ea68aace8f0b5c2a1d49bf5b Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 27 Nov 2024 14:47:47 +0100 Subject: [PATCH 12/19] Refactor current model version retrieval to use ModelStages and improve error handling --- .../steps/promotion/promote_with_metric_compare.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/train_and_deploy/steps/promotion/promote_with_metric_compare.py b/train_and_deploy/steps/promotion/promote_with_metric_compare.py index 712d590ee..22ac2f3e8 100644 --- a/train_and_deploy/steps/promotion/promote_with_metric_compare.py +++ b/train_and_deploy/steps/promotion/promote_with_metric_compare.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from zenml import Model, get_step_context, step +from zenml.enums import ModelStages from zenml.logger import get_logger logger = get_logger(__name__) @@ -54,10 +55,12 @@ def promote_with_metric_compare( # Get model version numbers from Model Control Plane latest_version = get_step_context().model - current_version = Model(name=latest_version.name, version=target_env) - - current_version_number = current_version.number - + current_version_number = None + try: + current_version = Model(name=latest_version.name, version=ModelStages.STAGING) + current_version_number = current_version.number + except Exception: + pass if current_version_number is None: logger.info("No current model version found - promoting latest") else: From 5fa513b3b5a1ccfd6850bd39f406cf2e1f6fdab1 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 10:31:38 +0100 Subject: [PATCH 13/19] Add target environment parameter to deployment and inference pipelines --- train_and_deploy/configs/deployer_config.yaml | 3 + .../configs/inference_config.yaml | 2 + 
train_and_deploy/pipelines/batch_inference.py | 5 +- .../pipelines/local_deployment.py | 9 +- train_and_deploy/service.py | 2 +- .../steps/deployment/deployment_deploy.py | 128 ++++++------------ .../steps/inference/inference_predict.py | 21 ++- 7 files changed, 73 insertions(+), 97 deletions(-) diff --git a/train_and_deploy/configs/deployer_config.yaml b/train_and_deploy/configs/deployer_config.yaml index c24231b2b..cd575ec2b 100644 --- a/train_and_deploy/configs/deployer_config.yaml +++ b/train_and_deploy/configs/deployer_config.yaml @@ -40,3 +40,6 @@ model: extra: notify_on_failure: True + +parameters: + target_env: staging diff --git a/train_and_deploy/configs/inference_config.yaml b/train_and_deploy/configs/inference_config.yaml index d6f1be757..1a46bd016 100644 --- a/train_and_deploy/configs/inference_config.yaml +++ b/train_and_deploy/configs/inference_config.yaml @@ -39,3 +39,5 @@ model: extra: notify_on_failure: True +parameters: + target_env: staging \ No newline at end of file diff --git a/train_and_deploy/pipelines/batch_inference.py b/train_and_deploy/pipelines/batch_inference.py index 3b7f5015d..08d8d0fba 100644 --- a/train_and_deploy/pipelines/batch_inference.py +++ b/train_and_deploy/pipelines/batch_inference.py @@ -33,7 +33,9 @@ @pipeline(on_failure=notify_on_failure) -def gitguarden_batch_inference(): +def gitguarden_batch_inference( + target_env: str, +): """ Model batch inference pipeline. 
@@ -66,6 +68,7 @@ def gitguarden_batch_inference(): ########## Inference stage ########## inference_predict( dataset_inf=df_inference, + target_env=target_env, after=["drift_quality_gate"], ) diff --git a/train_and_deploy/pipelines/local_deployment.py b/train_and_deploy/pipelines/local_deployment.py index 0f0fffa65..dd3a4f36e 100644 --- a/train_and_deploy/pipelines/local_deployment.py +++ b/train_and_deploy/pipelines/local_deployment.py @@ -21,9 +21,10 @@ @pipeline(on_failure=notify_on_failure, enable_cache=False) -def gitguarden_local_deployment(): - """ - Model deployment pipeline. +def gitguarden_local_deployment( + target_env: str, +): + """Model deployment pipeline. This is a pipeline deploys trained model for future inference. """ @@ -33,7 +34,7 @@ def gitguarden_local_deployment(): ########## Deployment stage ########## # Get the production model artifact bento = bento_builder() - deployment_deploy(bento=bento) + deployment_deploy(bento=bento, target_env=target_env) notify_on_success(after=["deployment_deploy"]) ### YOUR CODE ENDS HERE ### diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py index b43c0d07b..83170eab6 100644 --- a/train_and_deploy/service.py +++ b/train_and_deploy/service.py @@ -11,4 +11,4 @@ @svc.api(input=input_spec, output=NumpyNdarray()) async def predict(input_arr): - return await gitguarden_runner.predict.async_run(input_arr) + return await gitguarden_runner.predict.async_run(input_arr) \ No newline at end of file diff --git a/train_and_deploy/steps/deployment/deployment_deploy.py b/train_and_deploy/steps/deployment/deployment_deploy.py index 5fbe11440..3cd109c9b 100644 --- a/train_and_deploy/steps/deployment/deployment_deploy.py +++ b/train_and_deploy/steps/deployment/deployment_deploy.py @@ -1,100 +1,60 @@ -# Apache Software License 2.0 +# Copyright (c) ZenML GmbH 2022. All Rights Reserved. # -# Copyright (c) ZenML GmbH 2024. All rights reserved. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# https://www.apache.org/licenses/LICENSE-2.0 # - +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
from typing import Optional from bentoml._internal.bento import bento -from typing_extensions import Annotated -from zenml import ( - ArtifactConfig, - Model, - get_step_context, - log_artifact_metadata, - step, -) +from zenml import get_step_context, step from zenml.client import Client -from zenml.integrations.bentoml.services.bentoml_container_deployment import ( - BentoMLContainerDeploymentService, +from zenml.integrations.bentoml.services.bentoml_local_deployment import ( + BentoMLLocalDeploymentConfig, + BentoMLLocalDeploymentService, ) -from zenml.integrations.bentoml.services.deployment_type import ( - BentoMLDeploymentType, -) -from zenml.integrations.bentoml.steps import bentoml_model_deployer_step from zenml.logger import get_logger +from zenml.utils import source_utils logger = get_logger(__name__) + @step def deployment_deploy( bento: bento.Bento, -) -> ( - Annotated[ - Optional[BentoMLContainerDeploymentService], - ArtifactConfig(name="bentoml_deployment", is_deployment_artifact=True), - ] -): - """Predictions step. - - This is an example of a predictions step that takes the data in and returns - predicted values. - - This step is parameterized, which allows you to configure the step - independently of the step code, before running it in a pipeline. - In this example, the step can be configured to use different input data. - See the documentation for more information: - - https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines - - Args: - dataset_inf: The inference dataset. 
- - Returns: - The predictions as pandas series - """ - ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### - if Client().active_stack.orchestrator.flavor == "local": - model = get_step_context().model - - # deploy predictor service - bentoml_deployment = bentoml_model_deployer_step.entrypoint( - model_name=model.name, # Name of the model - port=3009, # Port to be used by the http server - production=True, # Deploy the model in production mode - timeout=1000, - bento=bento, - deployment_type=BentoMLDeploymentType.CONTAINER, - ) - - bentoml_service = Client().get_service(name_id_or_prefix=bentoml_deployment.uuid) - - log_artifact_metadata( - metadata={ - "service_type": "bentoml", - "status": bentoml_service.state, - "prediction_url": bentoml_service.prediction_url, - "health_check_url": bentoml_service.health_check_url, - "model_uri": model.get_artifact(name="model").uri, - "bento_tag" : bentoml_service.config.get("bento_tag"), - "bentoml_model_image": bentoml_service.config.get("image"), - } - ) - else: - logger.warning("Skipping deployment as the orchestrator is not local.") - bentoml_deployment = None - ### YOUR CODE ENDS HERE ### - return bentoml_deployment \ No newline at end of file + target_env: str, +) -> Optional[BentoMLLocalDeploymentService]: + # Deploy a model using the MLflow Model Deployer + zenml_client = Client() + step_context = get_step_context() + pipeline_name = step_context.pipeline.name + step_name = step_context.step_run.name + model_deployer = zenml_client.active_stack.model_deployer + bentoml_deployment_config = BentoMLLocalDeploymentConfig( + model_name=step_context.model.name, + model_version=target_env, + description="An example of deploying a model using the MLflow Model Deployer", + pipeline_name=pipeline_name, + pipeline_step_name=step_name, + model_uri=bento.info.labels.get("model_uri"), + bento_tag=str(bento.tag), + bento_uri=bento.info.labels.get("bento_uri"), + working_dir=source_utils.get_source_root(), + timeout=1500, + ) + 
service = model_deployer.deploy_model( + config=bentoml_deployment_config, + service_type=BentoMLLocalDeploymentService.SERVICE_TYPE, + ) + logger.info( + f"The deployed service info: {model_deployer.get_model_server_info(service)}" + ) + return service diff --git a/train_and_deploy/steps/inference/inference_predict.py b/train_and_deploy/steps/inference/inference_predict.py index 99077df1e..76bb017f5 100644 --- a/train_and_deploy/steps/inference/inference_predict.py +++ b/train_and_deploy/steps/inference/inference_predict.py @@ -16,13 +16,13 @@ # -from typing import Optional - +from typing import Optional, cast +from zenml.client import Client import pandas as pd from typing_extensions import Annotated from zenml import get_step_context, step -from zenml.integrations.bentoml.services.bentoml_container_deployment import ( - BentoMLContainerDeploymentService, +from zenml.integrations.bentoml.services.bentoml_local_deployment import ( + BentoMLLocalDeploymentService, ) from zenml.logger import get_logger @@ -32,6 +32,7 @@ @step def inference_predict( dataset_inf: pd.DataFrame, + target_env: str, ) -> Annotated[pd.Series, "predictions"]: """Predictions step. 
@@ -55,12 +56,18 @@ def inference_predict( model = get_step_context().model # get predictor - predictor_service: Optional[BentoMLContainerDeploymentService] = model.load_artifact( - "bentomldeployment" + zenml_client = Client() + model_deployer = zenml_client.active_stack.model_deployer + + # fetch existing services with same pipeline name, step name and model name + existing_services = model_deployer.find_model_server( + model_name=model.name, + model_version=target_env, ) + predictor_service = cast(BentoMLLocalDeploymentService, existing_services[0]) if predictor_service is not None: # run prediction from service - predictions = predictor_service.predict(request=dataset_inf) + predictions = predictor_service.predict(api_endpoint="predict",data=dataset_inf) else: logger.warning( "Predicting from loaded model instead of deployment service " From cde75959c113c9370d7a5f6afbbb66adda216ab9 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 16:47:09 +0100 Subject: [PATCH 14/19] Add production deployment pipeline and related steps for model deployment --- .../configs/deploy_production.yaml | 45 +++++ train_and_deploy/pipelines/__init__.py | 1 + .../pipelines/deploy_production.py | 40 +++++ train_and_deploy/run.py | 23 +++ train_and_deploy/service.py | 34 +++- train_and_deploy/steps/__init__.py | 2 +- train_and_deploy/steps/deployment/__init__.py | 2 + .../steps/deployment/bento_builder.py | 2 +- .../steps/deployment/deploy_to_k8s.py | 164 ++++++++++++++++++ .../steps/deployment/dockerize_bento.py | 69 ++++++++ .../steps/deployment/k8s_template.yaml | 35 ++++ 11 files changed, 407 insertions(+), 10 deletions(-) create mode 100644 train_and_deploy/configs/deploy_production.yaml create mode 100644 train_and_deploy/pipelines/deploy_production.py create mode 100644 train_and_deploy/steps/deployment/deploy_to_k8s.py create mode 100644 train_and_deploy/steps/deployment/dockerize_bento.py create mode 100644 
train_and_deploy/steps/deployment/k8s_template.yaml diff --git a/train_and_deploy/configs/deploy_production.yaml b/train_and_deploy/configs/deploy_production.yaml new file mode 100644 index 000000000..cd575ec2b --- /dev/null +++ b/train_and_deploy/configs/deploy_production.yaml @@ -0,0 +1,45 @@ +# Apache Software License 2.0 +# +# Copyright (c) ZenML GmbH 2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# environment configuration +settings: + docker: + python_package_installer: uv + required_integrations: + - aws + - sklearn + - bentoml + + +# configuration of steps +steps: + notify_on_success: + parameters: + notify_on_success: False + +# configuration of the Model Control Plane +model: + name: gitguarden + version: staging + +# pipeline level extra configurations +extra: + notify_on_failure: True + + +parameters: + target_env: staging diff --git a/train_and_deploy/pipelines/__init__.py b/train_and_deploy/pipelines/__init__.py index 6123686c1..7ef1c39e8 100644 --- a/train_and_deploy/pipelines/__init__.py +++ b/train_and_deploy/pipelines/__init__.py @@ -19,3 +19,4 @@ from .batch_inference import gitguarden_batch_inference from .training import gitguarden_training from .local_deployment import gitguarden_local_deployment +from .deploy_production import gitguarden_production_deployment diff --git a/train_and_deploy/pipelines/deploy_production.py b/train_and_deploy/pipelines/deploy_production.py new file mode 100644 index 
000000000..4545e7d8f --- /dev/null +++ b/train_and_deploy/pipelines/deploy_production.py @@ -0,0 +1,40 @@ +# Apache Software License 2.0 +# +# Copyright (c) ZenML GmbH 2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from steps import dockerize_bento_model, notify_on_failure, notify_on_success, deploy_model_to_k8s + +from zenml import pipeline + + +@pipeline(on_failure=notify_on_failure, enable_cache=False) +def gitguarden_production_deployment( + target_env: str, +): + """Model deployment pipeline. + + This is a pipeline deploys trained model for future inference. + """ + ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### + # Link all the steps together by calling them and passing the output + # of one step as the input of the next step. 
+ ########## Deployment stage ########## + # Get the production model artifact + bento_model_image = dockerize_bento_model(target_env=target_env) + deploy_model_to_k8s(bento_model_image) + + notify_on_success(after=["deploy_model_to_k8s"]) + ### YOUR CODE ENDS HERE ### diff --git a/train_and_deploy/run.py b/train_and_deploy/run.py index 5ecc0bab8..1b938a4d4 100644 --- a/train_and_deploy/run.py +++ b/train_and_deploy/run.py @@ -23,6 +23,7 @@ from pipelines import ( gitguarden_batch_inference, gitguarden_local_deployment, + gitguarden_production_deployment, gitguarden_training, ) from zenml.logger import get_logger @@ -133,6 +134,12 @@ default=False, help="Whether to run the inference pipeline.", ) +@click.option( + "--production", + is_flag=True, + default=False, + help="Whether to run the production pipeline.", +) def main( no_cache: bool = False, no_drop_na: bool = False, @@ -145,6 +152,7 @@ def main( training: bool = True, deployment: bool = False, inference: bool = False, + production: bool = False, ): """Main entry point for the pipeline execution. @@ -166,6 +174,7 @@ def main( thresholds are violated - the pipeline will fail. If `False` thresholds will not affect the pipeline. only_inference: If `True` only inference pipeline will be triggered. + production: If `True` only production pipeline will be triggered. """ # Run a pipeline with the required parameters. 
This executes # all steps in the pipeline in the correct order using the orchestrator @@ -225,6 +234,20 @@ def main( gitguarden_batch_inference.with_options(**pipeline_args)( **run_args_inference ) + if production: + # Execute Production Pipeline + run_args_production = {} + pipeline_args["config_path"] = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "configs", + "deploy_production.yaml", + ) + pipeline_args["run_name"] = ( + f"gitguarden_production_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + ) + gitguarden_production_deployment.with_options(**pipeline_args)( + **run_args_production + ) if __name__ == "__main__": diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py index 83170eab6..d34f0cb17 100644 --- a/train_and_deploy/service.py +++ b/train_and_deploy/service.py @@ -1,14 +1,32 @@ +import bentoml +import numpy as np +from bentoml.validators import Shape +from typing_extensions import Annotated -import bentoml -from bentoml.io import NumpyNdarray +@bentoml.service +class GitGuarden: + """ + A simple service using a sklearn model + """ -gitguarden_runner = bentoml.sklearn.get("gitguarden").to_runner() + # Load in the class scope to declare the model as a dependency of the service + iris_model = bentoml.models.get("gitguarden:latest") -svc = bentoml.Service(name="gitguarden_service", runners=[gitguarden_runner]) + def __init__(self): + """ + Initialize the service by loading the model from the model store + """ + import joblib -input_spec = NumpyNdarray(dtype="float", shape=(-1, 30)) + self.model = joblib.load(self.iris_model.path_of("saved_model.pkl")) -@svc.api(input=input_spec, output=NumpyNdarray()) -async def predict(input_arr): - return await gitguarden_runner.predict.async_run(input_arr) \ No newline at end of file + @bentoml.api + def predict( + self, + input_series: Annotated[np.ndarray, Shape((-1, 30))], + ) -> np.ndarray: + """ + Define API with preprocessing and model inference logic + """ + return 
self.model.predict(input_series) \ No newline at end of file diff --git a/train_and_deploy/steps/__init__.py b/train_and_deploy/steps/__init__.py index 9e546c087..1ed982a21 100644 --- a/train_and_deploy/steps/__init__.py +++ b/train_and_deploy/steps/__init__.py @@ -31,4 +31,4 @@ promote_with_metric_compare, ) from .training import model_evaluator, model_trainer -from .deployment import deployment_deploy, bento_builder +from .deployment import deployment_deploy, bento_builder, dockerize_bento_model, deploy_model_to_k8s diff --git a/train_and_deploy/steps/deployment/__init__.py b/train_and_deploy/steps/deployment/__init__.py index efe4755f4..2b2389eff 100644 --- a/train_and_deploy/steps/deployment/__init__.py +++ b/train_and_deploy/steps/deployment/__init__.py @@ -18,3 +18,5 @@ from .deployment_deploy import deployment_deploy from .bento_builder import bento_builder +from .dockerize_bento import dockerize_bento_model +from .deploy_to_k8s import deploy_model_to_k8s diff --git a/train_and_deploy/steps/deployment/bento_builder.py b/train_and_deploy/steps/deployment/bento_builder.py index 35541bdc8..983e28e1b 100644 --- a/train_and_deploy/steps/deployment/bento_builder.py +++ b/train_and_deploy/steps/deployment/bento_builder.py @@ -63,7 +63,7 @@ def bento_builder() -> ( bento_model = bentoml.sklearn.save_model(model.name, model.load_artifact(name="model")) # Build the BentoML bundle bento = bentos.build( - service="service.py:svc", + service="service.py:GitGuarden", labels={ "zenml_version": zenml_version, "model_name": model.name, diff --git a/train_and_deploy/steps/deployment/deploy_to_k8s.py b/train_and_deploy/steps/deployment/deploy_to_k8s.py new file mode 100644 index 000000000..2bb8b2803 --- /dev/null +++ b/train_and_deploy/steps/deployment/deploy_to_k8s.py @@ -0,0 +1,164 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +from pathlib import Path +from typing import Dict, Optional + +import yaml +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from zenml import get_step_context, step +from zenml.client import Client +from zenml.logger import get_logger + +logger = get_logger(__name__) + +def apply_kubernetes_configuration(k8s_configs: list) -> None: + """Apply Kubernetes configurations using the K8s Python client. + + Args: + k8s_configs: List of Kubernetes configuration dictionaries + """ + # Load Kubernetes configuration + try: + config.load_kube_config() + except: + config.load_incluster_config() # For in-cluster deployment + + # Initialize API clients + k8s_apps_v1 = client.AppsV1Api() + k8s_core_v1 = client.CoreV1Api() + + for k8s_config in k8s_configs: + kind = k8s_config["kind"] + name = k8s_config["metadata"]["name"] + namespace = k8s_config["metadata"].get("namespace", "default") + + try: + if kind == "Deployment": + # Check if deployment exists + try: + k8s_apps_v1.read_namespaced_deployment(name, namespace) + # Update existing deployment + k8s_apps_v1.patch_namespaced_deployment( + name=name, + namespace=namespace, + body=k8s_config + ) + logger.info(f"Updated existing deployment: {name}") + except ApiException as e: + if e.status == 404: + # Create new deployment + k8s_apps_v1.create_namespaced_deployment( + namespace=namespace, + body=k8s_config + ) + logger.info(f"Created new deployment: {name}") + else: + raise e + + elif kind == "Service": + # Check if service exists + try: + k8s_core_v1.read_namespaced_service(name, 
namespace) + # Update existing service + k8s_core_v1.patch_namespaced_service( + name=name, + namespace=namespace, + body=k8s_config + ) + logger.info(f"Updated existing service: {name}") + except ApiException as e: + if e.status == 404: + # Create new service + k8s_core_v1.create_namespaced_service( + namespace=namespace, + body=k8s_config + ) + logger.info(f"Created new service: {name}") + else: + raise e + + except ApiException as e: + logger.error(f"Error applying {kind} {name}: {e}") + raise e + +@step +def deploy_model_to_k8s( + docker_image_tag: str, + namespace: str = "default" +) -> Dict: + """Deploy a service to Kubernetes with the specified docker image and tag. + + Args: + docker_image: The full docker image name (e.g. "organization/image-name") + docker_image_tag: The tag to use for the docker image + namespace: Kubernetes namespace to deploy to (default: "default") + + Returns: + dict: Dictionary containing deployment information + """ + # Get model name from context + model_name = get_step_context().model.name + + # Read the K8s template + template_path = Path(__file__).parent / "k8s_template.yaml" + with open(template_path, "r") as f: + # Load all documents in the YAML file + k8s_configs = list(yaml.safe_load_all(f)) + + # Update both Service and Deployment configurations + for config in k8s_configs: + # Add namespace + config["metadata"]["namespace"] = namespace + + # Update metadata labels and name + config["metadata"]["labels"]["app"] = model_name + config["metadata"]["name"] = model_name + + if config["kind"] == "Service": + # Update service selector + config["spec"]["selector"]["app"] = model_name + + elif config["kind"] == "Deployment": + # Update deployment selector and template + config["spec"]["selector"]["matchLabels"]["app"] = model_name + config["spec"]["template"]["metadata"]["labels"]["app"] = model_name + + # Update the container image and name + containers = config["spec"]["template"]["spec"]["containers"] + for container in 
containers: + container["name"] = model_name + container["image"] = docker_image_tag + + # Apply the configurations + try: + apply_kubernetes_configuration(k8s_configs) + deployment_status = "success" + logger.info(f"Successfully deployed model {model_name} with image: {docker_image_tag}") + except Exception as e: + deployment_status = "failed" + logger.error(f"Failed to deploy model {model_name}: {str(e)}") + raise e + + # Return deployment information + deployment_info = { + "model_name": model_name, + "docker_image": docker_image_tag, + "namespace": namespace, + "status": deployment_status, + "service_port": 3000, + "configurations": k8s_configs + } + + return deployment_info \ No newline at end of file diff --git a/train_and_deploy/steps/deployment/dockerize_bento.py b/train_and_deploy/steps/deployment/dockerize_bento.py new file mode 100644 index 000000000..164178ff6 --- /dev/null +++ b/train_and_deploy/steps/deployment/dockerize_bento.py @@ -0,0 +1,69 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+import os +from typing import Optional + +import bentoml +from bentoml import bentos +from bentoml._internal.bento import bento +from typing_extensions import Annotated +from zenml import ArtifactConfig, Model, get_step_context, step +from zenml import __version__ as zenml_version +from zenml.client import Client +from zenml.integrations.bentoml.constants import DEFAULT_BENTO_FILENAME +from zenml.integrations.bentoml.steps import bento_builder_step +from zenml.logger import get_logger +from zenml.utils import source_utils + +logger = get_logger(__name__) + +@step +def dockerize_bento_model( + target_env: str, + ) -> ( + Annotated[ + str, + ArtifactConfig(name="bentoml_model_image"), + ] +): + """dockerize_bento step. + + This step is responsible for dockerizing the BentoML model. + """ + ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### + model = get_step_context().model + version_to_deploy = Model(name=model.name, version=target_env) + bentoml_deployment = version_to_deploy.get_model_artifact(name="bentoml_deployment") + bento_tag = f'{bentoml_deployment.run_metadata["bento_tag_name"]}:{bentoml_deployment.run_metadata["bento_info_version"]}' + + zenml_client = Client() + container_registry = zenml_client.active_stack.container_registry + assert container_registry, "Container registry is not configured." 
+ image_name = f"{container_registry.config.uri}/{bento_tag}" + image_tag = (image_name,) + try: + bentoml.container.build( + bento_tag=bento_tag, + backend="docker", # hardcoding docker since container service only supports docker + image_tag=image_tag, + ) + + except Exception as e: + logger.error(f"Error containerizing the bento: {e}") + raise e + + container_registry.push_image(image_name) + ### YOUR CODE ENDS HERE ### + return image_name + diff --git a/train_and_deploy/steps/deployment/k8s_template.yaml b/train_and_deploy/steps/deployment/k8s_template.yaml new file mode 100644 index 000000000..dd6b918f8 --- /dev/null +++ b/train_and_deploy/steps/deployment/k8s_template.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: placeholder + name: placeholder +spec: + ports: + - name: http # Changed from 'predict' to 'http' for clarity + port: 80 # External port exposed by LoadBalancer + targetPort: 3000 # Internal container port + selector: + app: placeholder + type: LoadBalancer +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: placeholder + name: placeholder +spec: + selector: + matchLabels: + app: placeholder + template: + metadata: + labels: + app: placeholder + spec: + containers: + - image: placeholder + name: placeholder + ports: + - containerPort: 3000 \ No newline at end of file From 17c0c838759a50d5eb26cd7f9cbf40766371218a Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 18:10:15 +0100 Subject: [PATCH 15/19] Rename model references from gitguarden to secret_detection across configurations and pipelines --- train_and_deploy/README.md | 2 +- .../configs/deploy_production.yaml | 2 +- train_and_deploy/configs/deployer_config.yaml | 2 +- .../configs/inference_config.yaml | 2 +- train_and_deploy/configs/train_config.yaml | 8 ++--- train_and_deploy/gitguarden.yaml | 35 +++++++++++++++++++ train_and_deploy/pipelines/__init__.py | 8 ++--- train_and_deploy/pipelines/batch_inference.py | 2 +- 
.../pipelines/deploy_production.py | 2 +- .../pipelines/local_deployment.py | 2 +- train_and_deploy/pipelines/training.py | 2 +- train_and_deploy/run.py | 24 ++++++------- train_and_deploy/service.py | 2 +- 13 files changed, 64 insertions(+), 29 deletions(-) create mode 100644 train_and_deploy/gitguarden.yaml diff --git a/train_and_deploy/README.md b/train_and_deploy/README.md index 1abe6603a..4eae458e3 100644 --- a/train_and_deploy/README.md +++ b/train_and_deploy/README.md @@ -7,7 +7,7 @@ classification datasets provided by the scikit-learn library. The project was generated from the [E2E Batch ZenML project template](https://github.com/zenml-io/template-e2e-batch) with the following properties: - Project name: ZenML E2E project -- Technical Name: gitguarden +- Technical Name: secret_detection - Version: `0.0.1` - Licensed with apache to ZenML GmbH<> - Deployment environment: `staging` diff --git a/train_and_deploy/configs/deploy_production.yaml b/train_and_deploy/configs/deploy_production.yaml index cd575ec2b..808eb93bc 100644 --- a/train_and_deploy/configs/deploy_production.yaml +++ b/train_and_deploy/configs/deploy_production.yaml @@ -33,7 +33,7 @@ steps: # configuration of the Model Control Plane model: - name: gitguarden + name: secret_detection version: staging # pipeline level extra configurations diff --git a/train_and_deploy/configs/deployer_config.yaml b/train_and_deploy/configs/deployer_config.yaml index cd575ec2b..808eb93bc 100644 --- a/train_and_deploy/configs/deployer_config.yaml +++ b/train_and_deploy/configs/deployer_config.yaml @@ -33,7 +33,7 @@ steps: # configuration of the Model Control Plane model: - name: gitguarden + name: secret_detection version: staging # pipeline level extra configurations diff --git a/train_and_deploy/configs/inference_config.yaml b/train_and_deploy/configs/inference_config.yaml index 1a46bd016..2611d3008 100644 --- a/train_and_deploy/configs/inference_config.yaml +++ b/train_and_deploy/configs/inference_config.yaml @@ 
-32,7 +32,7 @@ steps: # configuration of the Model Control Plane model: - name: gitguarden + name: secret_detection version: staging # pipeline level extra configurations diff --git a/train_and_deploy/configs/train_config.yaml b/train_and_deploy/configs/train_config.yaml index 63fc3ac83..dfcff1829 100644 --- a/train_and_deploy/configs/train_config.yaml +++ b/train_and_deploy/configs/train_config.yaml @@ -28,19 +28,19 @@ settings: steps: model_trainer: parameters: - name: gitguarden + name: secret_detection promote_with_metric_compare: parameters: - mlflow_model_name: gitguarden + mlflow_model_name: secret_detection notify_on_success: parameters: notify_on_success: False # configuration of the Model Control Plane model: - name: gitguarden + name: secret_detection license: apache - description: gitguarden E2E Batch Use Case + description: secret_detection E2E Batch Use Case audience: All ZenML users use_cases: | The ZenML E2E project project demonstrates how the most important steps of diff --git a/train_and_deploy/gitguarden.yaml b/train_and_deploy/gitguarden.yaml new file mode 100644 index 000000000..c2fb92c00 --- /dev/null +++ b/train_and_deploy/gitguarden.yaml @@ -0,0 +1,35 @@ +components: + artifact_store: + configuration: + path: s3://zenml-dev + flavor: s3 + name: s3 + type: artifact_store + container_registry: + configuration: + uri: 339712793861.dkr.ecr.eu-central-1.amazonaws.com + flavor: aws + name: ecr + type: container_registry + data_validator: + configuration: {} + flavor: evidently + name: evidently_data_validator + type: data_validator + image_builder: + configuration: {} + flavor: local + name: local_builder + type: image_builder + model_deployer: + configuration: {} + flavor: bentoml + name: jayesh_bento + type: model_deployer + orchestrator: + configuration: {} + flavor: local + name: default + type: orchestrator +stack_name: secret_detection +zenml_version: 0.70.0 diff --git a/train_and_deploy/pipelines/__init__.py 
b/train_and_deploy/pipelines/__init__.py index 7ef1c39e8..cd019ac0f 100644 --- a/train_and_deploy/pipelines/__init__.py +++ b/train_and_deploy/pipelines/__init__.py @@ -16,7 +16,7 @@ # -from .batch_inference import gitguarden_batch_inference -from .training import gitguarden_training -from .local_deployment import gitguarden_local_deployment -from .deploy_production import gitguarden_production_deployment +from .batch_inference import secret_detection_batch_inference +from .training import secret_detection_training +from .local_deployment import secret_detection_local_deployment +from .deploy_production import secret_detection_production_deployment diff --git a/train_and_deploy/pipelines/batch_inference.py b/train_and_deploy/pipelines/batch_inference.py index 08d8d0fba..f891c291b 100644 --- a/train_and_deploy/pipelines/batch_inference.py +++ b/train_and_deploy/pipelines/batch_inference.py @@ -33,7 +33,7 @@ @pipeline(on_failure=notify_on_failure) -def gitguarden_batch_inference( +def secret_detection_batch_inference( target_env: str, ): """ diff --git a/train_and_deploy/pipelines/deploy_production.py b/train_and_deploy/pipelines/deploy_production.py index 4545e7d8f..86cb911ba 100644 --- a/train_and_deploy/pipelines/deploy_production.py +++ b/train_and_deploy/pipelines/deploy_production.py @@ -21,7 +21,7 @@ @pipeline(on_failure=notify_on_failure, enable_cache=False) -def gitguarden_production_deployment( +def secret_detection_production_deployment( target_env: str, ): """Model deployment pipeline. diff --git a/train_and_deploy/pipelines/local_deployment.py b/train_and_deploy/pipelines/local_deployment.py index dd3a4f36e..4fce7bc23 100644 --- a/train_and_deploy/pipelines/local_deployment.py +++ b/train_and_deploy/pipelines/local_deployment.py @@ -21,7 +21,7 @@ @pipeline(on_failure=notify_on_failure, enable_cache=False) -def gitguarden_local_deployment( +def secret_detection_local_deployment( target_env: str, ): """Model deployment pipeline. 
diff --git a/train_and_deploy/pipelines/training.py b/train_and_deploy/pipelines/training.py index 9b151eb6d..f0c9e9e41 100644 --- a/train_and_deploy/pipelines/training.py +++ b/train_and_deploy/pipelines/training.py @@ -39,7 +39,7 @@ @pipeline(on_failure=notify_on_failure) -def gitguarden_training( +def secret_detection_training( model_search_space: Dict[str, Any], target_env: str, test_size: float = 0.2, diff --git a/train_and_deploy/run.py b/train_and_deploy/run.py index 1b938a4d4..4f1d18dbf 100644 --- a/train_and_deploy/run.py +++ b/train_and_deploy/run.py @@ -21,10 +21,10 @@ import click from pipelines import ( - gitguarden_batch_inference, - gitguarden_local_deployment, - gitguarden_production_deployment, - gitguarden_training, + secret_detection_batch_inference, + secret_detection_local_deployment, + secret_detection_production_deployment, + secret_detection_training, ) from zenml.logger import get_logger @@ -202,9 +202,9 @@ def main( "train_config.yaml", ) pipeline_args["run_name"] = ( - f"gitguarden_training_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"secret_detection_training_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - gitguarden_training.with_options(**pipeline_args)(**run_args_train) + secret_detection_training.with_options(**pipeline_args)(**run_args_train) logger.info("Training pipeline finished successfully!") if deployment: @@ -216,9 +216,9 @@ def main( "deployer_config.yaml", ) pipeline_args["run_name"] = ( - f"gitguarden_local_deployment_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"secret_detection_local_deployment_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - gitguarden_local_deployment.with_options(**pipeline_args)(**run_args_inference) + secret_detection_local_deployment.with_options(**pipeline_args)(**run_args_inference) if inference: # Execute Batch Inference Pipeline @@ -229,9 +229,9 @@ def main( "inference_config.yaml", ) pipeline_args["run_name"] = ( - 
f"gitguarden_batch_inference_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"secret_detection_batch_inference_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - gitguarden_batch_inference.with_options(**pipeline_args)( + secret_detection_batch_inference.with_options(**pipeline_args)( **run_args_inference ) if production: @@ -243,9 +243,9 @@ def main( "deploy_production.yaml", ) pipeline_args["run_name"] = ( - f"gitguarden_production_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + f"secret_detection_production_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" ) - gitguarden_production_deployment.with_options(**pipeline_args)( + secret_detection_production_deployment.with_options(**pipeline_args)( **run_args_production ) diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py index d34f0cb17..6bc59dcf3 100644 --- a/train_and_deploy/service.py +++ b/train_and_deploy/service.py @@ -11,7 +11,7 @@ class GitGuarden: """ # Load in the class scope to declare the model as a dependency of the service - iris_model = bentoml.models.get("gitguarden:latest") + iris_model = bentoml.models.get("secret_detection:latest") def __init__(self): """ From 39fb332c581a472bc95af38db3cc12f0a24001a7 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 18:27:34 +0100 Subject: [PATCH 16/19] Update ZenML store configuration for staging workflow --- .github/workflows/run_train_deploy_pipeline.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_train_deploy_pipeline.yml b/.github/workflows/run_train_deploy_pipeline.yml index ed4e9b97b..66baf4b5b 100644 --- a/.github/workflows/run_train_deploy_pipeline.yml +++ b/.github/workflows/run_train_deploy_pipeline.yml @@ -14,9 +14,9 @@ jobs: run-staging-workflow: runs-on: ubuntu-latest env: - ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }} - ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }} - ZENML_STAGING_STACK : ef6c474d-b6e7-49a7-a046-0dab39f7969a # Set this to 
your staging stack ID + ZENML_STORE_URL: ${{ secrets.ZENML_BENTO_PROJECTS_HOST }} + ZENML_STORE_API_KEY: ${{ secrets.ZENML_BENTO_PROJECTS_API_KEY }} + ZENML_STAGING_STACK : 281f82f3-6bdb-4951-bbdd-b85b57b463cc # Set this to your staging stack ID ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }} ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }} ZENML_DEBUG: true From bb6b04b95d4b19685b348b5f58954fb7f1562e48 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 22:57:39 +0100 Subject: [PATCH 17/19] add socat --- .github/workflows/run_train_deploy_pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_train_deploy_pipeline.yml b/.github/workflows/run_train_deploy_pipeline.yml index 66baf4b5b..85577ff31 100644 --- a/.github/workflows/run_train_deploy_pipeline.yml +++ b/.github/workflows/run_train_deploy_pipeline.yml @@ -35,6 +35,7 @@ jobs: - name: Install requirements working-directory: ./train_and_deploy run: | + sudo apt install socat pip3 install -r requirements.txt zenml integration install bentoml skypilot_kubernetes s3 aws evidently --uv -y From 49063ad544306000c961d7af21668b97650376fa Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 28 Nov 2024 23:54:36 +0100 Subject: [PATCH 18/19] Add RUN.md for training and deployment instructions; update train_config.yaml for Kubernetes settings --- train_and_deploy/RUN.md | 97 ++++++++++++++++++++++ train_and_deploy/configs/train_config.yaml | 3 + 2 files changed, 100 insertions(+) create mode 100644 train_and_deploy/RUN.md diff --git a/train_and_deploy/RUN.md b/train_and_deploy/RUN.md new file mode 100644 index 000000000..6c43eef22 --- /dev/null +++ b/train_and_deploy/RUN.md @@ -0,0 +1,97 @@ +# Train and Deploy ML Project + +This README provides step-by-step instructions for running the training and deployment pipeline using ZenML. 
+ +## Prerequisites + +- Git installed +- Python environment set up +- ZenML installed +- Access to the ZenML project repository + +## Project Setup + +1. Clone the repository and check out the feature branch: +```bash +git clone git@github.com:zenml-io/zenml-projects.git +cd zenml-projects && git checkout feature/update-train-deploy +``` + +2. Navigate to the project directory: +```bash +cd train_and_deploy +``` + +3. Initialize ZenML in the project: +```bash +zenml init +``` + +## Running the Pipeline + +### Training + +You have two options for running the training pipeline: + +#### Option 1: Automatic via CI +Make any change to the code and push it. This will automatically trigger the CI pipeline that launches training in SkyPilot. + +#### Option 2: Manual Execution +1. First, set up your stack. You can choose between: + - Local stack (uses local orchestrator): + ```bash + zenml stack set LocalGitGuardian + ``` + - Remote stack (uses SkyPilot orchestrator): + ```bash + zenml stack set RemoteGitGuardian + ``` + +2. Run the training pipeline: +```bash +python run.py --training +``` + +### Model Deployment + +1. After training completes, deploy the model: +```bash +python run.py --deployment +``` + +Note: At this stage, the deployment is done to the model set as "staging" (configured in `target_env`), and the model is deployed locally using BentoML. + +2. Test the deployed model: +```bash +python run.py --inference +``` + +### Production Deployment + +If the staging model performs well and you want to proceed with production deployment: + +1. 
Deploy to Kubernetes: +```bash +python run.py --production +``` +This pipeline will: +- Build a Docker image from the BentoML service +- Deploy it to Kubernetes + +## Additional Resources + +- [ZenML Projects Tenant Dashboard](https://cloud.zenml.io/organizations/fc992c14-d960-4db7-812e-8f070c99c6f0/tenants/12ec0fd2-ed02-4479-8ff9-ecbfbaae3285) +- [Example GitHub Actions Pipeline](https://github.com/zenml-io/zenml-projects/actions/runs/12075854945/job/33676323427) + +## Pipeline Flow Overview + +1. Training → Creates and trains the model +2. Deployment → Deploys model to staging environment (local BentoML) +3. Inference → Tests the deployed model +4. Production → Deploys to production Kubernetes environment + +## Notes + +- The deployment configurations are controlled by the `target_env` setting in the configs +- Make sure you have the necessary permissions and access rights before running the pipelines +- Monitor the CI/CD pipeline in GitHub Actions when using automatic deployment \ No newline at end of file diff --git a/train_and_deploy/configs/train_config.yaml b/train_and_deploy/configs/train_config.yaml index dfcff1829..249b7bac3 100644 --- a/train_and_deploy/configs/train_config.yaml +++ b/train_and_deploy/configs/train_config.yaml @@ -23,6 +23,9 @@ settings: - sklearn - slack - bentoml + orchestrator.vm_kubernetes: + down: True + idle_minutes_to_autostop: 2 # configuration of steps steps: From c51a6ab041a98fbc28d505b9f03cc1c35fda2613 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Fri, 29 Nov 2024 10:57:49 +0100 Subject: [PATCH 19/19] Add sanitization for model names in Kubernetes deployment --- .../steps/deployment/deploy_to_k8s.py | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/train_and_deploy/steps/deployment/deploy_to_k8s.py b/train_and_deploy/steps/deployment/deploy_to_k8s.py index 2bb8b2803..b479963c8 100644 --- a/train_and_deploy/steps/deployment/deploy_to_k8s.py +++ 
def sanitize_name(name: str, max_length: int = 63) -> str:
    """Convert an arbitrary model name into a Kubernetes-safe resource name.

    The name is lowercased, every character outside ``[a-z0-9-]`` is
    replaced with ``-``, the result is truncated to ``max_length``
    characters, and leading/trailing hyphens are removed.

    Args:
        name: Raw name to sanitize (e.g. a model name such as
            ``"secret_detection"``).
        max_length: Maximum length of the returned name. Defaults to 63,
            the limit for a DNS label.

    Returns:
        The sanitized name, e.g. ``"secret-detection"``.

    Raises:
        ValueError: If nothing usable is left after sanitization (for
            example the input is empty or consists only of invalid
            characters), since an empty resource name can never be valid.
    """
    # Lowercase first so that uppercase letters are kept rather than
    # being replaced by '-'.
    sanitized = re.sub(r"[^a-z0-9-]", "-", name.lower())
    # Truncate before stripping so a hyphen exposed by the cut is removed too.
    sanitized = sanitized[:max_length].strip("-")
    if not sanitized:
        # Fail early with a clear message instead of sending an empty
        # name to the cluster and getting an opaque API error back.
        raise ValueError(
            f"Cannot derive a valid Kubernetes name from {name!r}"
        )
    return sanitized