Replacing Feast with Featureform #16

Draft: wants to merge 2 commits into base: main
10 changes: 10 additions & 0 deletions changelog.txt
@@ -0,0 +1,10 @@
# Translating Feast to Featureform

Key Differences:
[] docker/setup/apply.py -> Replaced by the `featureform apply` command
[] docker/setup/materialize.py -> Not needed; Featureform takes care of materialization upon apply, with scheduling included
[] feature_store/data_fetcher.py -> Replaced by serving features for training & inference via the ServingClient
[] create.py + features.py -> Can be combined into a single definitions.py (sketched below)

Specifically, because Featureform leverages Kubernetes for orchestration, Cloud Functions and Cloud Scheduler aren't needed.
Featureform also doesn't need a third backing store, such as a GCS bucket, to hold the feature definitions.
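
For reference, a minimal sketch of what that combined definitions.py could look like. This is an assumption-laden illustration, not the final file: the provider names and registration parameters below follow the Featureform quickstart pattern and should be verified against the Featureform docs.

```python
# definitions.py -- hypothetical sketch combining create.py + features.py
import featureform as ff
from feature_store.repo import FF_0_config as config

# Offline store: BigQuery (parameter names are assumptions; verify in the docs)
bigquery = ff.register_bigquery(
    name="vaccine-bigquery",
    project_id=config.PROJECT_ID,
    dataset_id=config.BIGQUERY_DATASET_NAME,
    credentials_path=config.GOOGLE_APPLICATION_CREDENTIALS,
)

# Online store: Redis
redis = ff.register_redis(
    name="vaccine-redis",
    host=config.REDIS_HOST,
    port=int(config.REDIS_PORT or 6379),
    password=config.REDIS_PASSWORD,
)

# An entity is roughly the primary key that feature values are looked up by
state = ff.register_entity("state")
```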
107 changes: 107 additions & 0 deletions docker/setup/FF__setup.sh
@@ -0,0 +1,107 @@
# Auth
gcloud auth activate-service-account $SERVICE_ACCOUNT_EMAIL \
--key-file=$GOOGLE_APPLICATION_CREDENTIALS \
--project=$PROJECT_ID

# Setup GCP Project Name
echo project_id = $PROJECT_ID > ~/.bigqueryrc

# Enable APIs
echo "\nEnabling GCP APIs"
gcloud services enable artifactregistry.googleapis.com
gcloud services enable ml.googleapis.com
gcloud services enable aiplatform.googleapis.com
gcloud services enable bigquery.googleapis.com

# TODO: Check if this is correct
# https://docs.github.com/en/actions/deployment/deploying-to-your-cloud-provider/deploying-to-google-kubernetes-engine
gcloud services enable containerregistry.googleapis.com
gcloud services enable container.googleapis.com


# Create Cloud Storage Bucket
echo "\nCreating cloud storage bucket"
gsutil ls -b gs://$BUCKET_NAME || gsutil mb gs://$BUCKET_NAME

# Create BigQuery Dataset
echo "\nCreating BigQuery dataset"
bq --location=us mk --dataset $PROJECT_ID:gcp_feast_demo

# TODO: Make sure this is the correct way to create a Kubernetes cluster
# TODO: What additional setup & info do I need to pass?
## TODO: How do I get featureform into the cluster (helm chart, etc)?
echo "\nCreating Kubernetes cluster"
gcloud container clusters create $GKE_CLUSTER --project=$GKE_PROJECT --zone=$GKE_ZONE
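
# A possible answer to the helm TODO above (unverified sketch): Featureform ships
# a Helm chart, so it could be installed into the new cluster roughly like this.
# The repo URL and chart values are assumptions; confirm against the Featureform docs.
# helm repo add featureform https://storage.googleapis.com/featureform-helm/
# helm repo update
# helm install featureform featureform/featureform --set global.hostname=$FEATUREFORM_HOST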



# TODO: Replace with Featureform specific scripts
# Create & Apply the Feature Store
echo "\nCreating Feature Store"
python setup/FF_apply.py

## Create Artifact Registry
echo "\nCreating GCP Artifact Repository for Custom Triton Serving Container"
ARTIFACT_REPOSITORY_NAME=nvidia-triton

gcloud artifacts repositories create $ARTIFACT_REPOSITORY_NAME \
--repository-format=docker \
--location=$GCP_REGION \
--description="NVIDIA Triton Docker repository"

# Setup Vertex AI and Triton
echo "\nUploading Triton Models to Cloud Storage"
CONTAINER_IMAGE_URI=$GCP_REGION-docker.pkg.dev/$PROJECT_ID/$ARTIFACT_REPOSITORY_NAME/vertex-triton-inference
NGC_TRITON_IMAGE_URI=ghcr.io/redisventures/tritonserver-python-fil:22.11-py3
MODEL_STORAGE_URI=gs://$BUCKET_NAME/models

## Upload Triton Model Repository Contents
gsutil -m cp -r ./triton/models gs://$BUCKET_NAME/
gsutil rm $MODEL_STORAGE_URI/ensemble/1/.gitkeep

# Pull and Upload Triton Image
echo "\nPulling Triton Docker Image"
docker pull $NGC_TRITON_IMAGE_URI
docker tag $NGC_TRITON_IMAGE_URI $CONTAINER_IMAGE_URI

echo "\nPushing Triton Docker Image to GCP"
gcloud auth configure-docker $GCP_REGION-docker.pkg.dev --quiet
docker push $CONTAINER_IMAGE_URI

# Create Vertex AI Model
echo "\nCreating Vertex AI Model"
ENDPOINT_NAME=vaccine-predictor-endpoint
DEPLOYED_MODEL_NAME=vaccine-predictor

gcloud ai models upload \
--region=$GCP_REGION \
--display-name=$DEPLOYED_MODEL_NAME \
--container-image-uri=$CONTAINER_IMAGE_URI \
--artifact-uri=$MODEL_STORAGE_URI \
--container-env-vars="REDIS_CONNECTION_STRING=$REDIS_CONNECTION_STRING","REDIS_PASSWORD=$REDIS_PASSWORD","PROJECT_ID=$PROJECT_ID","GCP_REGION=$GCP_REGION","BUCKET_NAME=$BUCKET_NAME"

# Create Endpoint
echo "\nCreating Vertex AI Endpoint"
gcloud ai endpoints create \
--region=$GCP_REGION \
--display-name=$ENDPOINT_NAME

## Lookup Endpoint and Model IDs
echo "\nDeploying Model to Endpoint"
ENDPOINT_ID=$(gcloud ai endpoints list \
--region=$GCP_REGION \
--filter=display_name=$ENDPOINT_NAME \
--format="value(name)")

MODEL_ID=$(gcloud ai models list \
--region=$GCP_REGION \
--filter=display_name=$DEPLOYED_MODEL_NAME \
--format="value(name)")

# Deploy Model to the Endpoint on Vertex
gcloud ai endpoints deploy-model $ENDPOINT_ID \
--region=$GCP_REGION \
--model=$MODEL_ID \
--display-name=$DEPLOYED_MODEL_NAME \
--machine-type=n1-standard-2 \
--service-account=$SERVICE_ACCOUNT_EMAIL
39 changes: 39 additions & 0 deletions docker/setup/FF_apply.py
@@ -0,0 +1,39 @@
# Import the configs for featureform
import featureform as ff

from feature_store.repo import (
FF_0_config as config,
FF_1_providers as providers,
FF_4_transformations as transformations,
FF_5_registering_sets as reg_sets,
)
from feature_store.utils import logger, storage


if __name__ == "__main__":
# Setup logger
logging = logger.get_logger()

# Connect to featureform host
logging.info("Connecting to GKE cluster with Featureform")
client = ff.ResourceClient(config.FEATUREFORM_HOST)

# TODO: Register providers (Bigquery & Redis) with Featureform
logging.info("Register BigQuery & Redis as providers with Featureform")
bigquery, redis = providers.register_providers(ff)

# TODO: Register Sets with Featureform
logging.info("Registering entity with Featureform")
# Define an entity for the state. You can think of an entity
# as a primary key used to look up feature values.
state = ff.register_entity("state")

# TODO: Defining & Registering Transformations with Featureform
transformations.register_vaccine_search_trends(bigquery, redis, state)
transformations.register_vaccine_counts(bigquery, redis, state)

logging.info("Registering training & serving sets with Featureform")
reg_sets.register_sets(ff)

# TODO: Apply
client.apply()
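
For context, a hedged sketch of what one registration inside FF_4_transformations.py might look like, following Featureform's SQL-transformation pattern. The variant name, source reference, and column names are placeholders, not the actual schema:

```python
# Hypothetical sketch of FF_4_transformations.py (names/columns are assumptions)
def register_vaccine_search_trends(bigquery, redis, state):
    @bigquery.sql_transformation(variant="v1")
    def avg_vaccine_search_trends():
        # {{ vaccine_search_trends_table.v1 }} refers to an assumed registered source
        return (
            "SELECT sub_region_1 AS state, "
            "AVG(sni_covid19_vaccination) AS avg_sni_vaccination "
            "FROM {{ vaccine_search_trends_table.v1 }} GROUP BY state"
        )

    # Expose one transformed column as an online feature served from Redis
    avg_vaccine_search_trends.register_resources(
        entity=state,
        entity_column="state",
        inference_store=redis,
        features=[
            {"name": "avg_sni_vaccination", "column": "avg_sni_vaccination", "type": "float32"},
        ],
    )
```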
47 changes: 47 additions & 0 deletions docker/setup/FF_create.py
@@ -0,0 +1,47 @@
from feast import RepoConfig
from google.cloud import bigquery
from feature_store.utils import logger, storage
from feature_store.repo import config, features


if __name__ == "__main__":
# Setup logger
logging = logger.get_logger()

# Create a feature store repo config
logging.info("Creating Feast repo configuration")
repo_config = RepoConfig(
project=config.FEAST_PROJECT,
# Cloud Storage Blob for the Registry
registry=f"gs://{config.BUCKET_NAME}/data/registry.db",
# Google Cloud Project -- GCP
provider="gcp",
# Redis Enterprise as the Online Store
online_store={
"type": "redis",
"connection_string": f"{config.REDIS_CONNECTION_STRING},password={config.REDIS_PASSWORD}",
},
entity_key_serialization_version=2,
)

# Host the config in cloud storage
logging.info("Uploading repo config to cloud storage bucket")
storage.upload_pkl(repo_config, config.BUCKET_NAME, config.REPO_CONFIG)

# Generate initial features data in offline store
logging.info("Generating initial vaccine features in GCP")
client = bigquery.Client()

features.generate_vaccine_counts(
logging,
client,
f"{config.PROJECT_ID}.{config.BIGQUERY_DATASET_NAME}.{config.WEEKLY_VACCINATIONS_TABLE}",
)

features.generate_vaccine_search_trends(
logging,
client,
f"{config.PROJECT_ID}.{config.BIGQUERY_DATASET_NAME}.{config.VACCINE_SEARCH_TRENDS_TABLE}",
)

logging.info("Done")
19 changes: 19 additions & 0 deletions docker/setup/FF_teardown.py
@@ -0,0 +1,19 @@
from feature_store.repo import config
from feature_store.utils import logger, storage


if __name__ == "__main__":
# Setup logging
logging = logger.get_logger()

# Fetch the existing FeatureStore
logging.info("Fetching feature store")
store = storage.get_feature_store(
config_path=config.REPO_CONFIG, bucket_name=config.BUCKET_NAME
)

# Teardown
logging.info("Tearing down feature store")
store.teardown()

logging.info("Done")
52 changes: 52 additions & 0 deletions docker/setup/FF_teardown.sh
@@ -0,0 +1,52 @@
# Auth
gcloud auth activate-service-account $SERVICE_ACCOUNT_EMAIL \
--key-file=$GOOGLE_APPLICATION_CREDENTIALS \
--project=$PROJECT_ID

# Cleanup BigQuery
bq rm -t -f "gcp_feast_demo.vaccine_search_trends"
bq rm -t -f "gcp_feast_demo.us_weekly_vaccinations"
bq rm -r -f -d "gcp_feast_demo"


# TODO: Need to also teardown Kubernetes cluster
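# A likely fix for the TODO above, assuming the same cluster vars as FF__setup.sh:
# gcloud container clusters delete $GKE_CLUSTER --project=$GKE_PROJECT --zone=$GKE_ZONE --quiet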

# Teardown Vertex AI Stuff
ENDPOINT_NAME=vaccine-predictor-endpoint
DEPLOYED_MODEL_NAME=vaccine-predictor
ARTIFACT_REPOSITORY_NAME=nvidia-triton

ENDPOINT_ID=$(gcloud ai endpoints list \
--region=$GCP_REGION \
--filter=display_name=$ENDPOINT_NAME \
--format="value(name)")

DEPLOYED_MODEL_ID=$(gcloud ai endpoints describe $ENDPOINT_ID \
--region=$GCP_REGION \
--format="value(deployedModels.id)")

gcloud ai endpoints undeploy-model $ENDPOINT_ID \
--region=$GCP_REGION \
--deployed-model-id=$DEPLOYED_MODEL_ID

gcloud ai endpoints delete $ENDPOINT_ID \
--region=$GCP_REGION \
--quiet

MODEL_ID=$(gcloud ai models list \
--region=$GCP_REGION \
--filter=display_name=$DEPLOYED_MODEL_NAME \
--format="value(name)")

gcloud ai models delete $MODEL_ID \
--region=$GCP_REGION \
--quiet

gcloud artifacts repositories delete $ARTIFACT_REPOSITORY_NAME \
--location=$GCP_REGION \
--quiet

# TODO: Replace with Featureform specific scripts
# Teardown Featureform
echo "Tearing down Featureform infrastructure"
python setup/FF_teardown.py
31 changes: 14 additions & 17 deletions docker/triton/models/fetch-vaccine-features/1/model.py
@@ -9,16 +9,11 @@
 # and converting Triton input/output types to numpy types.
 import triton_python_backend_utils as pb_utils
 from feature_store.repo import config
-from feature_store.utils import (
-    DataFetcher,
-    logger,
-    storage
-)
+from feature_store.utils import DataFetcher, logger, storage

 logging = logger.get_logger()

-

 class TritonPythonModel:
     """Your Python model must use the same class name. Every Python model
     that is created must have "TritonPythonModel" as the class name.
@@ -42,20 +37,21 @@ def initialize(self, args):
         """

         # You must parse model_config. JSON string is not parsed here
-        self.model_config = model_config = json.loads(args['model_config'])
+        self.model_config = model_config = json.loads(args["model_config"])

         # Get OUTPUT0 configuration
         output0_config = pb_utils.get_output_config_by_name(
-            model_config, "feature_values")
+            model_config, "feature_values"
+        )

         # Convert Triton types to numpy types
         self.output0_dtype = pb_utils.triton_string_to_numpy(
-            output0_config['data_type'])
+            output0_config["data_type"]
+        )

         logging.info("Loading feature store")
         self.fs = storage.get_feature_store(
-            config_path=config.REPO_CONFIG,
-            bucket_name=config.BUCKET_NAME
+            config_path=config.REPO_CONFIG, bucket_name=config.BUCKET_NAME
         )
         logging.info("Loading feature store")
         self.data_fetcher = DataFetcher(self.fs)
@@ -95,16 +91,17 @@ def execute(self, requests):
             logging.info(state)

             # Fetch feature data from Feast db
-            feature_vector = self.data_fetcher.get_online_data(state=state[0].decode('utf-8'))
+            feature_vector = self.data_fetcher.get_online_data(
+                state=state[0].decode("utf-8")
+            )
             feature_out = feature_vector.to_numpy().reshape(-1, 8)
             logging.info(feature_vector)

             # Create InferenceResponse
             inference_response = pb_utils.InferenceResponse(
-                output_tensors=[pb_utils.Tensor(
-                    "feature_values",
-                    feature_out.astype(output0_dtype)
-                )]
+                output_tensors=[
+                    pb_utils.Tensor("feature_values", feature_out.astype(output0_dtype))
+                ]
             )
             responses.append(inference_response)

@@ -116,4 +113,4 @@ def finalize(self):
         Implementing `finalize` function is OPTIONAL. This function allows
         the model to perform any necessary clean ups before exit.
         """
-        logging.info('Cleaning up...')
+        logging.info("Cleaning up...")
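
Once this Triton model is moved off Feast, the DataFetcher lookup above would presumably be replaced with Featureform's ServingClient. A minimal sketch, assuming the feature name/variant registered at apply time (both are placeholders here):

```python
import featureform as ff
from feature_store.repo import FF_0_config as config

serving = ff.ServingClient(config.FEATUREFORM_HOST)

# Fetch online feature values for a single entity key; the feature
# name/variant must match whatever was registered via `client.apply()`
feature_vector = serving.features(
    [("avg_sni_vaccination", "v1")],
    {"state": "Florida"},
)
```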
27 changes: 27 additions & 0 deletions feature_store/repo/FF_0_config.py
@@ -0,0 +1,27 @@
import os

PROJECT_ID = os.environ["PROJECT_ID"]
GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

REDIS_CONNECTION_STRING = os.getenv("REDIS_CONNECTION_STRING", "localhost:6379")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")

BUCKET_NAME = os.getenv("BUCKET_NAME", "gcp-feast-demo")
GCP_REGION = os.getenv("GCP_REGION", "us-east1")

BIGQUERY_DATASET_NAME = "gcp_feast_demo"

MODEL_NAME = "predict-vaccine-counts"
MODEL_FILENAME = "xgboost.json"

VACCINE_SEARCH_TRENDS_TABLE = "vaccine_search_trends"
WEEKLY_VACCINATIONS_TABLE = "us_weekly_vaccinations"
DAILY_VACCINATIONS_CSV_URL = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv"

# Featureform Configuration
FEATUREFORM_HOST = os.getenv("FEATUREFORM_HOST")

REDIS_TEAM = os.getenv("REDIS_TEAM")
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
REDIS_DB = os.getenv("REDIS_DB")