Replacing Feast with Featureform #16

Draft: wants to merge 2 commits into base: main
10 changes: 10 additions & 0 deletions changelog.txt
@@ -0,0 +1,10 @@
# Translating Feast to Featureform

Key Differences:
[] docker/setup/apply.py -> Replaced by the `featureform apply` command
[] docker/setup/materialize.py -> Not needed; Featureform takes care of materialization upon apply, with scheduling included
[] feature_store/data_fetcher.py -> Replaced by serving features for training & inference via the ServingClient
[] create.py + features.py -> Can be combined into a single definitions.py (sketched below)

Specifically, because Featureform leverages Kubernetes for orchestration, Cloud Functions and Cloud Scheduler aren't needed.
Featureform also doesn't need a third backing store, such as a GCS bucket, to hold the feature definitions.
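
For reference, a minimal sketch of what that combined definitions.py could look like. This is an assumption-laden illustration, not the final file: the provider names and registration parameters below follow the Featureform quickstart pattern and should be verified against the Featureform docs.

```python
# definitions.py -- hypothetical sketch combining create.py + features.py
import featureform as ff
from feature_store.repo import FF_0_config as config

# Offline store: BigQuery (parameter names are assumptions; verify in the docs)
bigquery = ff.register_bigquery(
    name="vaccine-bigquery",
    project_id=config.PROJECT_ID,
    dataset_id=config.BIGQUERY_DATASET_NAME,
    credentials_path=config.GOOGLE_APPLICATION_CREDENTIALS,
)

# Online store: Redis
redis = ff.register_redis(
    name="vaccine-redis",
    host=config.REDIS_HOST,
    port=int(config.REDIS_PORT or 6379),
    password=config.REDIS_PASSWORD,
)

# An entity is roughly the primary key that feature values are looked up by
state = ff.register_entity("state")
```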
107 changes: 107 additions & 0 deletions docker/setup/FF__setup.sh
@@ -0,0 +1,107 @@
# Auth
gcloud auth activate-service-account $SERVICE_ACCOUNT_EMAIL \
--key-file=$GOOGLE_APPLICATION_CREDENTIALS \
--project=$PROJECT_ID

# Setup GCP Project Name
echo project_id = $PROJECT_ID > ~/.bigqueryrc

# Enable APIs
echo "\nEnabling GCP APIs"
gcloud services enable artifactregistry.googleapis.com
gcloud services enable ml.googleapis.com
gcloud services enable aiplatform.googleapis.com
gcloud services enable bigquery.googleapis.com

# TODO: Check if this is correct
# https://docs.github.com/en/actions/deployment/deploying-to-your-cloud-provider/deploying-to-google-kubernetes-engine
gcloud services enable containerregistry.googleapis.com
gcloud services enable container.googleapis.com


# Create Cloud Storage Bucket
echo "\nCreating cloud storage bucket"
gsutil ls -b gs://$BUCKET_NAME || gsutil mb gs://$BUCKET_NAME

# Create BigQuery Dataset
echo "\nCreating BigQuery dataset"
bq --location=us mk --dataset $PROJECT_ID:gcp_feast_demo

# TODO: Make sure this is the correct way to create a Kubernetes cluster
# TODO: What additional setup & info do I need to pass?
## TODO: How do I get featureform into the cluster (helm chart, etc)?
echo "\nCreating Kubernetes cluster"
gcloud container clusters create $GKE_CLUSTER --project=$GKE_PROJECT --zone=$GKE_ZONE
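
# A possible answer to the helm TODO above (unverified sketch): Featureform ships
# a Helm chart, so it could be installed into the new cluster roughly like this.
# The repo URL and chart values are assumptions; confirm against the Featureform docs.
# helm repo add featureform https://storage.googleapis.com/featureform-helm/
# helm repo update
# helm install featureform featureform/featureform --set global.hostname=$FEATUREFORM_HOST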



# TODO: Replace with Featureform specific scripts
# Create & Apply the Feature Store
echo "\nCreating Feature Store"
python setup/FF_apply.py

## Create Artifact Registry
echo "\nCreating GCP Artifact Repository for Custom Triton Serving Container"
ARTIFACT_REPOSITORY_NAME=nvidia-triton

gcloud artifacts repositories create $ARTIFACT_REPOSITORY_NAME \
--repository-format=docker \
--location=$GCP_REGION \
--description="NVIDIA Triton Docker repository"

# Setup Vertex AI and Triton
echo "\nUploading Triton Models to Cloud Storage"
CONTAINER_IMAGE_URI=$GCP_REGION-docker.pkg.dev/$PROJECT_ID/$ARTIFACT_REPOSITORY_NAME/vertex-triton-inference
NGC_TRITON_IMAGE_URI=ghcr.io/redisventures/tritonserver-python-fil:22.11-py3
MODEL_STORAGE_URI=gs://$BUCKET_NAME/models

## Upload Triton Model Repository Contents
gsutil -m cp -r ./triton/models gs://$BUCKET_NAME/
gsutil rm $MODEL_STORAGE_URI/ensemble/1/.gitkeep

# Pull and Upload Triton Image
echo "\nPulling Triton Docker Image"
docker pull $NGC_TRITON_IMAGE_URI
docker tag $NGC_TRITON_IMAGE_URI $CONTAINER_IMAGE_URI

echo "\nPushing Triton Docker Image to GCP"
gcloud auth configure-docker $GCP_REGION-docker.pkg.dev --quiet
docker push $CONTAINER_IMAGE_URI

# Create Vertex AI Model
echo "\nCreating Vertex AI Model"
ENDPOINT_NAME=vaccine-predictor-endpoint
DEPLOYED_MODEL_NAME=vaccine-predictor

gcloud ai models upload \
--region=$GCP_REGION \
--display-name=$DEPLOYED_MODEL_NAME \
--container-image-uri=$CONTAINER_IMAGE_URI \
--artifact-uri=$MODEL_STORAGE_URI \
--container-env-vars="REDIS_CONNECTION_STRING=$REDIS_CONNECTION_STRING","REDIS_PASSWORD=$REDIS_PASSWORD","PROJECT_ID=$PROJECT_ID","GCP_REGION=$GCP_REGION","BUCKET_NAME=$BUCKET_NAME"

# Create Endpoint
echo "\nCreating Vertex AI Endpoint"
gcloud ai endpoints create \
--region=$GCP_REGION \
--display-name=$ENDPOINT_NAME

## Lookup Endpoint and Model IDs
echo "\nDeploying Model to Endpoint"
ENDPOINT_ID=$(gcloud ai endpoints list \
--region=$GCP_REGION \
--filter=display_name=$ENDPOINT_NAME \
--format="value(name)")

MODEL_ID=$(gcloud ai models list \
--region=$GCP_REGION \
--filter=display_name=$DEPLOYED_MODEL_NAME \
--format="value(name)")

# Deploy Model to the Endpoint on Vertex
gcloud ai endpoints deploy-model $ENDPOINT_ID \
--region=$GCP_REGION \
--model=$MODEL_ID \
--display-name=$DEPLOYED_MODEL_NAME \
--machine-type=n1-standard-2 \
--service-account=$SERVICE_ACCOUNT_EMAIL
39 changes: 39 additions & 0 deletions docker/setup/FF_apply.py
@@ -0,0 +1,39 @@
# Import the configs for featureform
import featureform as ff

from feature_store.repo import (
FF_0_config as config,
FF_1_providers as providers,
FF_4_transformations as transformations,
FF_5_registering_sets as reg_sets,
)
from feature_store.utils import logger, storage


if __name__ == "__main__":
# Setup logger
logging = logger.get_logger()

# Connect to featureform host
logging.info("Connecting to GKE cluster with Featureform")
client = ff.ResourceClient(config.FEATUREFORM_HOST)

# TODO: Register providers (Bigquery & Redis) with Featureform
logging.info("Register BigQuery & Redis as providers with Featureform")
bigquery, redis = providers.register_providers(ff)

# TODO: Register Sets with Featureform
logging.info("Registering entity with Featureform")
# Define an entity for the state. You can think of an entity
# as a primary key used to look up feature values.
state = ff.register_entity("state")

# TODO: Defining & Registering Transformations with Featureform
transformations.register_vaccine_search_trends(bigquery, redis, state)
transformations.register_vaccine_counts(bigquery, redis, state)

logging.info("Registering training & serving sets with Featureform")
reg_sets.register_sets(ff)

# TODO: Apply
client.apply()
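
For context, a hedged sketch of what one registration inside FF_4_transformations.py might look like, following Featureform's SQL-transformation pattern. The variant name, source reference, and column names are placeholders, not the actual schema:

```python
# Hypothetical sketch of FF_4_transformations.py (names/columns are assumptions)
def register_vaccine_search_trends(bigquery, redis, state):
    @bigquery.sql_transformation(variant="v1")
    def avg_vaccine_search_trends():
        # {{ vaccine_search_trends_table.v1 }} refers to an assumed registered source
        return (
            "SELECT sub_region_1 AS state, "
            "AVG(sni_covid19_vaccination) AS avg_sni_vaccination "
            "FROM {{ vaccine_search_trends_table.v1 }} GROUP BY state"
        )

    # Expose one transformed column as an online feature served from Redis
    avg_vaccine_search_trends.register_resources(
        entity=state,
        entity_column="state",
        inference_store=redis,
        features=[
            {"name": "avg_sni_vaccination", "column": "avg_sni_vaccination", "type": "float32"},
        ],
    )
```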
47 changes: 47 additions & 0 deletions docker/setup/FF_create.py
@@ -0,0 +1,47 @@
from feast import RepoConfig
from google.cloud import bigquery
from feature_store.utils import logger, storage
from feature_store.repo import config, features


if __name__ == "__main__":
# Setup logger
logging = logger.get_logger()

# Create a feature store repo config
logging.info("Creating Feast repo configuration")
repo_config = RepoConfig(
project=config.FEAST_PROJECT,
# Cloud Storage Blob for the Registry
registry=f"gs://{config.BUCKET_NAME}/data/registry.db",
# Google Cloud Project -- GCP
provider="gcp",
# Redis Enterprise as the Online Store
online_store={
"type": "redis",
"connection_string": f"{config.REDIS_CONNECTION_STRING},password={config.REDIS_PASSWORD}",
},
entity_key_serialization_version=2,
)

# Host the config in cloud storage
logging.info("Uploading repo config to cloud storage bucket")
storage.upload_pkl(repo_config, config.BUCKET_NAME, config.REPO_CONFIG)

# Generate initial features data in offline store
logging.info("Generating initial vaccine features in GCP")
client = bigquery.Client()

features.generate_vaccine_counts(
logging,
client,
f"{config.PROJECT_ID}.{config.BIGQUERY_DATASET_NAME}.{config.WEEKLY_VACCINATIONS_TABLE}",
)

features.generate_vaccine_search_trends(
logging,
client,
f"{config.PROJECT_ID}.{config.BIGQUERY_DATASET_NAME}.{config.VACCINE_SEARCH_TRENDS_TABLE}",
)

logging.info("Done")
19 changes: 19 additions & 0 deletions docker/setup/FF_teardown.py
@@ -0,0 +1,19 @@
from feature_store.repo import config
from feature_store.utils import logger, storage


if __name__ == "__main__":
# Setup logging
logging = logger.get_logger()

# Fetch the existing FeatureStore
logging.info("Fetching feature store")
store = storage.get_feature_store(
config_path=config.REPO_CONFIG, bucket_name=config.BUCKET_NAME
)

# Teardown
logging.info("Tearing down feature store")
store.teardown()

logging.info("Done")
52 changes: 52 additions & 0 deletions docker/setup/FF_teardown.sh
@@ -0,0 +1,52 @@
# Auth
gcloud auth activate-service-account $SERVICE_ACCOUNT_EMAIL \
--key-file=$GOOGLE_APPLICATION_CREDENTIALS \
--project=$PROJECT_ID

# Cleanup BigQuery
bq rm -t -f "gcp_feast_demo.vaccine_search_trends"
bq rm -t -f "gcp_feast_demo.us_weekly_vaccinations"
bq rm -r -f -d "gcp_feast_demo"


# TODO: Need to also teardown Kubernetes cluster
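# A likely fix for the TODO above, assuming the same cluster vars as FF__setup.sh:
# gcloud container clusters delete $GKE_CLUSTER --project=$GKE_PROJECT --zone=$GKE_ZONE --quiet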

# Teardown Vertex AI Stuff
ENDPOINT_NAME=vaccine-predictor-endpoint
DEPLOYED_MODEL_NAME=vaccine-predictor
ARTIFACT_REPOSITORY_NAME=nvidia-triton

ENDPOINT_ID=$(gcloud ai endpoints list \
--region=$GCP_REGION \
--filter=display_name=$ENDPOINT_NAME \
--format="value(name)")

DEPLOYED_MODEL_ID=$(gcloud ai endpoints describe $ENDPOINT_ID \
--region=$GCP_REGION \
--format="value(deployedModels.id)")

gcloud ai endpoints undeploy-model $ENDPOINT_ID \
--region=$GCP_REGION \
--deployed-model-id=$DEPLOYED_MODEL_ID

gcloud ai endpoints delete $ENDPOINT_ID \
--region=$GCP_REGION \
--quiet

MODEL_ID=$(gcloud ai models list \
--region=$GCP_REGION \
--filter=display_name=$DEPLOYED_MODEL_NAME \
--format="value(name)")

gcloud ai models delete $MODEL_ID \
--region=$GCP_REGION \
--quiet

gcloud artifacts repositories delete $ARTIFACT_REPOSITORY_NAME \
--location=$GCP_REGION \
--quiet

# TODO: Replace with Featureform specific scripts
# Teardown Featureform
echo "Tearing down Featureform infrastructure"
python setup/FF_teardown.py
31 changes: 14 additions & 17 deletions docker/triton/models/fetch-vaccine-features/1/model.py
@@ -9,16 +9,11 @@
 # and converting Triton input/output types to numpy types.
 import triton_python_backend_utils as pb_utils
 from feature_store.repo import config
-from feature_store.utils import (
-    DataFetcher,
-    logger,
-    storage
-)
+from feature_store.utils import DataFetcher, logger, storage

 logging = logger.get_logger()

-

 class TritonPythonModel:
     """Your Python model must use the same class name. Every Python model
     that is created must have "TritonPythonModel" as the class name.
@@ -42,20 +37,21 @@ def initialize(self, args):
         """

         # You must parse model_config. JSON string is not parsed here
-        self.model_config = model_config = json.loads(args['model_config'])
+        self.model_config = model_config = json.loads(args["model_config"])

         # Get OUTPUT0 configuration
         output0_config = pb_utils.get_output_config_by_name(
-            model_config, "feature_values")
+            model_config, "feature_values"
+        )

         # Convert Triton types to numpy types
         self.output0_dtype = pb_utils.triton_string_to_numpy(
-            output0_config['data_type'])
+            output0_config["data_type"]
+        )

         logging.info("Loading feature store")
         self.fs = storage.get_feature_store(
-            config_path=config.REPO_CONFIG,
-            bucket_name=config.BUCKET_NAME
+            config_path=config.REPO_CONFIG, bucket_name=config.BUCKET_NAME
         )
         logging.info("Loading feature store")
         self.data_fetcher = DataFetcher(self.fs)
@@ -95,16 +91,17 @@ def execute(self, requests):
             logging.info(state)

             # Fetch feature data from Feast db
-            feature_vector = self.data_fetcher.get_online_data(state=state[0].decode('utf-8'))
+            feature_vector = self.data_fetcher.get_online_data(
+                state=state[0].decode("utf-8")
+            )
             feature_out = feature_vector.to_numpy().reshape(-1, 8)
             logging.info(feature_vector)

             # Create InferenceResponse
             inference_response = pb_utils.InferenceResponse(
-                output_tensors=[pb_utils.Tensor(
-                    "feature_values",
-                    feature_out.astype(output0_dtype)
-                )]
+                output_tensors=[
+                    pb_utils.Tensor("feature_values", feature_out.astype(output0_dtype))
+                ]
             )
             responses.append(inference_response)

@@ -116,4 +113,4 @@ def finalize(self):
         Implementing `finalize` function is OPTIONAL. This function allows
         the model to perform any necessary clean ups before exit.
         """
-        logging.info('Cleaning up...')
+        logging.info("Cleaning up...")
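
Once this Triton model is moved off Feast, the DataFetcher lookup above would presumably be replaced with Featureform's ServingClient. A minimal sketch, assuming the feature name/variant registered at apply time (both are placeholders here):

```python
import featureform as ff
from feature_store.repo import FF_0_config as config

serving = ff.ServingClient(config.FEATUREFORM_HOST)

# Fetch online feature values for a single entity key; the feature
# name/variant must match whatever was registered via `client.apply()`
feature_vector = serving.features(
    [("avg_sni_vaccination", "v1")],
    {"state": "Florida"},
)
```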
27 changes: 27 additions & 0 deletions feature_store/repo/FF_0_config.py
@@ -0,0 +1,27 @@
import os

PROJECT_ID = os.environ["PROJECT_ID"]
GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

REDIS_CONNECTION_STRING = os.getenv("REDIS_CONNECTION_STRING", "localhost:6379")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")

BUCKET_NAME = os.getenv("BUCKET_NAME", "gcp-feast-demo")
GCP_REGION = os.getenv("GCP_REGION", "us-east1")

BIGQUERY_DATASET_NAME = "gcp_feast_demo"

MODEL_NAME = "predict-vaccine-counts"
MODEL_FILENAME = "xgboost.json"

VACCINE_SEARCH_TRENDS_TABLE = "vaccine_search_trends"
WEEKLY_VACCINATIONS_TABLE = "us_weekly_vaccinations"
DAILY_VACCINATIONS_CSV_URL = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv"

# Featureform Configuration
FEATUREFORM_HOST = os.getenv("FEATUREFORM_HOST")

REDIS_TEAM = os.getenv("REDIS_TEAM")
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
REDIS_DB = os.getenv("REDIS_DB")