-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Fix: Fixing some bugs in example feature repo for spark #5407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 4 commits
0ce35f4
0b6ed9a
62f7313
c86cf74
145f8d1
0cdd927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,6 @@ | |
import pandas as pd | ||
|
||
from feast import FeatureStore | ||
from feast.data_source import PushMode | ||
|
||
|
||
def run_demo(): | ||
|
@@ -27,27 +26,6 @@ def run_demo(): | |
print("\n--- Online features retrieved (instead) through a feature service---") | ||
fetch_online_features(store, use_feature_service=True) | ||
|
||
print("\n--- Simulate a stream event ingestion of the hourly stats df ---") | ||
event_df = pd.DataFrame.from_dict( | ||
{ | ||
"driver_id": [1001], | ||
"event_timestamp": [ | ||
datetime(2021, 5, 13, 10, 59, 42), | ||
], | ||
"created": [ | ||
datetime(2021, 5, 13, 10, 59, 42), | ||
], | ||
"conv_rate": [1.0], | ||
"acc_rate": [1.0], | ||
"avg_daily_trips": [1000], | ||
} | ||
) | ||
print(event_df) | ||
store.push("driver_stats_push_source", event_df, to=PushMode.ONLINE) | ||
|
||
print("\n--- Online features again with updated values from a stream push---") | ||
fetch_online_features(store, use_feature_service=True) | ||
|
||
print("\n--- Run feast teardown ---") | ||
subprocess.run(["feast", "teardown"]) | ||
|
||
|
@@ -82,8 +60,6 @@ def fetch_historical_features_entity_df(store: FeatureStore, for_batch_scoring: | |
"driver_hourly_stats:conv_rate", | ||
"driver_hourly_stats:acc_rate", | ||
"driver_hourly_stats:avg_daily_trips", | ||
"transformed_conv_rate:conv_rate_plus_val1", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you mean to remove these? I think you use them later. |
||
"transformed_conv_rate:conv_rate_plus_val2", | ||
], | ||
).to_df() | ||
print(training_df.head()) | ||
|
@@ -94,23 +70,24 @@ def fetch_online_features(store, use_feature_service: bool): | |
# {join_key: entity_value} | ||
{ | ||
"driver_id": 1001, | ||
"customer_id": 201, | ||
"val_to_add": 1000, | ||
"val_to_add_2": 2000, | ||
}, | ||
{ | ||
"driver_id": 1002, | ||
"customer_id": 202, | ||
"val_to_add": 1001, | ||
"val_to_add_2": 2002, | ||
}, | ||
] | ||
if use_feature_service: | ||
features_to_fetch = store.get_feature_service("driver_activity_v1") | ||
features_to_fetch = store.get_feature_service("driver_activity") | ||
else: | ||
features_to_fetch = [ | ||
"driver_hourly_stats:acc_rate", | ||
"driver_hourly_stats:avg_daily_trips", | ||
"transformed_conv_rate:conv_rate_plus_val1", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, these didn't work?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This on-the-fly feature was not declared in the example feature repo for Spark, and I have just removed it from this example.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looking at the code under
# Define an on demand feature view which can generate new features based on
# existing feature views and RequestSource features
@on_demand_feature_view(
sources=[driver_stats_fv, input_request],
schema=[
Field(name="conv_rate_plus_val1", dtype=Float64),
Field(name="conv_rate_plus_val2", dtype=Float64),
],
)
def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
df = pd.DataFrame()
df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"]
df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
    return df
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great, it works. I've restored it. |
||
"transformed_conv_rate:conv_rate_plus_val2", | ||
] | ||
returned_features = store.get_online_features( | ||
features=features_to_fetch, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why remove the online feature retrieval?