Skip to content

Commit 8d5c23b

Browse files
authored
Merge pull request #1478 from cmu-delphi/google_symptoms_omicron
New Google symptoms signals - omicron related
2 parents d983305 + a877c81 commit 8d5c23b

File tree

12 files changed

+179
-140
lines changed

12 files changed

+179
-140
lines changed

ansible/templates/google_symptoms-params-prod.json.j2

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,13 @@
3636
"dynamic": {
3737
"ref_window_size": 7,
3838
"smoothed_signals": [
39-
"ageusia_smoothed_search",
40-
"sum_anosmia_ageusia_smoothed_search",
41-
"anosmia_smoothed_search"
39+
"s01_smoothed_search",
40+
"s02_smoothed_search",
41+
"s03_smoothed_search",
42+
"s05_smoothed_search",
43+
"s06_smoothed_search",
44+
"s08_smoothed_search",
45+
"scontrol_smoothed_search"
4246
]
4347
}
4448
},

google_symptoms/delphi_google_symptoms/constants.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,33 @@
11
"""Registry for constants."""
2-
from functools import partial
32
from datetime import timedelta
43

5-
from .smooth import (
6-
identity,
7-
kday_moving_average,
8-
)
4+
from delphi_utils import Smoother
95

106
# global constants
11-
METRICS = ["Anosmia", "Ageusia"]
12-
COMBINED_METRIC = "sum_anosmia_ageusia"
7+
8+
SYMPTOM_SETS = {
9+
"s01": ["Cough", "Phlegm", "Sputum", "Upper respiratory tract infection"],
10+
"s02": ["Nasal congestion", "Post nasal drip", "Rhinorrhea", "Sinusitis",
11+
"Rhinitis", "Common cold"],
12+
"s03": ["Fever", "Hyperthermia", "Chills", "Shivering", "Low grade fever"],
13+
#"s04": ["Fatigue", "Weakness", "Muscle weakness", "Myalgia", "Pain"],
14+
"s05": ["Shortness of breath", "Wheeze", "Croup", "Pneumonia", "Asthma",
15+
"Crackles", "Acute bronchitis", "Bronchitis"],
16+
"s06": ["Anosmia", "Dysgeusia", "Ageusia"],
17+
#"s07": ["Nausea", "Vomiting", "Diarrhea", "Indigestion", "Abdominal pain"],
18+
"s08": ["Laryngitis", "Sore throat", "Throat irritation"],
19+
#"s09": ["Headache", "Migraine", "Cluster headache", "Dizziness", "Lightheadedness"],
20+
#"s10": ["Night sweats","Perspiration", "hyperhidrosis"],
21+
"scontrol": ["Type 2 diabetes", "Urinary tract infection", "Hair loss",
22+
"Candidiasis", "Weight gain"]
23+
}
24+
25+
COMBINED_METRIC = list(SYMPTOM_SETS.keys())
26+
27+
METRICS = list()
28+
for combmetric in COMBINED_METRIC:
29+
METRICS = METRICS + SYMPTOM_SETS[combmetric]
30+
1331
SMOOTHERS = ["raw", "smoothed"]
1432
GEO_RESOLUTIONS = [
1533
"state",
@@ -20,12 +38,15 @@
2038
"nation"
2139
]
2240

23-
seven_day_moving_average = partial(kday_moving_average, k=7)
2441
SMOOTHERS_MAP = {
25-
"raw": (identity, lambda d: d - timedelta(days=7)),
26-
"smoothed": (seven_day_moving_average, lambda d: d),
42+
"raw": (Smoother("identity", impute_method=None),
43+
lambda d: d - timedelta(days=7)),
44+
"smoothed": (Smoother("moving_average", window_length=7,
45+
impute_method='zeros'), lambda d: d)
2746
}
2847

48+
49+
2950
STATE_TO_ABBREV = {'Alabama': 'al',
3051
'Alaska': 'ak',
3152
# 'American Samoa': 'as',

google_symptoms/delphi_google_symptoms/geo.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def generate_transition_matrix(geo_res):
4848
).fillna(0).reset_index().rename({mapping_flag: "geo_id"}, axis = 1)
4949
return map_df
5050

51-
def geo_map(df, geo_res):
51+
def geo_map(df, geo_res, namescols = None):
5252
"""
5353
Compute derived HRR and MSA counts as a weighted sum of the county dataset.
5454
@@ -59,6 +59,11 @@ def geo_map(df, geo_res):
5959
and columns for signal vals
6060
geo_res: str
6161
"msa", "hrr", "hhs" or "nation"
62+
namescols: list of strings
63+
names of the columns of df other than geo_id and timestamp
64+
when running the pipeline, this will always be METRICS+COMBINED_METRIC
65+
this parameter was added to allow us to run unit tests on subsets of
66+
metrics and combined metrics
6267
6368
Returns
6469
-------
@@ -67,6 +72,9 @@ def geo_map(df, geo_res):
6772
and columns for signal vals.
6873
The geo_id has been converted from fips to HRRs/MSAs
6974
"""
75+
if namescols is None:
76+
namescols = METRICS + COMBINED_METRIC
77+
7078
if geo_res == "county":
7179
return df
7280

@@ -75,7 +83,7 @@ def geo_map(df, geo_res):
7583
for _date in df["timestamp"].unique():
7684
val_lists = df[df["timestamp"] == _date].merge(
7785
map_df["geo_id"], how="right"
78-
)[METRICS + [COMBINED_METRIC]].fillna(0)
86+
)[namescols].fillna(0)
7987
newdf = pd.DataFrame(
8088
np.matmul(map_df.values[:, 1:].T, val_lists.values),
8189
columns = list(val_lists.keys())

google_symptoms/delphi_google_symptoms/pull.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import pandas as pd
99

10-
from .constants import DC_FIPS, METRICS, COMBINED_METRIC
10+
from .constants import DC_FIPS, METRICS, COMBINED_METRIC, SYMPTOM_SETS
1111

1212

1313
# Create map of BigQuery symptom column names to desired column names.
@@ -39,18 +39,18 @@ def preprocess(df, level):
3939
Dataframe as described above.
4040
"""
4141
# Constants
42-
KEEP_COLUMNS = ["geo_id", "date"] + METRICS + [COMBINED_METRIC]
42+
KEEP_COLUMNS = ["geo_id", "date"] + METRICS + COMBINED_METRIC
4343

4444
df.rename(colname_map, axis=1, inplace=True)
4545
df["geo_id"] = df["open_covid_region_code"].apply(
4646
lambda x: x.split("-")[-1].lower())
4747

48-
df[COMBINED_METRIC] = 0
49-
for metric in METRICS:
50-
df[COMBINED_METRIC] += df[metric].fillna(0)
51-
df.loc[
52-
(df["Anosmia"].isnull())
53-
& (df["Ageusia"].isnull()), COMBINED_METRIC] = np.nan
48+
for cb_metric in COMBINED_METRIC:
49+
df[cb_metric] = 0
50+
for metric in SYMPTOM_SETS[cb_metric]:
51+
df[cb_metric] += df[metric].fillna(0)
52+
df[cb_metric] = df[cb_metric]/len(SYMPTOM_SETS[cb_metric])
53+
df.loc[df[SYMPTOM_SETS[cb_metric]].isnull().all(axis=1), cb_metric] = np.nan
5454

5555
# Delete rows with missing FIPS
5656
null_mask = (df["geo_id"].isnull())

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
from delphi_utils.validator.utils import lag_converter
1919

20-
from .constants import (METRICS, COMBINED_METRIC,
20+
from .constants import (COMBINED_METRIC,
2121
GEO_RESOLUTIONS, SMOOTHERS, SMOOTHERS_MAP)
2222
from .geo import geo_map
2323
from .pull import pull_gs_data
@@ -94,15 +94,16 @@ def run_module(params):
9494

9595
if len(df_pull) == 0:
9696
continue
97-
for metric, smoother in product(
98-
METRICS+[COMBINED_METRIC], SMOOTHERS):
97+
for metric, smoother in product(COMBINED_METRIC, SMOOTHERS):
9998
logger.info("generating signal and exporting to CSV",
10099
geo_res=geo_res,
101100
metric=metric,
102101
smoother=smoother)
103-
df = df_pull.set_index(["timestamp", "geo_id"])
104-
df["val"] = df[metric].groupby(level=1
105-
).transform(SMOOTHERS_MAP[smoother][0])
102+
df = df_pull
103+
df["val"] = df[metric].astype(float)
104+
df["val"] = df[["geo_id", "val"]].groupby(
105+
"geo_id")["val"].transform(
106+
SMOOTHERS_MAP[smoother][0].smooth)
106107
df["se"] = np.nan
107108
df["sample_size"] = np.nan
108109
# Drop early entries where data insufficient for smoothing

google_symptoms/delphi_google_symptoms/smooth.py

Lines changed: 0 additions & 38 deletions
This file was deleted.

google_symptoms/params.json.template

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"common": {
33
"export_dir": "./receiving",
44
"log_exceptions": false,
5-
"log_filename": "./google-symptoms.log"
5+
"log_filename": "./google-symptoms.log"
66
},
77
"indicator": {
88
"export_start_date": "2020-02-20",
@@ -26,9 +26,13 @@
2626
"dynamic": {
2727
"ref_window_size": 7,
2828
"smoothed_signals": [
29-
"ageusia_smoothed_search",
30-
"sum_anosmia_ageusia_smoothed_search",
31-
"anosmia_smoothed_search"
29+
"s01_smoothed_search",
30+
"s02_smoothed_search",
31+
"s03_smoothed_search",
32+
"s05_smoothed_search",
33+
"s06_smoothed_search",
34+
"s08_smoothed_search",
35+
"scontrol_smoothed_search"
3236
]
3337
}
3438
},

0 commit comments

Comments
 (0)