@@ -110,6 +110,8 @@ def validate(self, all_frames, report):
110110 self .check_max_allowed_max_date (
111111 max_date , geo_type , signal_type , report )
112112
113+ self .check_na_vals (geo_sig_df , geo_type , signal_type , report )
114+
113115 # Get relevant reference data from API dictionary.
114116 api_df_or_error = all_api_df [(geo_type , signal_type )]
115117
@@ -168,6 +170,43 @@ def validate(self, all_frames, report):
168170 if self .test_mode and kroc == 2 :
169171 break
170172
def check_na_vals(self, geo_sig_df, geo_type, signal_type, report):
    """Check whether a geo-signal dataframe contains unexpected NA values.

    Rows are examined per ``geo_id``. Within each geo, the first six rows
    (in the dataframe's existing row order) are exempt from the check, so
    that a geo ID that has only just been introduced does not raise an
    error. Every remaining row whose ``val`` is null and whose
    ``time_value`` is on or after ``self.params.time_window.start_date``
    is reported as a ``check_val_missing`` failure.

    NOTE(review): the six-row exemption is positional, so it presumably
    expects each geo's rows to be ordered by ``time_value`` -- confirm
    against the caller.

    Arguments:
        - geo_sig_df: pandas dataframe for one geo type / signal pair;
          must provide the columns `geo_id`, `time_value` and `val`
        - geo_type: str; geo type name (county, msa, hrr, state) as in the CSV name
        - signal_type: str; signal name as in the CSV name
        - report: ValidationReport; report where results are added

    Returns:
        - None
    """
    start_date = self.params.time_window.start_date

    # Iterate over the groups explicitly instead of using groupby.apply:
    # the shape of apply's result (Series vs. DataFrame) depends on what
    # each group returns, whereas this loop always yields one geo at a time.
    for geo_id, geo_df in geo_sig_df.groupby("geo_id"):
        is_missing = geo_df["val"].isnull()
        # First 6 days have to be null, so never flag them.
        is_missing.iloc[:6] = False

        missing_dates = geo_df.loc[is_missing, "time_value"]
        # Only dates inside the validated time window count as errors.
        for missing_date in missing_dates[missing_dates >= start_date]:
            report.add_raised_error(
                ValidationFailure(
                    "check_val_missing",
                    geo_type=geo_type,
                    signal=signal_type,
                    date=missing_date,
                    message=f"geo_id {geo_id}",
                )
            )

    # A single check is counted regardless of how many failures were raised.
    report.increment_total_checks()
209+
171210 def check_min_allowed_max_date (self , max_date , geo_type , signal_type , report ):
172211 """Check if time since data was generated is reasonable or too long ago.
173212
0 commit comments