Skip to content

Commit fbf4d6e

Browse files
add AIT model
1 parent c6ba394 commit fbf4d6e

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

pvops/timeseries/models/AIT.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
import numpy as np
2+
from numpy.core.fromnumeric import prod
3+
from sklearn.metrics import mean_squared_error, r2_score
4+
5+
6+
class Predictor:
    """Prediction and scoring helpers shared by the time-series models."""

    def __init__(self):
        # Cooperative call so the class can be combined with other bases.
        super().__init__()

    def apply_additive_polynomial_model(self, model_terms, Xs):
        """Evaluate an additive polynomial model on the given variables.

        Every term contributes ``coeff * X_0**p_0 * X_1**p_1 * ...`` and the
        contributions of all terms are summed.

        Parameters
        ----------
        model_terms : list of tuples
            Contain model coefficients and powers. For example,

            [(0.29359785963294494, [1, 0]),
            (0.754806343190528, [0, 1]),
            (0.396833207207238, [1, 1]),
            (-0.0588375219110795, [0, 0])]

            The powers of a term are matched positionally with `Xs`.

        Xs : sequence of array-like
            One entry per model variable, in the same order as the powers
            of each term. The inputs are never modified.

        Returns
        -------
        Array of predicted values

        Raises
        ------
        ValueError
            If `model_terms` or `Xs` is empty, or if a term does not have
            exactly one power per variable.
        """
        if len(model_terms) == 0:
            raise ValueError("model_terms must contain at least one term.")
        if len(Xs) == 0:
            raise ValueError("Xs must contain at least one variable.")

        energy = None
        for coeff, powers in model_terms:
            if len(powers) != len(Xs):
                raise ValueError(
                    f"Expected {len(Xs)} powers per term, got {len(powers)}."
                )
            term = None
            for x, n in zip(Xs, powers):
                factor = x ** n
                # Out-of-place products: an in-place `*=` fails when an
                # integer array is multiplied by a float one.
                term = factor if term is None else term * factor
            contribution = coeff * term
            energy = contribution if energy is None else energy + contribution
        return energy

    def evaluate(self, real, pred,):
        """Print and return the fit quality of a prediction.

        Parameters
        ----------
        real : array-like
            Observed values.
        pred : array-like
            Predicted values.

        Returns
        -------
        tuple
            ``(logrmse, r2)``: the natural log of the root mean squared
            error and the R-squared score.
        """
        logrmse = np.log(np.sqrt(mean_squared_error(real, pred)))
        r2 = r2_score(real, pred)
        print(f"The fit has an R-squared of {r2} and a log RMSE of {logrmse}")
        return logrmse, r2
52+
53+
54+
class Processer:
    """Data checks and standardization helpers shared by the models."""

    def __init__(self):
        # Cooperative call so the class can be combined with other bases.
        super().__init__()
        # Prefix of the column names that hold standardized copies of data.
        self._col_scaled_prefix = 'stdscaled_'

    def check_data(self, data, prod_col_dict):
        """Decide whether the fit can be evaluated and store it in `do_eval`.

        Evaluation is enabled only when `prod_col_dict` has an
        ``'energyprod'`` entry and the column it names exists in `data`.
        Otherwise a notice is printed.

        Parameters
        ----------
        data : DataFrame
            Production data.
        prod_col_dict : dict
            Dictionary mapping nicknamed parameters to the column names
            in `data`.
        """
        self.do_eval = (
            'energyprod' in prod_col_dict
            and prod_col_dict['energyprod'] in data.columns
        )

        if not self.do_eval:
            print("Because the power production data is not"
                  " passed, the fit will not be evaluated."
                  " Predictions will still be rendered.")

    def _apply_transform(self, data,
                         scaler_info):
        """Return `data` standardized as ``(data - mean) / scale``.

        A new object is returned; `data` itself is left untouched so that
        a DataFrame column passed in is never overwritten.
        """
        return (data - scaler_info["mean"]) / scaler_info["scale"]

    def _apply_inverse_transform(self, data,
                                 scaler_info):
        """Return `data` mapped back as ``data * scale + mean``.

        A new object is returned; `data` itself is left untouched.
        """
        return data * scaler_info["scale"] + scaler_info["mean"]

    def _clean_columns(self, scaler, prod_df, prod_col_dict):
        """Delete from `prod_df` the standardized column of every scaler key."""
        for key in scaler:
            del prod_df[self._col_scaled_prefix + prod_col_dict[key]]
85+
86+
87+
# @dev: The 'AIT' class can be one of many models that inherit the
88+
# @dev: Processer and Predictor templates. When adding new models,
89+
# @dev: use the Processer and Predictor classes to hold general
90+
# @dev: functionality while having model-specific nuances in the
91+
# @dev: classes below. The above classes may be placed in a different
92+
# @dev: file if it seems fit.
93+
class AIT(Processer, Predictor):
    """Pre-trained additive polynomial model of energy production.

    Energy is predicted from standardized irradiance and DC capacity.
    Two parameter sets are stored, one for systems whose capacity is
    above `_HIGHCAP_THRESHOLD` and one for the remaining systems.
    """

    # Capacity boundary between the two parameter sets, expressed in the
    # units of the `dcsize` column.
    _HIGHCAP_THRESHOLD = 1000

    def __init__(self):
        super().__init__()
        self._load_params()

    def _load_params(self):
        """Store the standardization constants and model terms."""
        self.scaler_highcap = {"irradiance": {"mean": 571.45952959,
                                              "scale": 324.19905495},
                               "dcsize": {"mean": 14916.2339917,
                                          "scale": 20030.00088265},
                               "energyprod": {"mean": 7449.15184666,
                                              "scale": 12054.52533771}
                               }
        self.model_terms_highcap = [(0.29359785963294494, [1, 0]),
                                    (0.754806343190528, [0, 1]),
                                    (0.396833207207238, [1, 1]),
                                    (-0.0588375219110795, [0, 0])]

        self.scaler_lowcap = {"irradiance": {"mean": 413.53334101,
                                             "scale": 286.11031612},
                              "dcsize": {"mean": 375.91883522,
                                         "scale": 234.15141671},
                              "energyprod": {"mean": 119.00787546,
                                             "scale": 119.82927847}
                              }
        self.model_terms_lowcap = [(0.6866363032474436, [1, 0]),
                                   (0.6473846301807609, [0, 1]),
                                   (0.41926724219597955, [1, 1]),
                                   (0.06624491753542901, [0, 0])]

    def predict_subset(self, prod_df, scaler, model_terms, prod_col_dict):
        """Predict energy for rows that share one parameter set.

        Parameters
        ----------
        prod_df : DataFrame
            Production data; it is only read, never modified.
        scaler : dict
            Standardization constants (``mean`` and ``scale``) keyed by
            ``'irradiance'``, ``'dcsize'`` and ``'energyprod'``.
        model_terms : list of tuples
            Coefficients and powers of the polynomial; the powers are
            ordered as (irradiance, dcsize).
        prod_col_dict : dict
            Dictionary mapping nicknamed parameters to the column names
            in `prod_df`. ``'energyprod'`` is optional; when its column is
            available the fit is evaluated and the scores are printed.

        Returns
        -------
        Array of predicted energy values, in the original energy scale
        """
        self.check_data(prod_df, prod_col_dict)

        # 1. Standardize the two model inputs. Fresh float arrays are used
        # so that no column of `prod_df` can be altered by the scaling;
        # in particular the measured energy stays in its original scale
        # for the evaluation below.
        Xs = [
            self._apply_transform(
                np.array(prod_df[prod_col_dict[key]], dtype=float),
                scaler[key],
            )
            for key in ("irradiance", "dcsize")
        ]

        # 2. Predict energy (in standardized units)
        predicted_energy = self.apply_additive_polynomial_model(model_terms,
                                                                Xs)

        # 3. Rescale predictions
        predicted_rescaled_energy = self._apply_inverse_transform(
            predicted_energy, scaler['energyprod'])

        # 4. Evaluate
        if self.do_eval:
            self.evaluate(prod_df[prod_col_dict["energyprod"]].values,
                          predicted_rescaled_energy)
        return predicted_rescaled_energy

    def predict(self, prod_df, prod_col_dict):
        """Write model predictions into the baseline column of `prod_df`.

        Rows with capacity above `_HIGHCAP_THRESHOLD` use the high-capacity
        parameters and rows at or below it use the low-capacity ones. Rows
        whose capacity is missing match neither group and receive no
        prediction.

        Parameters
        ----------
        prod_df : DataFrame
            Production data. It is modified in place.
        prod_col_dict : dict
            Dictionary mapping nicknamed parameters to the column names
            in `prod_df`. Requires ``'irradiance'``, ``'dcsize'`` and
            ``'baseline'``; ``'energyprod'`` is optional.

        Returns
        -------
        DataFrame
            `prod_df` with the baseline column filled in.
        """
        dcsize = prod_df[prod_col_dict['dcsize']]
        baseline_col = prod_col_dict["baseline"]

        # Make sure the output column exists even if no row is predicted.
        if baseline_col not in prod_df.columns:
            prod_df[baseline_col] = np.nan

        subsets = (
            (dcsize > self._HIGHCAP_THRESHOLD,
             self.scaler_highcap, self.model_terms_highcap),
            (dcsize <= self._HIGHCAP_THRESHOLD,
             self.scaler_lowcap, self.model_terms_lowcap),
        )
        for mask, scaler, model_terms in subsets:
            # An empty subset has nothing to predict or evaluate.
            if not mask.any():
                continue
            predicted = self.predict_subset(prod_df.loc[mask, :],
                                            scaler,
                                            model_terms,
                                            prod_col_dict)
            prod_df.loc[mask, baseline_col] = predicted
        return prod_df
163+
164+
165+
def AIT_calc(prod_df, prod_col_dict):
    """
    Calculates expected energy from measured irradiance and site
    capacity using the `AIT` model

    Parameters
    ----------
    prod_df: DataFrame
        A data frame corresponding to the production data

    prod_col_dict: dict of {str : str}
        A dictionary that contains the column names relevant
        for the production data

        - **irradiance** (*string*), should be assigned to
          irradiance column name in prod_df, where data
          should be in [W/m^2]
        - **dcsize**, (*string*), should be assigned to
          preferred column name for site capacity in prod_df
        - **energyprod**, (*string*), should be assigned to
          the column name holding the power or energy production.
          If this is passed, an evaluation will be provided.
        - **baseline**, (*string*), should be assigned to
          preferred column name to capture the calculations
          in prod_df

    Example
    -------

    production_col_dict = {'irradiance': 'irrad_poa_Wm2',
                           'dcsize': 'capacity_DC_kW',
                           'energyprod': 'energy_generated_kWh',
                           'baseline': 'predicted'
                           }
    data = AIT_calc(data, production_col_dict)

    Returns
    -------
    DataFrame
        A data frame for production data with a new column,
        the predicted energy
    """
    # The model writes into the frame it is given, so hand it a copy and
    # leave the caller's data as it was.
    working_df = prod_df.copy()
    return AIT().predict(working_df, prod_col_dict)

pvops/timeseries/models/linear.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,10 @@ def modeller(prod_col_dict,
452452
in prod_df
453453
- **dcsize**, (*string*), should be assigned to
454454
preferred column name for site capacity in prod_df
455+
- **powerprod**, (*string*), should be assigned to
456+
the column name holding the power or energy production.
457+
This will be used as the output column if Y_parameter
458+
is not passed.
455459
456460
kernel_type : str
457461
Type of kernel for the statistical model

0 commit comments

Comments
 (0)