Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
os: [ubuntu-latest]

steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy-pages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3"

- name: Install dependencies
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ SMARTEOLE_WakeSteering_ReadMe.xlsx
SMARTEOLE_WakeSteering_Map.pdf
SMARTEOLE-WFC-open-dataset.zip
examples_artificial_data/03_energy_ratio/heterogeneity_layouts.pdf
examples_smarteole/data/SMARTEOLE-LES-simulation-data/data
*.pkl
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Welcome to the documentation of the NLR FLASC repository!

```{note}
As of FLASC v2.3, FLASC requires `numpy` version 2, following the update in FLORIS v4.3. See the [numpy documentation for details](https://numpy.org/doc/stable/numpy_2_0_migration_guide.html).
As of FLASC v2.5, FLASC requires Python 3.10 or later.
```

FLASC provides a comprehensive toolkit for wind farm analysis, combining SCADA data processing with advanced wake modeling capabilities. The repository is intended as a community-driven toolbox, available on its [GitHub Repository](https://github.com/NatLabRockies/flasc).
Expand Down
4 changes: 2 additions & 2 deletions docs/introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ See {cite:p}`Doekemeijer2022a` and {cite:p}`Bay2022a` for practical examples of

If FLASC played a role in your research, please cite it. This software can be cited as:

FLASC. Version 2.4.2 (2026). Available at https://github.com/NatLabRockies/flasc.
FLASC. Version 2.5 (2026). Available at https://github.com/NatLabRockies/flasc.

For LaTeX users:

@misc{flasc2026,
author = {NLR},
title = {FLASC. Version 2.4.2},
title = {FLASC. Version 2.5},
year = {2026},
publisher = {GitHub},
journal = {GitHub repository},
Expand Down
402 changes: 402 additions & 0 deletions examples_smarteole/12_model_les_wake_loss_validation.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import glob
import os
import zipfile
from datetime import timedelta as td

import numpy as np
import pandas as pd
import xarray as xr
from floris.utilities import wrap_360
from zenodo_get import download as zn_download

from flasc.data_processing.time_operations import df_resample_by_interpolation


class AspireTimeseriesReader:
    """Read ASPIRE (Whiffle) LES netCDF output files and export them in various formats.

    The turbine output files are loaded as xarray Datasets and can be converted
    into a single wide-format Pandas DataFrame whose columns follow the FLASC
    naming convention ``<variable>_<turbine index>``.
    """

    def __init__(self, aspire_metmast_filelist=None, aspire_turbine_filelist=None, verbose=False):
        """Initialize the class.

        Args:
            aspire_metmast_filelist (list, optional): List of strings, where each entry
                defines the path to one of the ASPIRE met mast nc files. Defaults to an
                empty list.
            aspire_turbine_filelist (list, optional): List of strings, where each entry
                defines the path to one of the ASPIRE turbine nc files. Defaults to an
                empty list.
            verbose (bool, optional): Print a message for every file that is imported.
                Defaults to False.
        """
        # Use None as the default and create a fresh list per instance, so that
        # instances never share (and accidentally mutate) the same default list.
        self.metmast_files = [] if aspire_metmast_filelist is None else aspire_metmast_filelist
        self.turbine_files = [] if aspire_turbine_filelist is None else aspire_turbine_filelist
        self.turbine_datasets = None
        self.metmast_datasets = None
        self.verbose = verbose

    # Private functions
    def _read_member_as_xarray(self, fn):
        """Read the contents of one ASPIRE .nc (HDF5) file as an xarray Dataset.

        Args:
            fn (str): Filename referring to an ASPIRE datafile.

        Returns:
            dataset (xarray.Dataset): Contents of the file, fully loaded into memory.
        """
        # Load everything into memory inside a context manager, so the file handle
        # is released even if reading fails halfway. The loaded data remains
        # available after the handle has been closed.
        with xr.open_dataset(fn) as file_handle:
            dataset = file_handle.load()

        if self.verbose:
            print(f"Successfully imported the contents of {os.path.basename(fn)}.")

        return dataset

    def _concatenate_dataframes_and_remove_startup_time(self, df_list):
        """Stitch the per-simulation DataFrames together and drop the start-up periods.

        Typically, ASPIRE simulations are performed one day at a time, including several
        hours of simulation start-up. This means that each day runs for about 26 hours, and
        that there is an overlap window of about 2 hours between simulations. The start-up
        period, usually the first two hours of the simulation, must be removed so that the
        files connect without overlap.

        Args:
            df_list (list): List where each entry is a Pandas DataFrame with a "time" column,
                containing the simulation data of one ASPIRE run in chronological order.
                The list and its DataFrames are not modified.

        Returns:
            df_out (pd.DataFrame): Pandas DataFrame containing all simulations, with the
                start-up periods and overlapping measurement times removed, sorted by time.

        Raises:
            IndexError: If df_list is empty, or if the first DataFrame does not contain a
                change of calendar day from which the start-up period can be derived.
        """
        if len(df_list) == 0:
            raise IndexError("df_list is empty: there are no DataFrames to concatenate.")

        # The first dataset usually starts a couple of hours before midnight on the day
        # before the actual day of the simulation. That is the start-up period, which we
        # locate through the first change in calendar day.
        df_first = df_list[0]
        days = pd.DatetimeIndex(df_first["time"]).day.to_numpy()
        day_changes = np.flatnonzero(np.diff(days) != 0)
        if day_changes.size == 0:
            raise IndexError(
                "Cannot remove the start-up period: the first DataFrame does not "
                "contain a change of calendar day."
            )

        # day_changes[0] is the last row of the previous day, so +1 is the first row of
        # the new day (the midnight stamp) and +2 is the row after that. The first kept
        # row is hence one timestep past midnight, which is also where every subsequent
        # file starts after trimming (strictly later than the previous file's last stamp).
        # Positional slicing (iloc) is used because the offset is a row position.
        first_kept_row = day_changes[0] + 2
        df_trimmed = df_first.iloc[first_kept_row:]
        trimmed_list = [df_trimmed]  # Collect in a new list; leave df_list untouched

        # Establish the end time of the first simulation, so we can use that to remove
        # start-up periods from the next files
        t_end_prev_simulation = df_trimmed["time"].iloc[-1]

        for df in df_list[1:]:
            # For every file after the first, remove all measurements up to the last
            # timestamp of the previous file: that period is the start-up period.
            dt = df["time"].diff().median()  # Median duration between timesteps

            # Only keep timesteps more than half a timestep past the last measurement
            # of the previous file. The margin guards against timestamp jitter.
            df_trimmed = df.loc[df["time"] > t_end_prev_simulation + 0.5 * dt]

            trimmed_list.append(df_trimmed)
            t_end_prev_simulation = df_trimmed["time"].iloc[-1]

        # Collect all the outputs together into a single DataFrame and sort it chronologically
        df_out = pd.concat(trimmed_list, axis=0).sort_values(by="time").reset_index(drop=True)
        return df_out

    def get_turbine_hub_heights(self):
        """Extract the turbine hub heights from the imported turbine data files.

        Returns:
            hub_heights (np.array): Float array with the "ztur" values of the first
                turbine data file (one hub height per wind turbine).

        Raises:
            UserWarning: If the turbine files have not been read yet.
        """
        if self.turbine_datasets is None:
            raise UserWarning(
                "Cannot extract turbine hub heights. Please read the files first using "
                "get_turbine_data_as_xarrays()."
            )

        # Extract hub heights from the first file
        hub_heights = np.array(self.turbine_datasets[0]["ztur"], dtype=float)
        return hub_heights

    def get_turbine_data_as_xarrays(self):
        """Import every turbine .nc file and format them as xarray Datasets.

        The result is also stored in self.turbine_datasets.

        Returns:
            turbine_datasets: List of xarray Datasets, one for each turbine file,
                containing the turbine simulation output data.
        """
        turbine_datasets = [self._read_member_as_xarray(fn) for fn in self.turbine_files]
        self.turbine_datasets = turbine_datasets
        return turbine_datasets

    def get_turbine_data_as_dataframe(self, variables=None):
        """Convert the turbine data that is formatted as xarray Datasets to a single Pandas
        DataFrame that is easy to investigate and do analysis with.

        Args:
            variables (list, optional): List of turbine measurement variables that should be
                exported from the simulation file. Defaults to ["ptur", "ufsf", "vfsf"] if
                the first file contains "ufsf" (legacy naming), and otherwise to
                ["ptur", "Mdfs", "cosangle", "sinangle"] if it contains "Mdfs".

        Returns:
            df_out (pd.DataFrame): Pandas DataFrame containing the turbine measurement data in
                a wide table format, where there is one column for each turbine and for each
                variable, named "<variable>_<turbine index>". Each row depicts the timestamp
                of one set of measurements. The start-up periods that overlap between
                successive simulations have been removed.

        Raises:
            UserWarning: If variables is None and neither naming convention is recognized.
        """
        # Load the turbine data files, if we haven't done that yet
        if self.turbine_datasets is None:
            self.get_turbine_data_as_xarrays()  # Get all turbine data as xarrays

        # Default options:
        if variables is None:
            available_vars = set(self.turbine_datasets[0].data_vars)
            if "ufsf" in available_vars:
                variables = ["ptur", "ufsf", "vfsf"]
                print(f"WARNING: Using legacy variable naming convention from GRASP, '{variables}'")
            elif "Mdfs" in available_vars:
                variables = ["ptur", "Mdfs", "cosangle", "sinangle"]
            else:
                raise UserWarning("Unfamiliar variable naming convention in GRASP datafiles.")

        # Column names in wide format following FLASC format. These are identical for every
        # file, so they are built once: all turbines of the first variable, then all
        # turbines of the second variable, and so on.
        num_turbines = len(self.turbine_datasets[0].turbine)
        columns = [f"{var:s}_{ti:03d}" for var in variables for ti in range(num_turbines)]

        # Convert files one by one to Pandas DataFrames
        df_list = []
        for turbine_dataset in self.turbine_datasets:
            # Convert the data from each file into a 'wide' Pandas DataFrame.
            # NOTE(review): unstack() pivots the innermost index level, so this assumes
            # the turbine dimension comes after the time dimension -- confirm.
            df_turbine = turbine_dataset[variables].to_dataframe().unstack()
            df_turbine.columns = columns
            df_turbine = df_turbine.reset_index()

            # Finally, append it to a list that we will stitch together later
            df_list.append(df_turbine)

        # Concatenate all the individual dataframes and deal with overlap in timeseries entries
        df_out = self._concatenate_dataframes_and_remove_startup_time(df_list)
        return df_out

    def construct_flasc_timeseries(self):
        """Construct a FLASC-style timeseries DataFrame from the turbine data.

        The turbine data is read (if not yet in memory) and converted to a wide DataFrame.
        The power columns "ptur_XXX" are renamed to "pow_XXX", and for every turbine a wind
        speed column "ws_XXX" and wind direction column "wd_XXX" are derived from the
        "Mdfs_XXX", "cosangle_XXX" and "sinangle_XXX" columns, which are then dropped.

        Returns:
            df_tot (pd.DataFrame): Wide DataFrame with the "time" column and, for every
                turbine, the columns "pow_XXX", "ws_XXX" and "wd_XXX" (in degrees, wrapped
                to [0, 360)).

        Raises:
            KeyError: If the "Mdfs", "cosangle" or "sinangle" columns are not available,
                e.g., because the files use the legacy "ufsf"/"vfsf" naming convention.
        """
        # Get the turbine information. This also reads the files if they are not in
        # memory yet, so a single call suffices.
        df_tot = self.get_turbine_data_as_dataframe()

        n_turbines = len([c for c in df_tot.columns if c.startswith("ptur_")])
        df_tot.columns = [c.replace("ptur_", "pow_") for c in df_tot.columns]

        # Fail early with a descriptive message if the required variables are missing
        required = [
            f"{name}_{ti:03d}"
            for name in ("Mdfs", "cosangle", "sinangle")
            for ti in range(n_turbines)
        ]
        missing = [c for c in required if c not in df_tot.columns]
        if missing:
            raise KeyError(
                "Cannot construct wind speed and wind direction: missing column(s) "
                f"{missing}. The 'Mdfs', 'cosangle' and 'sinangle' variables are required."
            )

        # Calculate turbine wind speed and wind direction
        dict_out = {}
        for ti in range(n_turbines):
            u = df_tot[f"cosangle_{ti:03d}"] * df_tot[f"Mdfs_{ti:03d}"]
            v = df_tot[f"sinangle_{ti:03d}"] * df_tot[f"Mdfs_{ti:03d}"]
            dict_out[f"ws_{ti:03d}"] = np.sqrt(u**2.0 + v**2.0)
            # NOTE(review): presumably u and v are the eastward and northward velocity
            # components, so that this yields the direction the wind comes from -- confirm.
            dict_out[f"wd_{ti:03d}"] = wrap_360(180.0 + np.rad2deg(np.arctan2(u, v)))

        # Append local wind speed and direction measurements to the dataframe and remove
        # the intermediate variables they were derived from
        df_tot = pd.concat([df_tot, pd.DataFrame(dict_out)], axis=1)
        helper_prefixes = ("cosangle_", "sinangle_", "Mdfs_")
        df_tot = df_tot.drop(columns=[c for c in df_tot.columns if c.startswith(helper_prefixes)])

        return df_tot


if __name__ == "__main__":
    # Download the LES dataset, convert the turbine output files into a single
    # FLASC-style timeseries, resample it onto a regular 10-minute grid and save
    # the result as a .csv file next to this script.

    # Download files from Zenodo. Note that this may fail in certain VPN
    # environments or with certain SSL certificates.
    root_path = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(root_path, "data")
    zn_download("10.5281/zenodo.18888663", output_dir=data_path)

    # Unzip the LES timeseries data
    path_to_zip_file = os.path.join(data_path, "les_output.zip")
    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
        zip_ref.extractall(data_path)

    # Collect the turbine output files in sorted order. The reader stitches the
    # files together in the order given, treating the first one as the start.
    aspire_turbine_files = sorted(
        glob.glob(
            os.path.join(data_path, "les_output", "2020", "*", "*", "00", "turbinesOut.les.nc")
        )
    )

    # Read all files and construct the timeseries. construct_flasc_timeseries()
    # loads and converts the turbine data itself, so no separate call to
    # get_turbine_data_as_dataframe() is needed.
    atr = AspireTimeseriesReader(aspire_turbine_filelist=aspire_turbine_files, verbose=True)
    df_les_timeseries = atr.construct_flasc_timeseries()

    # Resample to 10 min steps. The time grid runs from the first to the last
    # timestamp, each truncated to the whole minute.
    t0 = df_les_timeseries["time"].iloc[0].floor("min")
    t1 = df_les_timeseries["time"].iloc[-1].floor("min")
    time_array = pd.date_range(start=t0, end=t1, freq="10min").tolist()
    df_les_timeseries = df_resample_by_interpolation(
        df=df_les_timeseries,
        time_array=time_array,  # Interpolate onto the regular 10-minute time grid
        circular_cols=[
            c for c in df_les_timeseries.columns if c.startswith("wd_")
        ],  # Wind directions are angles and must be treated as circular variables
        interp_method="linear",  # Use linear interpolation
        max_gap=td(
            minutes=20
        ),  # Do not interpolate over gaps larger than 20 minutes between measurements
        verbose=False,
    )

    # Save as .csv
    fout = os.path.join(root_path, "les_timeseries.csv")
    df_les_timeseries.to_csv(fout, index=False, float_format="%.3f")
    print("Converted LES timeseries data has been saved to: " + fout)
Loading
Loading