|
| 1 | +use crate::data::sorted_array::SortedArray; |
| 2 | +use crate::float_trait::Float; |
| 3 | +use crate::types::CowArray1; |
| 4 | + |
| 5 | +use conv::prelude::*; |
| 6 | +use ndarray::{s, Array1, ArrayView1, Zip}; |
| 7 | + |
| 8 | +/// A [`TimeSeries`] component |
| 9 | +#[derive(Clone, Debug)] |
| 10 | +pub struct DataSample<'a, T> |
| 11 | +where |
| 12 | + T: Float, |
| 13 | +{ |
| 14 | + pub sample: CowArray1<'a, T>, |
| 15 | + sorted: Option<SortedArray<T>>, |
| 16 | + min: Option<T>, |
| 17 | + max: Option<T>, |
| 18 | + mean: Option<T>, |
| 19 | + median: Option<T>, |
| 20 | + std: Option<T>, |
| 21 | + std2: Option<T>, |
| 22 | +} |
| 23 | + |
/// Generates a caching getter `$getter(&mut self) -> T` for the field `$attr`.
///
/// On the first call the value is computed, stored in `self.$attr` and
/// returned; later calls return the stored value. `$func` is called with
/// `self` to compute the value. The four-argument form first checks
/// `self.sorted`: when a sorted array is already present the value is taken
/// from its `$method_sorted()` method and `$func` is not called.
macro_rules! data_sample_getter {
    // Form that prefers the already-built sorted array over `$func`.
    ($attr: ident, $getter: ident, $func: expr, $method_sorted: ident) => {
        pub fn $getter(&mut self) -> T {
            if let Some(cached) = self.$attr {
                return cached;
            }
            let computed = match self.sorted.as_ref() {
                Some(sorted) => sorted.$method_sorted(),
                None => $func(self),
            };
            self.$attr = Some(computed);
            computed
        }
    };
    // Form that always computes the value with `$func`.
    ($attr: ident, $getter: ident, $func: expr) => {
        pub fn $getter(&mut self) -> T {
            if let Some(cached) = self.$attr {
                return cached;
            }
            let computed = $func(self);
            self.$attr = Some(computed);
            computed
        }
    };
}
| 51 | + |
| 52 | +impl<'a, T> DataSample<'a, T> |
| 53 | +where |
| 54 | + T: Float, |
| 55 | +{ |
| 56 | + pub fn new(sample: CowArray1<'a, T>) -> Self { |
| 57 | + Self { |
| 58 | + sample, |
| 59 | + sorted: None, |
| 60 | + min: None, |
| 61 | + max: None, |
| 62 | + mean: None, |
| 63 | + median: None, |
| 64 | + std: None, |
| 65 | + std2: None, |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + pub fn as_slice(&mut self) -> &[T] { |
| 70 | + if !self.sample.is_standard_layout() { |
| 71 | + let owned: Array1<_> = self.sample.iter().copied().collect::<Vec<_>>().into(); |
| 72 | + self.sample = owned.into(); |
| 73 | + } |
| 74 | + self.sample.as_slice().unwrap() |
| 75 | + } |
| 76 | + |
| 77 | + pub fn get_sorted(&mut self) -> &SortedArray<T> { |
| 78 | + if self.sorted.is_none() { |
| 79 | + self.sorted = Some(self.sample.to_vec().into()); |
| 80 | + } |
| 81 | + self.sorted.as_ref().unwrap() |
| 82 | + } |
| 83 | + |
| 84 | + fn set_min_max(&mut self) { |
| 85 | + let (min, max) = |
| 86 | + self.sample |
| 87 | + .slice(s![1..]) |
| 88 | + .fold((self.sample[0], self.sample[0]), |(min, max), &x| { |
| 89 | + if x > max { |
| 90 | + (min, x) |
| 91 | + } else if x < min { |
| 92 | + (x, max) |
| 93 | + } else { |
| 94 | + (min, max) |
| 95 | + } |
| 96 | + }); |
| 97 | + self.min = Some(min); |
| 98 | + self.max = Some(max); |
| 99 | + } |
| 100 | + |
| 101 | + data_sample_getter!( |
| 102 | + min, |
| 103 | + get_min, |
| 104 | + |ds: &mut DataSample<'a, T>| { |
| 105 | + ds.set_min_max(); |
| 106 | + ds.min.unwrap() |
| 107 | + }, |
| 108 | + minimum |
| 109 | + ); |
| 110 | + data_sample_getter!( |
| 111 | + max, |
| 112 | + get_max, |
| 113 | + |ds: &mut DataSample<'a, T>| { |
| 114 | + ds.set_min_max(); |
| 115 | + ds.max.unwrap() |
| 116 | + }, |
| 117 | + maximum |
| 118 | + ); |
| 119 | + data_sample_getter!(mean, get_mean, |ds: &mut DataSample<'a, T>| { |
| 120 | + ds.sample.mean().expect("time series must be non-empty") |
| 121 | + }); |
| 122 | + data_sample_getter!(median, get_median, |ds: &mut DataSample<'a, T>| { |
| 123 | + ds.get_sorted().median() |
| 124 | + }); |
| 125 | + data_sample_getter!(std, get_std, |ds: &mut DataSample<'a, T>| { |
| 126 | + ds.get_std2().sqrt() |
| 127 | + }); |
| 128 | + data_sample_getter!(std2, get_std2, |ds: &mut DataSample<'a, T>| { |
| 129 | + // Benchmarks show that it is faster than `ndarray::ArrayBase::var(T::one)` |
| 130 | + let mean = ds.get_mean(); |
| 131 | + ds.sample |
| 132 | + .fold(T::zero(), |sum, &x| sum + (x - mean).powi(2)) |
| 133 | + / (ds.sample.len() - 1).value_as::<T>().unwrap() |
| 134 | + }); |
| 135 | + |
| 136 | + pub fn signal_to_noise(&mut self, value: T) -> T { |
| 137 | + if self.get_std().is_zero() { |
| 138 | + T::zero() |
| 139 | + } else { |
| 140 | + (value - self.get_mean()) / self.get_std() |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + /// Returns true if all values are equal. Always true for zero- or one- length |
| 145 | + pub fn is_all_same(&self) -> bool { |
| 146 | + if self.sample.is_empty() { |
| 147 | + return true; |
| 148 | + } |
| 149 | + if self.max.is_some() && self.max == self.min { |
| 150 | + return true; |
| 151 | + } |
| 152 | + if self.std2 == Some(T::zero()) { |
| 153 | + return true; |
| 154 | + } |
| 155 | + if let Some(sorted) = &self.sorted { |
| 156 | + return sorted[0] == sorted[sorted.len() - 1]; |
| 157 | + } |
| 158 | + let x0 = self.sample[0]; |
| 159 | + // all() returns true for the empty slice, i.e. single-point time series |
| 160 | + Zip::from(self.sample.slice(s![1..])).all(|&x| x == x0) |
| 161 | + } |
| 162 | +} |
| 163 | + |
| 164 | +impl<'a, T> From<SortedArray<T>> for DataSample<'a, T> |
| 165 | +where |
| 166 | + T: Float, |
| 167 | +{ |
| 168 | + fn from(sorted: SortedArray<T>) -> Self { |
| 169 | + let sample = sorted.0.clone().into(); |
| 170 | + Self { |
| 171 | + sample, |
| 172 | + sorted: Some(sorted), |
| 173 | + min: None, |
| 174 | + max: None, |
| 175 | + median: None, |
| 176 | + mean: None, |
| 177 | + std: None, |
| 178 | + std2: None, |
| 179 | + } |
| 180 | + } |
| 181 | +} |
| 182 | + |
| 183 | +impl<'a, T, Slice: ?Sized> From<&'a Slice> for DataSample<'a, T> |
| 184 | +where |
| 185 | + T: Float, |
| 186 | + Slice: AsRef<[T]>, |
| 187 | +{ |
| 188 | + fn from(s: &'a Slice) -> Self { |
| 189 | + ArrayView1::from(s).into() |
| 190 | + } |
| 191 | +} |
| 192 | + |
| 193 | +impl<'a, T> From<Vec<T>> for DataSample<'a, T> |
| 194 | +where |
| 195 | + T: Float, |
| 196 | +{ |
| 197 | + fn from(v: Vec<T>) -> Self { |
| 198 | + Array1::from(v).into() |
| 199 | + } |
| 200 | +} |
| 201 | + |
| 202 | +impl<'a, T> From<ArrayView1<'a, T>> for DataSample<'a, T> |
| 203 | +where |
| 204 | + T: Float, |
| 205 | +{ |
| 206 | + fn from(a: ArrayView1<'a, T>) -> Self { |
| 207 | + Self::new(a.into()) |
| 208 | + } |
| 209 | +} |
| 210 | + |
| 211 | +impl<'a, T> From<Array1<T>> for DataSample<'a, T> |
| 212 | +where |
| 213 | + T: Float, |
| 214 | +{ |
| 215 | + fn from(a: Array1<T>) -> Self { |
| 216 | + Self::new(a.into()) |
| 217 | + } |
| 218 | +} |
| 219 | + |
| 220 | +impl<'a, T> From<CowArray1<'a, T>> for DataSample<'a, T> |
| 221 | +where |
| 222 | + T: Float, |
| 223 | +{ |
| 224 | + fn from(a: CowArray1<'a, T>) -> Self { |
| 225 | + Self::new(a) |
| 226 | + } |
| 227 | +} |
| 228 | + |
#[cfg(test)]
#[allow(clippy::unreadable_literal)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;

    use approx::assert_relative_eq;

    /// Generates a test that checks a getter against a reference value.
    ///
    /// The getter is exercised on a fresh sample and on a sample whose sorted
    /// array has already been built. Each is called twice, so both the
    /// computing call and the cached call are covered.
    macro_rules! data_sample_test {
        ($name: ident, $method: ident, $desired: literal, $x: tt $(,)?) => {
            #[test]
            fn $name() {
                let values = $x;
                let expected = $desired;

                for &presorted in &[false, true] {
                    let mut ds: DataSample<_> = DataSample::from(&values);
                    if presorted {
                        ds.get_sorted();
                    }
                    // First call computes the value, second call reads the cache.
                    for _ in 0..2 {
                        assert_relative_eq!(ds.$method(), expected, epsilon = 1e-6);
                    }
                }
            }
        };
    }

    data_sample_test!(
        data_sample_min,
        get_min,
        -7.79420906,
        [3.92948846, 3.28436964, 6.73375373, -7.79420906, -7.23407407],
    );

    data_sample_test!(
        data_sample_max,
        get_max,
        6.73375373,
        [3.92948846, 3.28436964, 6.73375373, -7.79420906, -7.23407407],
    );

    data_sample_test!(
        data_sample_mean,
        get_mean,
        -0.21613426,
        [3.92948846, 3.28436964, 6.73375373, -7.79420906, -7.23407407],
    );

    data_sample_test!(
        data_sample_median_odd,
        get_median,
        3.28436964,
        [3.92948846, 3.28436964, 6.73375373, -7.79420906, -7.23407407],
    );

    data_sample_test!(
        data_sample_median_even,
        get_median,
        5.655794743124782,
        [9.47981408, 3.86815751, 9.90299294, -2.986894, 7.44343197, 1.52751816],
    );

    data_sample_test!(
        data_sample_std,
        get_std,
        6.7900544035968435,
        [3.92948846, 3.28436964, 6.73375373, -7.79420906, -7.23407407],
    );
}
0 commit comments