@@ -25,7 +25,7 @@ import io.github.mandar2812.dynaml.models.ParameterizedLearner
2525import io .github .mandar2812 .dynaml .models .gp .AbstractGPRegressionModel
2626import io .github .mandar2812 .dynaml .optimization .{CoupledSimulatedAnnealing , GPMLOptimizer , GloballyOptWithGrad , GridSearch }
2727import io .github .mandar2812 .dynaml .pipes .{DataPipe , ReversibleScaler , Scaler , StreamDataPipe }
28- import io .github .mandar2812 .dynaml .utils .{GaussianScaler , MinMaxScaler }
28+ import io .github .mandar2812 .dynaml .utils .{GaussianScaler , MinMaxScaler , MVGaussianScaler }
2929import org .apache .log4j .Logger
3030
3131/**
@@ -340,6 +340,33 @@ object DynaMLPipe {
340340 (scaler(trainTest), (featuresScaler, targetsScaler))
341341 })
342342
/**
  * Pipe which rescales a [[Stream]] of (features, targets) pairs.
  *
  * Each pair is concatenated into one vector and the resulting list is
  * passed to `utils.getStatsMult`. The statistics it returns (presumably
  * a mean vector and a covariance matrix -- confirm against
  * `utils.getStatsMult`) are split into a features part and a targets
  * part, and each part parameterises its own [[MVGaussianScaler]].
  *
  * The result is the scaled stream paired with the tuple
  * (features scaler, targets scaler), which can be used to map scaled
  * values back to the original data.
  *
  * The dimensions are read from the first element of the stream, so
  * the input must be non-empty.
  * */
val multivariateGaussianScaling =
  DataPipe((data: Stream[(DenseVector[Double], DenseVector[Double])]) => {

    // The first record fixes the number of feature and target dimensions.
    val (firstFeatures, firstTargets) = data.head
    val featureDims = firstFeatures.length
    val targetDims = firstTargets.length

    // Index ranges of the two parts inside the concatenated vector.
    val featureRange = 0 until featureDims
    val targetRange = featureDims until featureDims + targetDims

    // Concatenate features and targets so the statistics are computed jointly.
    val stacked = data.map {
      case (x, y) => DenseVector(x.toArray ++ y.toArray)
    }.toList

    val (jointLocation, jointSpread) = utils.getStatsMult(stacked)

    val featuresScaler = new MVGaussianScaler(
      jointLocation(featureRange),
      jointSpread(featureRange, featureRange))

    val targetsScaler = new MVGaussianScaler(
      jointLocation(targetRange),
      jointSpread(targetRange, targetRange))

    // Combined scaler acting on (features, targets) tuples.
    val scaler: ReversibleScaler[(DenseVector[Double], DenseVector[Double])] =
      featuresScaler * targetsScaler

    (scaler(data), (featuresScaler, targetsScaler))
  })
369+
343370
344371 /**
345372 * Perform gaussian normalization on a data stream which
@@ -371,6 +398,36 @@ object DynaMLPipe {
371398 (scaler(trainTest._1), scaler(trainTest._2), (featuresScaler, targetsScaler))
372399 })
373400
/**
  * Pipe which rescales a (training, test) pair of data sets, each stored
  * as a [[Stream]] of (features, targets) pairs.
  *
  * Only the first (training) stream is used to obtain the statistics:
  * each of its pairs is concatenated into one vector and the resulting
  * list is passed to `utils.getStatsMult`. The statistics it returns
  * (presumably a mean vector and a covariance matrix -- confirm against
  * `utils.getStatsMult`) are split into a features part and a targets
  * part, and each part parameterises its own [[MVGaussianScaler]].
  *
  * The result is a triple: the scaled training stream, the scaled test
  * stream, and the tuple (features scaler, targets scaler), which can be
  * used to map scaled values back to the original data.
  *
  * The dimensions are read from the first training element, so the
  * training stream must be non-empty.
  * */
val multivariateGaussianScalingTrainTest =
  DataPipe((trainTest: (Stream[(DenseVector[Double], DenseVector[Double])],
    Stream[(DenseVector[Double], DenseVector[Double])])) => {

    val (training, test) = trainTest

    // The first training record fixes the feature and target dimensions.
    val (firstFeatures, firstTargets) = training.head
    val featureDims = firstFeatures.length
    val targetDims = firstTargets.length

    // Index ranges of the two parts inside the concatenated vector.
    val featureRange = 0 until featureDims
    val targetRange = featureDims until featureDims + targetDims

    // Statistics come from the training stream alone; the test stream is
    // only transformed with the resulting scalers.
    val stacked = training.map {
      case (x, y) => DenseVector(x.toArray ++ y.toArray)
    }.toList

    val (jointLocation, jointSpread) = utils.getStatsMult(stacked)

    val featuresScaler = new MVGaussianScaler(
      jointLocation(featureRange),
      jointSpread(featureRange, featureRange))

    val targetsScaler = new MVGaussianScaler(
      jointLocation(targetRange),
      jointSpread(targetRange, targetRange))

    // Combined scaler acting on (features, targets) tuples.
    val scaler: ReversibleScaler[(DenseVector[Double], DenseVector[Double])] =
      featuresScaler * targetsScaler

    (scaler(training), scaler(test), (featuresScaler, targetsScaler))
  })
429+
430+
374431 /**
375432 * Scale a data set which is stored as a [[Stream ]],
376433 * return the scaled data as well as a [[MinMaxScaler ]] instance
0 commit comments