Skip to content

Commit 66656bc

Browse files
committed
Added product implementations for scaling pipes, added minmax and gaussian scaling based on objects
1 parent 2c0a5ef commit 66656bc

File tree

4 files changed

+107
-3
lines changed

4 files changed

+107
-3
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ packageDescription := "DynaML is a scala library/repl for implementing and worki
1212
"which can be extended easily to implement advanced models for small and large scale applications.\n\n"+
1313
"But the library can also be used as an educational/research tool for data analysis."
1414

15-
val mainVersion = "v1.4-beta.11"
15+
val mainVersion = "v1.4-beta.12"
1616

1717
val dataDirectory = settingKey[File]("The directory holding the data files for running example scripts")
1818

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/DynaMLPipe.scala

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import io.github.mandar2812.dynaml.models.ParameterizedLearner
77
import io.github.mandar2812.dynaml.models.gp.AbstractGPRegressionModel
88
import io.github.mandar2812.dynaml.optimization.{CoupledSimulatedAnnealing, GPMLOptimizer, GloballyOptWithGrad, GridSearch}
99
import io.github.mandar2812.dynaml.pipes.{DataPipe, ReversibleScaler, Scaler, StreamDataPipe}
10+
import io.github.mandar2812.dynaml.utils.{GaussianScaler, MinMaxScaler}
1011
import org.apache.log4j.Logger
1112

1213
/**
@@ -284,6 +285,62 @@ object DynaMLPipe {
284285
trainTest._2.map(normalizationFunc)), (mean, stdDev))
285286
})
286287

288+
/**
  * Perform gaussian scaling on a (training, test) pair of data streams.
  *
  * Input is a [[Tuple2]] of the form
  *
  * (Stream(training data), Stream(test data))
  *
  * where every record is a (features, targets) pair of vectors.
  * The statistics are computed from the training stream only and the
  * resulting scaler is applied to both streams.
  *
  * The training stream must be non-empty: its first record is read
  * to determine the number of features and targets.
  *
  * @return A triple (scaled training stream, scaled test stream, scaler),
  *         where the scaler is the product of the features scaler and
  *         the targets scaler.
  * */
val gaussianScalingTrainTest =
  DataPipe((trainTest: (Stream[(DenseVector[Double], DenseVector[Double])],
    Stream[(DenseVector[Double], DenseVector[Double])])) => {

    val (num_features, num_targets) = (trainTest._1.head._1.length, trainTest._1.head._2.length)

    // Features and targets are concatenated so that a single pass yields the statistics of both.
    val (mean, variance) = utils.getStats(trainTest._1.map(tup =>
      DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)

    // Hoisted: the sample count does not depend on the component being processed.
    val num_samples = trainTest._1.length.toDouble

    val stdDev: DenseVector[Double] = variance.map(v =>
      math.sqrt(v/(num_samples - 1.0)))

    val featuresScaler = new GaussianScaler(mean(0 until num_features), stdDev(0 until num_features))

    // `until` is exclusive of its upper bound, so the targets occupy
    // [num_features, num_features + num_targets); subtracting one here
    // would drop the last target dimension.
    val targetsScaler = new GaussianScaler(
      mean(num_features until num_features + num_targets),
      stdDev(num_features until num_features + num_targets))

    val scaler = featuresScaler * targetsScaler

    (scaler(trainTest._1), scaler(trainTest._2), scaler)
  })
317+
318+
/**
  * Perform [0,1] (min-max) scaling on a (training, test) pair of data streams.
  *
  * Input is a [[Tuple2]] of the form
  *
  * (Stream(training data), Stream(test data))
  *
  * where every record is a (features, targets) pair of vectors.
  * The component-wise minimum and maximum are computed from the training
  * stream only and the resulting scaler is applied to both streams.
  *
  * The training stream must be non-empty: its first record is read
  * to determine the number of features and targets.
  *
  * @return A triple (scaled training stream, scaled test stream, scaler),
  *         where the scaler is the product of the features scaler and
  *         the targets scaler.
  * */
val minMaxScalingTrainTest =
  DataPipe((trainTest: (Stream[(DenseVector[Double], DenseVector[Double])],
    Stream[(DenseVector[Double], DenseVector[Double])])) => {

    val (num_features, num_targets) = (trainTest._1.head._1.length, trainTest._1.head._2.length)

    // Features and targets are concatenated so that a single pass yields the extrema of both.
    val (min, max) = utils.getMinMax(trainTest._1.map(tup =>
      DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)

    // Both halves of the record are min-max scaled; the features must not be
    // handed to a GaussianScaler, which would treat (min, max) as its own parameters.
    val featuresScaler = new MinMaxScaler(min(0 until num_features), max(0 until num_features))

    // `until` is exclusive of its upper bound, so the targets occupy
    // [num_features, num_features + num_targets); subtracting one here
    // would drop the last target dimension.
    val targetsScaler = new MinMaxScaler(
      min(num_features until num_features + num_targets),
      max(num_features until num_features + num_targets))

    val scaler = featuresScaler * targetsScaler

    (scaler(trainTest._1), scaler(trainTest._2), scaler)
  })
343+
287344
/**
288345
* Extract a subset of the data into a [[Tuple2]] which
289346
* can be used as a training, test combo for model learning and evaluation.

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/utils/package.scala

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,43 @@ package object utils {
7575
i: Int):
7676
(DenseVector[Double], DenseVector[Double]) = d match {
7777
case Nil => (m, s)
78-
case x :: rest => {
78+
case x :: rest =>
7979
getStatsRec(rest, m + (x - m)/i.toDouble,
8080
s + ((x - m) :* (x - (m + (x - m)/i.toDouble))),
8181
i - 1)
82-
}
82+
8383
}
8484

8585
getStatsRec(data.tail, data.head,
8686
DenseVector.zeros[Double](data.head.length),
8787
data.length)
8888
}
8989

90+
91+
/**
  * Compute the component-wise minimum and maximum over a list of vectors.
  *
  * @param data The vectors to scan. `data.head` is read unconditionally,
  *             so the list must contain at least one element.
  * @return A pair (minimum, maximum). For a single-element list both
  *         components are that element itself.
  */
def getMinMax(data: List[DenseVector[Double]]):
(DenseVector[Double], DenseVector[Double]) = {

  // Merge two vectors coordinate by coordinate with the supplied binary operation.
  def pointwise(a: DenseVector[Double], b: DenseVector[Double])
               (op: (Double, Double) => Double): DenseVector[Double] =
    DenseVector((a.toArray zip b.toArray).map(pair => op(pair._1, pair._2)))

  val seed = data.head

  // Start from the first vector as both running minimum and running maximum,
  // then fold the remaining vectors into the two accumulators.
  data.tail.foldLeft((seed, seed)) {
    case ((lowest, highest), x) =>
      (pointwise(x, lowest)(math.min(_, _)), pointwise(x, highest)(math.max(_, _)))
  }
}
114+
90115
/*
91116
* Calculate the value of the hermite polynomials
92117
* tail recursively. This is needed to calculate

dynaml-pipes/src/main/scala-2.11/io/github/mandar2812/dynaml/pipes/Scaler.scala

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ package io.github.mandar2812.dynaml.pipes
99
trait Scaler[S] extends DataPipe[S, S]{

  /**
    * Apply this scaler to every element of a collection.
    *
    * @param data A traversable collection of values of type S.
    * @return The mapped collection, cast back to the collection type of the input.
    */
  def apply[T[S] <: Traversable[S]](data: T[S]) =
    data.map(element => run(element)).asInstanceOf[T[S]]

  /**
    * Product of two scalers: a scaler on pairs which sends the first
    * component through this scaler and the second component through `that`.
    *
    * @param that The scaler applied to the second component of each pair.
    */
  def *[T](that: Scaler[T]) = {
    // Inside the anonymous class `this` denotes the new scaler,
    // so the enclosing instance is captured under a separate name.
    val first = this
    new Scaler[(S,T)] {
      override def run(data: (S, T)): (S, T) = (first.run(data._1), that(data._2))
    }
  }

}
1321

1422
object Scaler {
@@ -30,4 +38,18 @@ trait ReversibleScaler[S] extends Scaler[S] {
3038
*
3139
* */
3240
val i: Scaler[S]
41+
42+
/**
  * Product of two reversible scalers: a reversible scaler on pairs which
  * sends the first component through this scaler and the second component
  * through `that`. Its `i` member is the product of `this.i` and `that.i`.
  *
  * @param that The reversible scaler applied to the second component of each pair.
  */
def *[T](that: ReversibleScaler[T]) = {

  // Inside the anonymous class `this` denotes the new scaler,
  // so the enclosing instance and its inverse are captured up front.
  val first = this

  val firstInverse = first.i

  new ReversibleScaler[(S, T)] {

    val i: Scaler[(S,T)] = firstInverse * that.i

    override def run(data: (S, T)): (S, T) = (first.run(data._1), that(data._2))
  }
}
3355
}

0 commit comments

Comments
 (0)