Skip to content

Commit 3671f35

Browse files
committed
Bug fixes and optimisations
1. Critical fix to Min-max scaling pipes in DynaML: feature scaling was mistakenly done with a GaussianScaler object. 2. Used the scalaxy-streams `optimize` method. 3. Added a script for playing around with kernel GP models.
1 parent 527349b commit 3671f35

File tree

7 files changed

+78
-55
lines changed

7 files changed

+78
-55
lines changed

build.sbt

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ packageDescription := "DynaML is a scala library/repl for implementing and worki
1212
"which can be extended easily to implement advanced models for small and large scale applications.\n\n"+
1313
"But the library can also be used as an educational/research tool for data analysis."
1414

15-
val mainVersion = "v1.4.1-beta.7"
15+
val mainVersion = "v1.4.1-beta.8"
1616

1717
val dataDirectory = settingKey[File]("The directory holding the data files for running example scripts")
1818

@@ -33,17 +33,9 @@ lazy val commonSettings = Seq(
3333
linearAlgebraDependencies ++ chartsDependencies ++
3434
tinkerpopDependency ++ notebookInterfaceDependency ++
3535
openMLDependency ++ rejinDependency ++
36-
rPackages ++ cppCompatDependencies)/*,
36+
rPackages ++ cppCompatDependencies),
3737

38-
scalacOptions ++= Seq("-Xplugin-require:scalaxy-streams", "-optimise", "-Yclosure-elim", "-Yinline"),
39-
40-
scalacOptions in Test ~= (_ filterNot (_ == "-Xplugin-require:scalaxy-streams")),
41-
42-
scalacOptions in Test += "-Xplugin-disable:scalaxy-streams",
43-
44-
autoCompilerPlugins := true,
45-
46-
addCompilerPlugin("com.nativelibs4java" % "scalaxy-streams_2.11" % "0.3.4")*/
38+
scalacOptions ++= Seq("-optimise", "-Yclosure-elim", "-Yinline")
4739
)
4840

4941
lazy val pipes = (project in file("dynaml-pipes")).settings(baseSettings:_*)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
implicit val ev = VectorField(6)
2+
implicit val sp = genericReplicationEncoder[DenseVector[Double]](2)
3+
4+
val sp1 = breezeDVSplitEncoder(2)
5+
val kernel = new LaplacianKernel(1.5)
6+
7+
val other_kernel = new RBFKernel(4.5)
8+
val other_kernel1 = new CauchyKernel(1.0)
9+
10+
val otherSumK = kernel + other_kernel
11+
val sumK2 = new DecomposableCovariance(otherSumK, other_kernel1)(sp1)
12+
13+
AbottPowerPlant(sumK2, new DiracKernel(0.09),
14+
opt = Map("globalOpt" -> "GS", "grid" -> "1", "step" -> "0.004"),
15+
num_training = 3000, num_test = 1000, deltaT = 2, column = 7)
16+

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/DynaMLPipe.scala

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import io.github.mandar2812.dynaml.wavelets.{GroupedHaarWaveletFilter, HaarWavel
3030
import org.apache.log4j.Logger
3131
import org.renjin.script.RenjinScriptEngine
3232
import org.renjin.sexp._
33-
33+
import scalaxy.streams.optimize
3434
import scala.reflect.ClassTag
3535

3636
/**
@@ -254,7 +254,7 @@ object DynaMLPipe {
254254
* (Stream(training data), Stream(test data))
255255
* */
256256
@deprecated("*Standardization pipes are deprecated as of v1.4,"+
257-
" use pipes that output scaler objects instead")
257+
" use pipes that output io.github.mandar2812.dynaml.pipes.Scaler objects instead")
258258
val trainTestGaussianStandardization =
259259
DataPipe((trainTest: (Stream[(DenseVector[Double], Double)],
260260
Stream[(DenseVector[Double], Double)])) => {
@@ -286,7 +286,7 @@ object DynaMLPipe {
286286
* (Stream(training data), Stream(test data))
287287
* */
288288
@deprecated("*Standardization pipes are deprecated as of v1.4,"+
289-
" use pipes that output scaler objects instead")
289+
" use pipes that output io.github.mandar2812.dynaml.pipes.Scaler objects instead")
290290
val featuresGaussianStandardization =
291291
DataPipe((trainTest: (Stream[(DenseVector[Double], Double)],
292292
Stream[(DenseVector[Double], Double)])) => {
@@ -315,7 +315,7 @@ object DynaMLPipe {
315315
* (Stream(training data), Stream(test data))
316316
* */
317317
@deprecated("*Standardization pipes are deprecated as of v1.4,"+
318-
" use pipes that output scaler objects instead")
318+
" use pipes that output io.github.mandar2812.dynaml.pipes.Scaler objects instead")
319319
val trainTestGaussianStandardizationMO =
320320
DataPipe((trainTest: (Stream[(DenseVector[Double], DenseVector[Double])],
321321
Stream[(DenseVector[Double], DenseVector[Double])])) => {
@@ -361,9 +361,9 @@ object DynaMLPipe {
361361
math.sqrt(v/(trainTest.length.toDouble - 1.0)))
362362

363363

364-
val featuresScaler = new GaussianScaler(mean(0 until num_features), stdDev(0 until num_features))
364+
val featuresScaler = GaussianScaler(mean(0 until num_features), stdDev(0 until num_features))
365365

366-
val targetsScaler = new GaussianScaler(
366+
val targetsScaler = GaussianScaler(
367367
mean(num_features until num_features + num_targets),
368368
stdDev(num_features until num_features + num_targets))
369369

@@ -386,11 +386,11 @@ object DynaMLPipe {
386386
val (m, sigma) = utils.getStatsMult(trainTest.map(tup =>
387387
DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)
388388

389-
val featuresScaler = new MVGaussianScaler(
389+
val featuresScaler = MVGaussianScaler(
390390
m(0 until num_features),
391391
sigma(0 until num_features, 0 until num_features))
392392

393-
val targetsScaler = new MVGaussianScaler(
393+
val targetsScaler = MVGaussianScaler(
394394
m(num_features until num_features + num_targets),
395395
sigma(num_features until num_features + num_targets, num_features until num_features + num_targets))
396396

@@ -419,9 +419,9 @@ object DynaMLPipe {
419419
math.sqrt(v/(trainTest._1.length.toDouble - 1.0)))
420420

421421

422-
val featuresScaler = new GaussianScaler(mean(0 until num_features), stdDev(0 until num_features))
422+
val featuresScaler = GaussianScaler(mean(0 until num_features), stdDev(0 until num_features))
423423

424-
val targetsScaler = new GaussianScaler(
424+
val targetsScaler = GaussianScaler(
425425
mean(num_features until num_features + num_targets),
426426
stdDev(num_features until num_features + num_targets))
427427

@@ -445,11 +445,11 @@ object DynaMLPipe {
445445
val (m, sigma) = utils.getStatsMult(trainTest._1.map(tup =>
446446
DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)
447447

448-
val featuresScaler = new MVGaussianScaler(
448+
val featuresScaler = MVGaussianScaler(
449449
m(0 until num_features),
450450
sigma(0 until num_features, 0 until num_features))
451451

452-
val targetsScaler = new MVGaussianScaler(
452+
val targetsScaler = MVGaussianScaler(
453453
m(num_features until num_features + num_targets),
454454
sigma(num_features until num_features + num_targets, num_features until num_features + num_targets))
455455

@@ -475,9 +475,9 @@ object DynaMLPipe {
475475
val (min, max) = utils.getMinMax(trainTest.map(tup =>
476476
DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)
477477

478-
val featuresScaler = new GaussianScaler(min(0 until num_features), max(0 until num_features))
478+
val featuresScaler = MinMaxScaler(min(0 until num_features), max(0 until num_features))
479479

480-
val targetsScaler = new MinMaxScaler(
480+
val targetsScaler = MinMaxScaler(
481481
min(num_features until num_features + num_targets),
482482
max(num_features until num_features + num_targets))
483483

@@ -501,9 +501,9 @@ object DynaMLPipe {
501501
val (min, max) = utils.getMinMax(trainTest._1.map(tup =>
502502
DenseVector(tup._1.toArray ++ tup._2.toArray)).toList)
503503

504-
val featuresScaler = new GaussianScaler(min(0 until num_features), max(0 until num_features))
504+
val featuresScaler = MinMaxScaler(min(0 until num_features), max(0 until num_features))
505505

506-
val targetsScaler = new MinMaxScaler(
506+
val targetsScaler = MinMaxScaler(
507507
min(num_features until num_features + num_targets),
508508
max(num_features until num_features + num_targets))
509509

@@ -572,9 +572,13 @@ object DynaMLPipe {
572572
* put them back together.
573573
* */
574574
val breezeDVSplitEncoder = (n: Int) => Encoder((v: DenseVector[Double]) => {
575-
v.toArray.grouped(n).map(DenseVector(_)).toArray
575+
optimize {
576+
v.toArray.grouped(n).map(DenseVector(_)).toArray
577+
}
576578
}, (vs: Array[DenseVector[Double]]) => {
577-
DenseVector(vs.map(_.toArray).reduceLeft((a,b) => a++b))
579+
optimize {
580+
DenseVector(vs.map(_.toArray).reduceLeft((a,b) => a++b))
581+
}
578582
})
579583

580584
/**

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/kernels/LocalScalarKernel.scala

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
package io.github.mandar2812.dynaml.kernels
22

3+
import scalaxy.streams.optimize
4+
import scala.reflect.ClassTag
35
import breeze.linalg.DenseMatrix
46
import io.github.mandar2812.dynaml.DynaMLPipe
57
import io.github.mandar2812.dynaml.algebra.PartitionedPSDMatrix
68
import io.github.mandar2812.dynaml.pipes._
79

8-
import scala.reflect.ClassTag
9-
10-
1110
/**
1211
* Scalar Kernel defines algebraic behavior for kernels of the form
1312
* K: Index x Index -> Double, i.e. kernel functions whose output
@@ -129,10 +128,14 @@ class DecomposableCovariance[S](kernels: LocalScalarKernel[S]*)(
129128

130129
override def evaluate(x: S, y: S): Double = {
131130
val (xs, ys) = (encoding*encoding)((x,y))
132-
reducer(xs.zip(ys).zip(kernels).map(coupleAndKern => {
133-
val (u,v) = coupleAndKern._1
134-
coupleAndKern._2.evaluate(u,v)
135-
}))
131+
reducer(
132+
optimize {
133+
xs.zip(ys).zip(kernels).map(coupleAndKern => {
134+
val (u,v) = coupleAndKern._1
135+
coupleAndKern._2.evaluate(u,v)
136+
})
137+
}
138+
)
136139
}
137140

138141
override def gradient(x: S, y: S): Map[String, Double] = reducer match {

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/utils/package.scala

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import scala.annotation.tailrec
3131
import scala.util.matching.Regex
3232
import sys.process._
3333
import java.net.URL
34-
34+
import scalaxy.streams.optimize
3535
import spire.algebra.Field
3636

3737
/**
@@ -210,8 +210,8 @@ package object utils {
210210

211211
def strReplace(fileName: String)
212212
(findStringRegex: String, replaceString: String)
213-
: Stream[String] = textFileToStream(fileName)
214-
.map(replace(findStringRegex)(replaceString))
213+
: Stream[String] = optimize {textFileToStream(fileName)
214+
.map(replace(findStringRegex)(replaceString))}
215215

216216
def writeToFile(destination: String)(lines: Stream[String]): Unit = {
217217
val writer = new BufferedWriter(new FileWriter(new File(destination)))
@@ -222,23 +222,25 @@ package object utils {
222222
}
223223

224224
def transformData(transform: (String) => String)(lines: Stream[String]): Stream[String] =
225-
lines.map(transform)
225+
optimize { lines.map(transform) }
226226

227227
def extractColumns(lines: Stream[String], sep: String,
228228
columns: List[Int], naStrings:Map[Int, String]): Stream[String] = {
229229
val tFunc = (line: String) => {
230230
val fields = line.split(sep)
231231

232-
val newFields:List[String] = columns.map(col => {
233-
if (!naStrings.contains(col) || fields(col) != naStrings(col)) fields(col)
234-
else "<NA>"
235-
})
232+
optimize {
233+
val newFields:List[String] = columns.map(col => {
234+
if (!naStrings.contains(col) || fields(col) != naStrings(col)) fields(col)
235+
else "<NA>"
236+
})
236237

237-
val newLine = newFields.foldLeft("")(
238-
(str1, str2) => str1+sep+str2
239-
)
238+
val newLine = newFields.foldLeft("")(
239+
(str1, str2) => str1+sep+str2
240+
)
240241

241-
newLine.tail
242+
newLine.tail
243+
}
242244
}
243245

244246
transformData(tFunc)(lines)
@@ -301,9 +303,11 @@ package object utils {
301303
def isSymmetricMatrix[V](mat: Matrix[V]): Unit = {
302304
isSquareMatrix(mat)
303305

304-
for (i <- 0 until mat.rows; j <- 0 until i)
305-
if (mat(i,j) != mat(j,i))
306-
throw new MatrixNotSymmetricException
306+
optimize {
307+
for (i <- 0 until mat.rows; j <- 0 until i)
308+
if (mat(i,j) != mat(j,i))
309+
throw new MatrixNotSymmetricException
310+
}
307311
}
308312

309313
}

dynaml-pipes/src/main/scala-2.11/io/github/mandar2812/dynaml/pipes/Scaler.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package io.github.mandar2812.dynaml.pipes
22

3+
import scalaxy.streams._
4+
35
/**
46
* @author mandar2812 17/6/16.
57
*
@@ -8,7 +10,7 @@ package io.github.mandar2812.dynaml.pipes
810
*/
911
trait Scaler[S] extends DataPipe[S, S]{
1012
def apply[T[S] <: Traversable[S]](data: T[S]) =
11-
data.map(run _).asInstanceOf[T[S]]
13+
optimize { data.map(run).asInstanceOf[T[S]] }
1214

1315
def *[T](that: Scaler[T]) = {
1416
val firstRun = this.run _

dynaml-pipes/src/main/scala-2.11/io/github/mandar2812/dynaml/pipes/StreamDataPipe.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ specific language governing permissions and limitations
1717
under the License.
1818
* */
1919
package io.github.mandar2812.dynaml.pipes
20+
import scalaxy.streams.optimize
21+
2022

2123
/**
2224
* @author mandar2812 on 17/11/15.
@@ -44,15 +46,15 @@ trait StreamDataPipe[I, J, K] extends DataPipe[Stream[I], K]{
4446
}
4547

4648
trait StreamMapPipe[I, J] extends StreamDataPipe[I, J, Stream[J]] {
47-
override def run(data: Stream[I]): Stream[J] = data.map(pipe)
49+
override def run(data: Stream[I]): Stream[J] = optimize { data.map(pipe) }
4850
}
4951

5052
trait StreamFilterPipe[I] extends StreamDataPipe[I, Boolean, Stream[I]] {
51-
override def run(data: Stream[I]): Stream[I] = data.filter(pipe)
53+
override def run(data: Stream[I]): Stream[I] = optimize { data.filter(pipe) }
5254
}
5355

5456
trait StreamPartitionPipe[I] extends StreamDataPipe[I, Boolean, (Stream[I], Stream[I])] {
55-
override def run(data: Stream[I]): (Stream[I], Stream[I]) = data.partition(pipe)
57+
override def run(data: Stream[I]): (Stream[I], Stream[I]) = optimize { data.partition(pipe) }
5658
}
5759

5860
trait StreamSideEffectPipe[I] extends StreamDataPipe[I, Unit, Unit] {

0 commit comments

Comments
 (0)