Skip to content

Commit 00b5ff2

Browse files
committed
Added LSSVM committee model based on spark RDDs
1 parent 705e2b0 commit 00b5ff2

File tree

8 files changed

+167
-66
lines changed

8 files changed

+167
-66
lines changed

build.sbt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ lazy val commonSettings = Seq(
4040
"org.jzy3d" % "jzy3d-api" % "0.9.1" % "compile",
4141
"com.lihaoyi" % "ammonite-repl_2.11.7" % "0.5.8"
4242
),
43-
initialCommands in console :=
44-
"""io.github.mandar2812.dynaml.DynaML.run();"""
43+
initialCommands in console := """io.github.mandar2812.dynaml.DynaML.run();"""
4544
)
4645

4746
lazy val DynaML = (project in file(".")).enablePlugins(JavaAppPackaging, BuildInfoPlugin)

src/main/scala/io/github/mandar2812/dynaml/models/CommitteeModel.scala renamed to src/main/scala/io/github/mandar2812/dynaml/models/ensemble/CommitteeModel.scala

Lines changed: 8 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -16,62 +16,13 @@ KIND, either express or implied. See the License for the
1616
specific language governing permissions and limitations
1717
under the License.
1818
* */
19-
package io.github.mandar2812.dynaml.models
19+
package io.github.mandar2812.dynaml.models.ensemble
2020

2121
import breeze.linalg.DenseVector
22-
import io.github.mandar2812.dynaml.pipes.ModelPipe
23-
import io.github.mandar2812.dynaml.models.neuralnets.FeedForwardNetwork
2422
import io.github.mandar2812.dynaml.models.gp.GPRegression
25-
26-
/**
27-
*
28-
* w1*y1(x) + w2*y2(x) + ... + wb*yb(x)
29-
* Defines the basic skeleton of a "meta-model" or
30-
* a model of models.
31-
*
32-
* A set of base models are trained on sub-sampled versions
33-
* of the training data set and finally a predictor of the form.
34-
*
35-
* y(x) = f(y1(x), y2(x), ..., yb(x))
36-
*
37-
* Where f is some combination function and
38-
* b is the number of base models used.
39-
*
40-
* @tparam D The type of the data structure containing the
41-
* training data set.
42-
*
43-
* @tparam D1 The type of data structure containing the data
44-
* of the base models.
45-
*
46-
* @tparam BaseModel The type of model used as base model
47-
* for the meta model.
48-
* example: [[FeedForwardNetwork]], [[GPRegression]], etc
49-
*
50-
* @tparam Pipe A sub-type of [[ModelPipe]] which yields a [[BaseModel]]
51-
* with [[D1]] as the base data structure given a
52-
* data structure of type [[D]]
53-
*
54-
* @param num The number of training data points.
55-
*
56-
* @param data The actual training data
57-
*
58-
* @param networks A sequence of [[Pipe]] objects yielding [[BaseModel]]
59-
* */
60-
abstract class MetaModel[
61-
D, D1,
62-
BaseModel <: Model[D1, DenseVector[Double], Double],
63-
Pipe <: ModelPipe[D, D1, DenseVector[Double], Double, BaseModel]
64-
](num: Int, data: D, networks: Pipe*)
65-
extends Model[D, DenseVector[Double], Double] {
66-
67-
override protected val g = data
68-
69-
val baseNetworks: List[BaseModel] =
70-
networks.toList.map(net => net.run(g))
71-
72-
}
73-
74-
23+
import io.github.mandar2812.dynaml.models.neuralnets.FeedForwardNetwork
24+
import io.github.mandar2812.dynaml.models.{LinearModel, Model}
25+
import io.github.mandar2812.dynaml.pipes.ModelPipe
7526

7627
/**
7728
* Defines an abstract implementation of a "committee-model".
@@ -108,7 +59,7 @@ abstract class CommitteeModel[
10859
D, D1,
10960
BaseModel <: Model[D1, DenseVector[Double], Double],
11061
Pipe <: ModelPipe[D, D1, DenseVector[Double], Double, BaseModel]
111-
](num: Int, data: D, networks: Pipe*) extends
62+
](num: Long, data: D, networks: Pipe*) extends
11263
MetaModel[D,D1,BaseModel,Pipe](num, data, networks:_*) with
11364
LinearModel[D, DenseVector[Double], DenseVector[Double], Double, D] {
11465

@@ -128,7 +79,7 @@ LinearModel[D, DenseVector[Double], DenseVector[Double], Double, D] {
12879
DenseVector.fill[Double](baseNetworks.length)(1.0)
12980

13081
override def initParams(): DenseVector[Double] =
131-
DenseVector.fill[Double](baseNetworks.length)(1.0)
82+
DenseVector.fill[Double](baseNetworks.length)(1.0/baseNetworks.length)
13283

13384
/**
13485
* Learn the parameters
@@ -138,16 +89,14 @@ LinearModel[D, DenseVector[Double], DenseVector[Double], Double, D] {
13889
*
13990
**/
14091
override def learn(): Unit = {
141-
14292
params = optimizer.optimize(num_points, g, initParams())
14393
}
14494

14595
override protected var params: DenseVector[Double] =
146-
DenseVector.fill[Double](baseNetworks.length)(1.0)
96+
initParams()
14797

14898
featureMap = (pattern) =>
149-
DenseVector(baseNetworks.map(net =>
150-
net.predict(pattern)).toArray)
99+
DenseVector(baseNetworks.map(_.predict(pattern)).toArray)
151100

152101

153102
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
* */
19+
package io.github.mandar2812.dynaml.models.ensemble
20+
21+
import breeze.linalg.DenseVector
22+
import io.github.mandar2812.dynaml.models.Model
23+
import io.github.mandar2812.dynaml.models.gp.GPRegression
24+
import io.github.mandar2812.dynaml.models.neuralnets.FeedForwardNetwork
25+
import io.github.mandar2812.dynaml.pipes.ModelPipe
26+
27+
/**
  * Basic skeleton of a "meta-model", i.e. a model of models.
  *
  * A set of base models is built from the training data set and their
  * outputs are combined into a single predictor of the form
  *
  * y(x) = f(y1(x), y2(x), ..., yb(x))
  *
  * where f is some combination function and b is the number of base
  * models used. A weighted sum, w1*y1(x) + w2*y2(x) + ... + wb*yb(x),
  * is one such combination.
  *
  * NOTE(review): the base models are meant to be trained on sub-sampled
  * versions of the training data; this class hands the complete data set
  * to every pipe, so any sub-sampling presumably happens inside the
  * pipes -- confirm.
  *
  * @tparam D The type of the data structure containing the
  *           training data set.
  * @tparam D1 The type of data structure containing the data
  *            of the base models.
  * @tparam BaseModel The type of model used as base model
  *                   for the meta model.
  *                   example: [[FeedForwardNetwork]], [[GPRegression]], etc
  * @tparam Pipe A sub-type of [[ModelPipe]] which yields a [[BaseModel]]
  *              with [[D1]] as the base data structure given a
  *              data structure of type [[D]]
  * @param num The number of training data points.
  * @param data The actual training data
  * @param networks A sequence of [[Pipe]] objects yielding [[BaseModel]]
  * */
abstract class MetaModel[
D, D1,
BaseModel <: Model[D1, DenseVector[Double], Double],
Pipe <: ModelPipe[D, D1, DenseVector[Double], Double, BaseModel]
](num: Long, data: D, networks: Pipe*)
  extends Model[D, DenseVector[Double], Double] {

  // The training data, exposed through the inherited member `g`.
  override protected val g = data

  // One base model per supplied pipe, in the order the pipes were given;
  // each pipe is applied to the same data `g`.
  val baseNetworks: List[BaseModel] =
    for (pipe <- networks.toList) yield pipe(g)

}

src/main/scala/io/github/mandar2812/dynaml/models/neuralnets/CommitteeNetwork.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,15 @@ LinearModel[D, DenseVector[Double], DenseVector[Double],
6262
override def predict(point: DenseVector[Double]): Double =
6363
params dot featureMap(point)
6464

65-
override def clearParameters(): Unit =
66-
DenseVector.fill[Double](baseNetworks.length)(1.0)
65+
override def clearParameters(): Unit = {
66+
params = initParams()
67+
}
6768

6869
override def initParams(): DenseVector[Double] =
69-
DenseVector.fill[Double](baseNetworks.length)(1.0)
70+
DenseVector.fill[Double](baseNetworks.length)(1.0/baseNetworks.length)
7071

7172
featureMap = (pattern) =>
72-
DenseVector(baseNetworks.map(net => net.forwardPass(pattern)(0)).toArray)
73+
DenseVector(baseNetworks.map(_.forwardPass(pattern)(0)).toArray)
7374

7475
/**
7576
* Learn the parameters

src/main/scala/io/github/mandar2812/dynaml/models/svm/AbstractDualLSSVM.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,5 +81,6 @@ abstract class AbstractDualLSSVM[Index](data: Stream[(Index, Double)],
8181
current_state += ("regularization" -> h("regularization"))
8282
}
8383

84+
/** Returns `current_state`, the state map this class maintains (it holds, for instance, the "regularization" entry). */
def getState = current_state
8485

8586
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package io.github.mandar2812.dynaml.models.svm
2+
3+
import breeze.linalg.DenseVector
4+
import io.github.mandar2812.dynaml.models.ensemble.CommitteeModel
5+
import io.github.mandar2812.dynaml.optimization.{GlobalOptimizer, GridSearch, RDDCommitteeSolver, RegularizedOptimizer}
6+
import io.github.mandar2812.dynaml.pipes.DLSSVMPipe
7+
import org.apache.spark.rdd.RDD
8+
9+
/**
10+
* Created by mandar on 3/6/16.
11+
*/
12+
13+
/**
  * Committee of dual LSSVM base models whose training data is held in a Spark RDD.
  *
  * Each supplied pipe turns the RDD into one [[DLSSVM]] base model; the committee
  * weights are then computed by an [[RDDCommitteeSolver]].
  *
  * @param num   Passed through to [[CommitteeModel]] together with `data` and `pipes`.
  * @param data  Training data as (feature vector, target) pairs.
  * @param pipes Pipes producing the [[DLSSVM]] base models from `data`.
  */
class LSSVMCommittee(num: Long,
                     data: RDD[(DenseVector[Double], Double)],
                     pipes: DLSSVMPipe[RDD[(DenseVector[Double], Double)]]*)
  extends CommitteeModel[
    RDD[(DenseVector[Double], Double)],
    Stream[(DenseVector[Double], Double)],
    DLSSVM,
    DLSSVMPipe[RDD[(DenseVector[Double], Double)]]](num, data, pipes: _*) {

  // Solver used to fit the committee weights on RDD based data.
  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double],
    DenseVector[Double], Double,
    RDD[(DenseVector[Double], Double)]] = new RDDCommitteeSolver

  // One grid search tuner per base model, in the same order as `baseNetworks`.
  var modelTuners: List[GlobalOptimizer[DLSSVM]] =
    for (svm <- baseNetworks)
      yield new GridSearch[DLSSVM](svm).setGridSize(10).setStepSize(0.1)

  /**
    * Tunes and trains every base model, then fits the committee weights.
    */
  override def learn(): Unit = {
    // Stage 1: tune each base SVM with its own tuner, starting from the
    // model's current state, then train it with the selected configuration.
    baseNetworks.zip(modelTuners).foreach { case (svm, tuner) =>
      val (_, tunedState) = tuner.optimize(svm.getState, Map())
      svm.setState(tunedState)
      svm.learn()
    }

    // Stage 2: learn the committee weights on the base model outputs.
    // NOTE(review): featureMap is copied into a local first, presumably so the
    // RDD closure refers to the function value rather than to `this` -- confirm.
    val committeeFeatures = featureMap
    params = optimizer.optimize(
      num_points,
      g.map { case (features, target) => (committeeFeatures(features), target) },
      initParams())
  }

}

src/main/scala/io/github/mandar2812/dynaml/optimization/CommitteeModelSolver.scala

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ under the License.
1818
* */
1919
package io.github.mandar2812.dynaml.optimization
2020

21-
import breeze.linalg.{DenseMatrix, inv, DenseVector}
21+
import breeze.linalg.{DenseMatrix, DenseVector, inv}
22+
import org.apache.spark.rdd.RDD
2223

2324
/**
2425
* Solves the optimization problem pertaining
@@ -50,3 +51,31 @@ RegularizedOptimizer[DenseVector[Double],
5051
ans/Z
5152
}
5253
}
54+
55+
56+
/**
  * Committee weight solver for data stored in a Spark RDD.
  *
  * Every record is a (vector, scalar) pair; the scalar is subtracted from each
  * component of the vector to form a residual vector.
  * NOTE(review): the vector presumably holds the base model predictions for one
  * pattern and the scalar its target value -- confirm against callers.
  */
class RDDCommitteeSolver extends
  RegularizedOptimizer[DenseVector[Double],
    DenseVector[Double], Double,
    RDD[(DenseVector[Double], Double)]] {

  /**
    * Solve the convex optimization problem
    *
    * min wT.C.w such that the components of w sum to one,
    *
    * where C is the sum of the residual outer products divided by nPoints.
    * The returned vector is (C + regParam*I)^-1 * 1, rescaled so that its
    * components add up to one.
    *
    * @param nPoints       Divisor applied to the summed outer products.
    * @param ParamOutEdges RDD of (vector, scalar) records.
    * @param initialP      Only its length is used; it fixes the dimension of
    *                      the identity matrix and of the vector of ones.
    * @return The normalised weight vector.
    */
  override def optimize(nPoints: Long,
                        ParamOutEdges: RDD[(DenseVector[Double], Double)],
                        initialP: DenseVector[Double]): DenseVector[Double] = {

    val dims = initialP.length

    // Sum over all records of residual * residual^T.
    val errorCov = ParamOutEdges.map { case (outputs, target) =>
      val residual = outputs - DenseVector.fill[Double](outputs.length)(target)
      residual * residual.t
    }.reduce(_ + _)

    // Turn the sum into an average, in place.
    errorCov :/= nPoints.toDouble

    val unit = DenseVector.ones[Double](dims)
    val regularized = errorCov + DenseMatrix.eye[Double](dims)*regParam
    val unnormalized: DenseVector[Double] = inv(regularized)*unit

    // Rescale so that the weights sum to one.
    val normalizer: Double = unit dot unnormalized
    unnormalized/normalizer
  }
}

src/main/scala/io/github/mandar2812/dynaml/pipes/ModelPipe.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import io.github.mandar2812.dynaml.kernels.CovarianceFunction
2323
import io.github.mandar2812.dynaml.models._
2424
import io.github.mandar2812.dynaml.models.gp.AbstractGPRegressionModel
2525
import io.github.mandar2812.dynaml.models.lm.GeneralizedLinearModel
26+
import io.github.mandar2812.dynaml.models.svm.{AbstractDualLSSVM, DLSSVM}
2627

2728
/**
2829
* Top level trait for Pipes involving ML models.
@@ -68,4 +69,16 @@ class GLMPipe[T, Source](pre: (Source) => Stream[(DenseVector[Double], Double)],
6869

6970
}
7071

72+
/**
  * A [[ModelPipe]] that builds a [[DLSSVM]] from an arbitrary data source.
  *
  * @tparam Source Type of the raw data handed to the pipe.
  * @param pre  Converts the raw data into a stream of (feature vector, target) pairs.
  * @param cov  Covariance function passed to the constructed [[DLSSVM]].
  * @param task Task identifier passed to the constructed [[DLSSVM]];
  *             defaults to "regression".
  */
class DLSSVMPipe[Source](pre: (Source) => Stream[(DenseVector[Double], Double)],
                         cov: CovarianceFunction[DenseVector[Double], Double, DenseMatrix[Double]],
                         task: String = "regression")
  extends ModelPipe[Source, Stream[(DenseVector[Double], Double)],
    DenseVector[Double], Double, DLSSVM] {

  override val preProcess = pre

  /**
    * Pre-processes `data` and wraps the result in a new [[DLSSVM]].
    */
  override def run(data: Source) = {
    val trainingSet = preProcess(data)
    // Taking the length of a Stream evaluates all of its elements.
    val numPoints = trainingSet.length
    new DLSSVM(trainingSet, numPoints, cov, task)
  }
}

0 commit comments

Comments
 (0)