From b5e590c6fde10c65e8fe1bcfac63d0bdedda3a7d Mon Sep 17 00:00:00 2001 From: SemanticBeeng Date: Sun, 5 Sep 2021 07:31:49 -0400 Subject: [PATCH 1/4] build update --- .gitignore | 4 ++++ build.sbt | 6 +++--- project/build.properties | 2 +- project/plugins.sbt | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index c58d83b..bde21ef 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,10 @@ lib_managed/ src_managed/ project/boot/ project/plugins/project/ +.bsp/ +project/.boot/ +project/.ivy/ +project/.sbtboot/ # Scala-IDE specific .scala_dependencies diff --git a/build.sbt b/build.sbt index 24507d3..047dd22 100644 --- a/build.sbt +++ b/build.sbt @@ -20,9 +20,9 @@ limitations under the License. // publish isarn-sketches-java for exactly one scala version: // sbt isarn_sketches_java/publish -scalaVersion := "2.12.8" +scalaVersion := "2.12.13" -crossScalaVersions := Seq("2.11.12", "2.12.8") +crossScalaVersions := Seq("2.11.12", "2.12.13") // these do not "inherit" when defined at top level, so // define them here for inclusion in each subproject. @@ -114,7 +114,7 @@ lazy val isarn_sketches = (project in file(".")) "org.isarnproject" %% "isarn-algebra-api" % "0.0.3", "org.isarnproject" %% "isarn-collections" % "0.0.4", "org.isarnproject" %% "isarn-scalatest" % "0.0.3" % Test, - "org.scalatest" %% "scalatest" % "3.0.5" % Test, + "org.scalatest" %% "scalatest" % "3.2.5" % Test, "org.apache.commons" % "commons-math3" % "3.6.1" % Test) ) .settings(publishSettings :_*) diff --git a/project/build.properties b/project/build.properties index 654fe70..10fd9ee 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.3.12 +sbt.version=1.5.5 diff --git a/project/plugins.sbt b/project/plugins.sbt index 4f35abb..522763c 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,11 +1,11 @@ resolvers += Resolver.url( "bintray-sbt-plugin-releases", - url("http://dl.bintray.com/content/sbt/sbt-plugin-releases"))( + url("https://dl.bintray.com/content/sbt/sbt-plugin-releases"))( Resolver.ivyStylePatterns) resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" -resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven" +resolvers += "jgit-repo" at "https://download.eclipse.org/jgit/maven" addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3") From fc68cdb95e29e9fe08430ee1c7dc255da6360597 Mon Sep 17 00:00:00 2001 From: SemanticBeeng Date: Sun, 5 Sep 2021 09:19:43 -0400 Subject: [PATCH 2/4] build upgrade turned off sbt-unicode --- .gitignore | 1 + build.sbt | 28 +- project/plugins.sbt | 2 +- .../isarnproject/sketches/TDigestTest.scala | 224 +++++++----- .../sketches/java/JavaTDigestTest.scala | 337 ++++++++++-------- 5 files changed, 326 insertions(+), 266 deletions(-) diff --git a/.gitignore b/.gitignore index bde21ef..d876fc4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ project/plugins/project/ project/.boot/ project/.ivy/ project/.sbtboot/ +.idea/ # Scala-IDE specific .scala_dependencies diff --git a/build.sbt b/build.sbt index 047dd22..fc252b3 100644 --- a/build.sbt +++ b/build.sbt @@ -67,36 +67,36 @@ javacOptions ++= Seq() scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") -scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", baseDirectory.value+"/root-doc.txt") +//scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", baseDirectory.value+"/root-doc.txt") -enablePlugins(ScalaUnidocPlugin, JavaUnidocPlugin, GhpagesPlugin) +//enablePlugins(ScalaUnidocPlugin, JavaUnidocPlugin, GhpagesPlugin) git.remoteRepo := "git@github.com:isarn/isarn-sketches.git" -siteSubdirName in ScalaUnidoc := "scala/api" +//siteSubdirName in ScalaUnidoc := "scala/api" -siteSubdirName in JavaUnidoc := "java/api" +//siteSubdirName in JavaUnidoc := "java/api" -addMappingsToSiteDir(mappings in (ScalaUnidoc, packageDoc), siteSubdirName in ScalaUnidoc) +//addMappingsToSiteDir(mappings in (ScalaUnidoc, packageDoc), siteSubdirName in ScalaUnidoc) -addMappingsToSiteDir(mappings in (JavaUnidoc, packageDoc), siteSubdirName in JavaUnidoc) +//addMappingsToSiteDir(mappings in (JavaUnidoc, packageDoc), siteSubdirName in JavaUnidoc) // tell unidoc to not do scala-doc for the isarn-sketches-java (javadoc will still get created) -unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(isarn_sketches_java) +//unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(isarn_sketches_java) // this target needs to execute only once, at the top level // turn it off for any sub-projects -def siteSubProjectSettings = Seq( - previewSite := {} -) +//def siteSubProjectSettings = Seq( +// previewSite := {} +//) // browser insisted on caching some older generated site at the default (4000) -previewFixedPort := Some(4444) +//previewFixedPort := Some(4444) lazy val isarn_sketches_java = (project in file("isarn-sketches-java")) .settings(name := "isarn-sketches-java") - .enablePlugins(GenJavadocPlugin, PublishJavadocPlugin) - .settings(siteSubProjectSettings :_*) + //.enablePlugins(GenJavadocPlugin, PublishJavadocPlugin) + //.settings(siteSubProjectSettings :_*) .settings( crossPaths := false, // drop off Scala suffix from artifact names autoScalaLibrary := false // exclude scala-library from dependencies @@ -117,4 +117,4 @@ lazy val isarn_sketches = (project in file(".")) "org.scalatest" %% "scalatest" % "3.2.5" % Test, "org.apache.commons" % "commons-math3" % "3.6.1" % Test) ) - .settings(publishSettings :_*) + //.settings(publishSettings :_*) diff --git a/project/plugins.sbt b/project/plugins.sbt index 522763c..b8271b4 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -9,7 +9,7 @@ resolvers += "jgit-repo" at "https://download.eclipse.org/jgit/maven" addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3") -addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") +//addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") addSbtPlugin("io.crashbox" % "sbt-gpg" % "0.2.1") diff --git a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala index 0aba8aa..56fa48b 100644 --- a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala @@ -16,14 +16,17 @@ limitations under the License. package org.isarnproject.sketches -import org.scalatest._ - import org.isarnproject.scalatest.matchers.seq._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AsyncWordSpec + + +class TDigestTest extends AsyncWordSpec with Matchers { -class TDigestTest extends FlatSpec with Matchers { import org.apache.commons.math3.distribution.RealDistribution import org.apache.commons.math3.distribution.IntegerDistribution + val seed = 235711L scala.util.Random.setSeed(seed) @@ -41,7 +44,7 @@ class TDigestTest extends FlatSpec with Matchers { .map(x => math.abs(td.cdf(x) - dist.cumulativeProbability(x))).max val dInv = (0.01 to 0.99 by 0.01).iterator - .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv + .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv val pass = d <= maxD && dInv <= maxDI if (!pass) Console.err.println(s"testTDvsDist failure: d= $d dInv= $dInv") @@ -49,8 +52,12 @@ class TDigestTest extends FlatSpec with Matchers { } def testSamplingPDF(td: TDigest, dist: RealDistribution): Boolean = { - val tdSamples = Array.fill(10000) { td.samplePDF } - val distSamples = Array.fill(10000) { dist.sample } + val tdSamples = Array.fill(10000) { + td.samplePDF + } + val distSamples = Array.fill(10000) { + dist.sample + } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -59,9 +66,13 @@ class TDigestTest extends FlatSpec with Matchers { } def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { - td.nclusters should be <=(td.maxDiscrete) - val tdSamples = Array.fill(10000) { td.samplePMF } - val distSamples = Array.fill(10000) { dist.sample.toDouble } + td.nclusters should be <= (td.maxDiscrete) + val tdSamples = Array.fill(10000) { + td.samplePMF + } + val distSamples = Array.fill(10000) { + dist.sample.toDouble + } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -72,14 +83,18 @@ class TDigestTest extends FlatSpec with Matchers { def testDistribution(dist: RealDistribution, stdv: Double): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) testTDvsDist(td, dist, stdv) && testSamplingPDF(td, dist) } def testMonotoneCDF(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) val (xmin, xmax) = (td.clusters.keyMin.get, td.clusters.keyMax.get) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(x => td.cdf(x)).sliding(2).map(w => w(1) - w(0)).min @@ -90,7 +105,9 @@ class TDigestTest extends FlatSpec with Matchers { def testMonotoneCDFI(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) val (xmin, xmax) = (0.0, 1.0) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(q => td.cdfInverse(q)).sliding(2).map(w => w(1) - w(0)).min @@ -103,91 +120,102 @@ class TDigestTest extends FlatSpec with Matchers { testMonotoneCDF(dist) && testMonotoneCDFI(dist) } - it should "sketch a uniform distribution" in { - import org.apache.commons.math3.distribution.UniformRealDistribution - val dist = new UniformRealDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "sketch a normal distribution" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "sketch an exponential distribution" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - val dist = new ExponentialDistribution(1.0) - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "aggregate with another t-digest using ++" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) - - val td1 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) - val td2 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) - - testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "respect monotonic cdf and inverse" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - import org.apache.commons.math3.distribution.NormalDistribution - import org.apache.commons.math3.distribution.UniformRealDistribution - - testMonotone(new UniformRealDistribution()) should be (true) - testMonotone(new ExponentialDistribution(1.0)) should be (true) - testMonotone(new NormalDistribution(0.0, 0.1)) should be (true) - } - - it should "respect maxDiscrete parameter" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val data = gd.sample(1000000) - val dataUniq = data.distinct.sorted - val kt = dataUniq.map(_.toDouble).toSet - val td = TDigest.sketch(data, maxDiscrete = 50) - val clust = td.clusters - clust.keys.toSet should be (kt) - val D = clust.keys.map { x => td.cdfDiscrete(x) } - .zip(dataUniq.map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } - - it should "respect maxDiscrete parameter over ++" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000), maxDiscrete = 50) } - val td = tdvec.reduce(_ ++ _) - val clust = td.clusters - clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys) - val D = clust.keys.map { x => td.cdfDiscrete(x) } - .zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } - - it should "serialize and deserialize" in { - import org.apache.commons.math3.distribution.NormalDistribution - - import org.isarnproject.scalatest.serde.roundTripSerDe - - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) - - val tdo = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) - - val tdi = roundTripSerDe(tdo) - - (tdi == tdo) should be (true) - - testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be (true) + it should { + + "sketch a uniform distribution" in { + import org.apache.commons.math3.distribution.UniformRealDistribution + val dist = new UniformRealDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "sketch a normal distribution" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "sketch an exponential distribution" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + val dist = new ExponentialDistribution(1.0) + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "aggregate with another t-digest using ++" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) + + val td1 = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) + val td2 = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) + + testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "respect monotonic cdf and inverse" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + import org.apache.commons.math3.distribution.NormalDistribution + import org.apache.commons.math3.distribution.UniformRealDistribution + + testMonotone(new UniformRealDistribution()) should be(true) + testMonotone(new ExponentialDistribution(1.0)) should be(true) + testMonotone(new NormalDistribution(0.0, 0.1)) should be(true) + } + + "respect maxDiscrete parameter" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val data = gd.sample(1000000) + val dataUniq = data.distinct.sorted + val kt = dataUniq.map(_.toDouble).toSet + val td = TDigest.sketch(data, maxDiscrete = 50) + val clust = td.clusters + clust.keys.toSet should be(kt) + val D = clust.keys.map { x => td.cdfDiscrete(x) } + .zip(dataUniq.map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } + + "respect maxDiscrete parameter over ++" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val tdvec = Vector.fill(10) { + TDigest.sketch(gd.sample(100000), maxDiscrete = 50) + } + val td = tdvec.reduce(_ ++ _) + val clust = td.clusters + clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys) + val D = clust.keys.map { x => td.cdfDiscrete(x) } + .zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } + + "serialize and deserialize" in { + import org.apache.commons.math3.distribution.NormalDistribution + + import org.isarnproject.scalatest.serde.roundTripSerDe + + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) + + val tdo = TDigest.sketch(Iterator.fill(ss) { + dist.sample + }, delta = delta) + + val tdi = roundTripSerDe(tdo) + + (tdi == tdo) should be(true) + + testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } } } diff --git a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala index ea80b6e..69cb489 100644 --- a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala @@ -16,14 +16,17 @@ limitations under the License. package org.isarnproject.sketches.java -import org.scalatest._ - import org.isarnproject.scalatest.matchers.seq._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AsyncWordSpec + + +class JavaTDigestTest extends AsyncWordSpec with Matchers { -class JavaTDigestTest extends FlatSpec with Matchers { import org.apache.commons.math3.distribution.RealDistribution import org.apache.commons.math3.distribution.IntegerDistribution + val seed = 235711L scala.util.Random.setSeed(seed) @@ -41,7 +44,7 @@ class JavaTDigestTest extends FlatSpec with Matchers { .map(x => math.abs(td.cdf(x) - dist.cumulativeProbability(x))).max val dInv = (0.01 to 0.99 by 0.01).iterator - .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv + .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv val pass = d <= maxD && dInv <= maxDI if (!pass) Console.err.println(s"testTDvsDist failure: d= $d dInv= $dInv") @@ -49,8 +52,12 @@ class JavaTDigestTest extends FlatSpec with Matchers { } def testSamplingPDF(td: TDigest, dist: RealDistribution): Boolean = { - val tdSamples = Array.fill(10000) { td.samplePDF } - val distSamples = Array.fill(10000) { dist.sample } + val tdSamples = Array.fill(10000) { + td.samplePDF + } + val distSamples = Array.fill(10000) { + dist.sample + } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -59,9 +66,13 @@ class JavaTDigestTest extends FlatSpec with Matchers { } def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { - td.nclusters should be <=(td.maxDiscrete) - val tdSamples = Array.fill(10000) { td.samplePMF } - val distSamples = Array.fill(10000) { dist.sample.toDouble } + td.nclusters should be <= (td.maxDiscrete) + val tdSamples = Array.fill(10000) { + td.samplePMF + } + val distSamples = Array.fill(10000) { + dist.sample.toDouble + } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -72,14 +83,18 @@ class JavaTDigestTest extends FlatSpec with Matchers { def testDistribution(dist: RealDistribution, stdv: Double): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + val td = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) testTDvsDist(td, dist, stdv) && testSamplingPDF(td, dist) } def testMonotoneCDF(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + val td = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) val (xmin, xmax) = (td.cent(0), td.cent(td.nclusters - 1)) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(x => td.cdf(x)).sliding(2).map(w => w(1) - w(0)).min @@ -90,7 +105,9 @@ class JavaTDigestTest extends FlatSpec with Matchers { def testMonotoneCDFI(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + val td = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) val (xmin, xmax) = (0.0, 1.0) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(q => td.cdfInverse(q)).sliding(2).map(w => w(1) - w(0)).min @@ -103,168 +120,182 @@ class JavaTDigestTest extends FlatSpec with Matchers { testMonotoneCDF(dist) && testMonotoneCDFI(dist) } - it should "sketch a uniform distribution" in { - import org.apache.commons.math3.distribution.UniformRealDistribution - val dist = new UniformRealDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "sketch a normal distribution" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "sketch an exponential distribution" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - val dist = new ExponentialDistribution(1.0) - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "aggregate with another t-digest using merge method" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + it should { + "sketch a uniform distribution" in { + import org.apache.commons.math3.distribution.UniformRealDistribution + val dist = new UniformRealDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - val td1 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) - val td2 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + "sketch a normal distribution" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - testTDvsDist(TDigest.merge(td1, td2), dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + "sketch an exponential distribution" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + val dist = new ExponentialDistribution(1.0) + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "respect monotonic cdf and inverse" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - import org.apache.commons.math3.distribution.NormalDistribution - import org.apache.commons.math3.distribution.UniformRealDistribution + "aggregate with another t-digest using merge method" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) - testMonotone(new UniformRealDistribution()) should be (true) - testMonotone(new ExponentialDistribution(1.0)) should be (true) - testMonotone(new NormalDistribution(0.0, 0.1)) should be (true) - } + val td1 = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) + val td2 = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) - it should "respect maxDiscrete parameter" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val data = gd.sample(1000000).map(_.toDouble) - val dataUniq = data.distinct.sorted - val kt = dataUniq.map(_.toDouble).toSet - val td = TDigest.sketch(data, delta, 50) - val clust = td.cent - clust.toSet should be (kt) - val D = clust.map { x => td.cdfDiscrete(x) } - .zip(dataUniq.map { k => gd.cumulativeProbability(k.toInt) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } + testTDvsDist(TDigest.merge(td1, td2), dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "respect maxDiscrete parameter over merge" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000).map(_.toDouble), delta, 50) } - val td = tdvec.reduce((a, b) => TDigest.merge(a, b)) - val clust = td.cent - clust.map(_.toInt).map(_.toDouble).toVector should beEqSeq(clust.toVector) - val D = clust.map { x => td.cdfDiscrete(x) } - .zip(clust.map(_.toInt).map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } + "respect monotonic cdf and inverse" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + import org.apache.commons.math3.distribution.NormalDistribution + import org.apache.commons.math3.distribution.UniformRealDistribution - it should "support copy constructor" in { - import org.apache.commons.math3.distribution.NormalDistribution + testMonotone(new UniformRealDistribution()) should be(true) + testMonotone(new ExponentialDistribution(1.0)) should be(true) + testMonotone(new NormalDistribution(0.0, 0.1)) should be(true) + } - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) - val data = Array.fill(ss) { dist.sample } - val td1 = TDigest.sketch(data, delta) - val td2 = new TDigest(td1) - (td2.equals(td1)) should be (true) - (td1.equals(td2)) should be (true) - - // add more data and re-check equality to ensure - // that all state for future updates was correctly copied - for { x <- data } { - td1.update(x) - td2.update(x) + "respect maxDiscrete parameter" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val data = gd.sample(1000000).map(_.toDouble) + val dataUniq = data.distinct.sorted + val kt = dataUniq.map(_.toDouble).toSet + val td = TDigest.sketch(data, delta, 50) + val clust = td.cent + clust.toSet should be(kt) + val D = clust.map { x => td.cdfDiscrete(x) } + .zip(dataUniq.map { k => gd.cumulativeProbability(k.toInt) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) } - (td2.equals(td1)) should be (true) - (td1.equals(td2)) should be (true) - } - def testTDClose(td1: TDigest, td2: TDigest, eps: Double = 1e-6): Unit = { - td1.getCompression() should be (td2.getCompression()) - td1.getMaxDiscrete() should be (td2.getMaxDiscrete()) - td1.size() should be (td2.size()) - td1.mass() should be (td2.mass() +- eps) - for { j <- 0 until td1.size() } { - td1.getCentUnsafe()(j) should be (td2.getCentUnsafe()(j) +- eps) - td1.getMassUnsafe()(j) should be (td2.getMassUnsafe()(j) +- eps) - td1.getFTUnsafe()(1 + j) should be (td2.getFTUnsafe()(1 + j) +- eps) + "respect maxDiscrete parameter over merge" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val tdvec = Vector.fill(10) { + TDigest.sketch(gd.sample(100000).map(_.toDouble), delta, 50) + } + val td = tdvec.reduce((a, b) => TDigest.merge(a, b)) + val clust = td.cent + clust.map(_.toInt).map(_.toDouble).toVector should beEqSeq(clust.toVector) + val D = clust.map { x => td.cdfDiscrete(x) } + .zip(clust.map(_.toInt).map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) } - } - it should "support dser constructor" in { - import java.util.Arrays; - import org.apache.commons.math3.distribution.NormalDistribution + "support copy constructor" in { + import org.apache.commons.math3.distribution.NormalDistribution + + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) + val data = Array.fill(ss) { + dist.sample + } + val td1 = TDigest.sketch(data, delta) + val td2 = new TDigest(td1) + (td2.equals(td1)) should be(true) + (td1.equals(td2)) should be(true) + + // add more data and re-check equality to ensure + // that all state for future updates was correctly copied + for {x <- data} { + td1.update(x) + td2.update(x) + } + (td2.equals(td1)) should be(true) + (td1.equals(td2)) should be(true) + } - val eps = 1e-9 + def testTDClose(td1: TDigest, td2: TDigest, eps: Double = 1e-6): Unit = { + td1.getCompression() should be(td2.getCompression()) + td1.getMaxDiscrete() should be(td2.getMaxDiscrete()) + td1.size() should be(td2.size()) + td1.mass() should be(td2.mass() +- eps) + for {j <- 0 until td1.size()} { + td1.getCentUnsafe()(j) should be(td2.getCentUnsafe()(j) +- eps) + td1.getMassUnsafe()(j) should be(td2.getMassUnsafe()(j) +- eps) + td1.getFTUnsafe()(1 + j) should be(td2.getFTUnsafe()(1 + j) +- eps) + } + } - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) - val data = Array.fill(ss) { dist.sample } - - // test constructing empty t-digests - val td1 = new TDigest(0.5, 0, Array.empty[Double], Array.empty[Double]) - val td2 = new TDigest( - td1.getCompression(), - td1.getMaxDiscrete(), - Arrays.copyOf(td1.getCentUnsafe(), td1.size()), - Arrays.copyOf(td1.getMassUnsafe(), td1.size()) - ) - testTDClose(td1, td2, eps) - - // test sketching from empty state - for { x <- data } { - td1.update(x) - td2.update(x) - } - testTDClose(td1, td2, eps) - - // copy from non-empty state - val td3 = new TDigest( - td1.getCompression(), - td1.getMaxDiscrete(), - Arrays.copyOf(td1.getCentUnsafe(), td1.size()), - Arrays.copyOf(td1.getMassUnsafe(), td1.size()) - ) - testTDClose(td1, td3, eps) - - // test from non-empty state - for { x <- data } { - td1.update(x) - td3.update(x) + "support dser constructor" in { + import java.util.Arrays; + import org.apache.commons.math3.distribution.NormalDistribution + + val eps = 1e-9 + + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) + val data = Array.fill(ss) { + dist.sample + } + + // test constructing empty t-digests + val td1 = new TDigest(0.5, 0, Array.empty[Double], Array.empty[Double]) + val td2 = new TDigest( + td1.getCompression(), + td1.getMaxDiscrete(), + Arrays.copyOf(td1.getCentUnsafe(), td1.size()), + Arrays.copyOf(td1.getMassUnsafe(), td1.size()) + ) + testTDClose(td1, td2, eps) + + // test sketching from empty state + for {x <- data} { + td1.update(x) + td2.update(x) + } + testTDClose(td1, td2, eps) + + // copy from non-empty state + val td3 = new TDigest( + td1.getCompression(), + td1.getMaxDiscrete(), + Arrays.copyOf(td1.getCentUnsafe(), td1.size()), + Arrays.copyOf(td1.getMassUnsafe(), td1.size()) + ) + testTDClose(td1, td3, eps) + + // test from non-empty state + for {x <- data} { + td1.update(x) + td3.update(x) + } + testTDClose(td1, td3, eps) } - testTDClose(td1, td3, eps) - } - it should "serialize and deserialize" in { - import org.apache.commons.math3.distribution.NormalDistribution + "serialize and deserialize" in { + import org.apache.commons.math3.distribution.NormalDistribution - import org.isarnproject.scalatest.serde.roundTripSerDe + import org.isarnproject.scalatest.serde.roundTripSerDe - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) - val tdo = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + val tdo = TDigest.sketch(Array.fill(ss) { + dist.sample + }, delta) - val tdi = roundTripSerDe(tdo) + val tdi = roundTripSerDe(tdo) - (tdi.equals(tdo)) should be (true) + (tdi.equals(tdo)) should be(true) - testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be (true) + testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } } } From 75a75233eeec124d69ca05013c6fbc821bfc9557 Mon Sep 17 00:00:00 2001 From: SemanticBeeng Date: Tue, 7 Sep 2021 16:56:04 -0400 Subject: [PATCH 3/4] build upgrade turned off GenJavaDoc due to inability to resolve the plugin jar See https://github.com/isarn/isarn-sketches/issues/17 --- build.sbt | 30 +++++++++++++++--------------- project/build.properties | 2 +- project/plugins.sbt | 17 ++++++++--------- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/build.sbt b/build.sbt index fc252b3..5b1af64 100644 --- a/build.sbt +++ b/build.sbt @@ -20,9 +20,9 @@ limitations under the License. // publish isarn-sketches-java for exactly one scala version: // sbt isarn_sketches_java/publish -scalaVersion := "2.12.13" +scalaVersion := "2.12.14" -crossScalaVersions := Seq("2.11.12", "2.12.13") +crossScalaVersions := Seq("2.11.12", "2.12.14") // these do not "inherit" when defined at top level, so // define them here for inclusion in each subproject. @@ -67,36 +67,36 @@ javacOptions ++= Seq() scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") -//scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", baseDirectory.value+"/root-doc.txt") +scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", baseDirectory.value+"/root-doc.txt") -//enablePlugins(ScalaUnidocPlugin, JavaUnidocPlugin, GhpagesPlugin) +enablePlugins(ScalaUnidocPlugin, JavaUnidocPlugin, GhpagesPlugin) git.remoteRepo := "git@github.com:isarn/isarn-sketches.git" -//siteSubdirName in ScalaUnidoc := "scala/api" +siteSubdirName in ScalaUnidoc := "scala/api" -//siteSubdirName in JavaUnidoc := "java/api" +siteSubdirName in JavaUnidoc := "java/api" -//addMappingsToSiteDir(mappings in (ScalaUnidoc, packageDoc), siteSubdirName in ScalaUnidoc) +addMappingsToSiteDir(mappings in (ScalaUnidoc, packageDoc), siteSubdirName in ScalaUnidoc) -//addMappingsToSiteDir(mappings in (JavaUnidoc, packageDoc), siteSubdirName in JavaUnidoc) +addMappingsToSiteDir(mappings in (JavaUnidoc, packageDoc), siteSubdirName in JavaUnidoc) // tell unidoc to not do scala-doc for the isarn-sketches-java (javadoc will still get created) -//unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(isarn_sketches_java) +unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(isarn_sketches_java) // this target needs to execute only once, at the top level // turn it off for any sub-projects -//def siteSubProjectSettings = Seq( -// previewSite := {} -//) +def siteSubProjectSettings = Seq( + previewSite := {} +) // browser insisted on caching some older generated site at the default (4000) -//previewFixedPort := Some(4444) +previewFixedPort := Some(4444) lazy val isarn_sketches_java = (project in file("isarn-sketches-java")) .settings(name := "isarn-sketches-java") //.enablePlugins(GenJavadocPlugin, PublishJavadocPlugin) - //.settings(siteSubProjectSettings :_*) + .settings(siteSubProjectSettings :_*) .settings( crossPaths := false, // drop off Scala suffix from artifact names autoScalaLibrary := false // exclude scala-library from dependencies @@ -117,4 +117,4 @@ lazy val isarn_sketches = (project in file(".")) "org.scalatest" %% "scalatest" % "3.2.5" % Test, "org.apache.commons" % "commons-math3" % "3.6.1" % Test) ) - //.settings(publishSettings :_*) + .settings(publishSettings :_*) diff --git a/project/build.properties b/project/build.properties index 10fd9ee..9edb75b 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.5.5 +sbt.version=1.5.4 diff --git a/project/plugins.sbt b/project/plugins.sbt index b8271b4..84fd37f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,15 +1,14 @@ -resolvers += Resolver.url( - "bintray-sbt-plugin-releases", - url("https://dl.bintray.com/content/sbt/sbt-plugin-releases"))( - Resolver.ivyStylePatterns) - -resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" - -resolvers += "jgit-repo" at "https://download.eclipse.org/jgit/maven" +resolvers ++= Seq( + "jgit-repo".at("https://download.eclipse.org/jgit/maven"), + //"sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/", + //Resolver.url("bintray-sbt-plugin-releases", url("https://dl.bintray.com/content/sbt/sbt-plugin-releases"))( + // Resolver.ivyStylePatterns + //) +) addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3") -//addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") +addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") addSbtPlugin("io.crashbox" % "sbt-gpg" % "0.2.1") From d93a5dfd4eb6a5fd72b45a9cb809142745edf697 Mon Sep 17 00:00:00 2001 From: SemanticBeeng Date: Tue, 7 Sep 2021 17:05:14 -0400 Subject: [PATCH 4/4] build upgrade turned off GenJavaDoc due to inability to resolve the plugin jar See https://github.com/isarn/isarn-sketches/issues/17 --- .../isarnproject/sketches/TDigestTest.scala | 44 ++++------------ .../sketches/java/JavaTDigestTest.scala | 52 +++++-------------- 2 files changed, 24 insertions(+), 72 deletions(-) diff --git a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala index 56fa48b..4ee6e91 100644 --- a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala @@ -52,12 +52,8 @@ class TDigestTest extends AsyncWordSpec with Matchers { } def testSamplingPDF(td: TDigest, dist: RealDistribution): Boolean = { - val tdSamples = Array.fill(10000) { - td.samplePDF - } - val distSamples = Array.fill(10000) { - dist.sample - } + val tdSamples = Array.fill(10000) { td.samplePDF } + val distSamples = Array.fill(10000) { dist.sample } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -67,12 +63,8 @@ class TDigestTest extends AsyncWordSpec with Matchers { def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { td.nclusters should be <= (td.maxDiscrete) - val tdSamples = Array.fill(10000) { - td.samplePMF - } - val distSamples = Array.fill(10000) { - dist.sample.toDouble - } + val tdSamples = Array.fill(10000) { td.samplePMF } + val distSamples = Array.fill(10000) { dist.sample.toDouble } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -83,18 +75,14 @@ class TDigestTest extends AsyncWordSpec with Matchers { def testDistribution(dist: RealDistribution, stdv: Double): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) testTDvsDist(td, dist, stdv) && testSamplingPDF(td, dist) } def testMonotoneCDF(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) val (xmin, xmax) = (td.clusters.keyMin.get, td.clusters.keyMax.get) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(x => td.cdf(x)).sliding(2).map(w => w(1) - w(0)).min @@ -105,9 +93,7 @@ class TDigestTest extends AsyncWordSpec with Matchers { def testMonotoneCDFI(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) + val td = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) val (xmin, xmax) = (0.0, 1.0) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(q => td.cdfInverse(q)).sliding(2).map(w => w(1) - w(0)).min @@ -145,12 +131,8 @@ class TDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val td1 = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) - val td2 = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) + val td1 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) + val td2 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be(true) } @@ -185,9 +167,7 @@ class TDigestTest extends AsyncWordSpec with Matchers { "respect maxDiscrete parameter over ++" in { import org.apache.commons.math3.distribution.GeometricDistribution val gd = new GeometricDistribution(0.33) - val tdvec = Vector.fill(10) { - TDigest.sketch(gd.sample(100000), maxDiscrete = 50) - } + val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000), maxDiscrete = 50) } val td = tdvec.reduce(_ ++ _) val clust = td.clusters clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys) @@ -207,9 +187,7 @@ class TDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val tdo = TDigest.sketch(Iterator.fill(ss) { - dist.sample - }, delta = delta) + val tdo = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) val tdi = roundTripSerDe(tdo) diff --git a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala index 69cb489..9d5f320 100644 --- a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala @@ -52,12 +52,8 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { } def testSamplingPDF(td: TDigest, dist: RealDistribution): Boolean = { - val tdSamples = Array.fill(10000) { - td.samplePDF - } - val distSamples = Array.fill(10000) { - dist.sample - } + val tdSamples = Array.fill(10000) { td.samplePDF } + val distSamples = Array.fill(10000) { dist.sample } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -67,12 +63,8 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { td.nclusters should be <= (td.maxDiscrete) - val tdSamples = Array.fill(10000) { - td.samplePMF - } - val distSamples = Array.fill(10000) { - dist.sample.toDouble - } + val tdSamples = Array.fill(10000) { td.samplePMF } + val distSamples = Array.fill(10000) { dist.sample.toDouble } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() val d = kst.kolmogorovSmirnovStatistic(tdSamples, distSamples) val pass = d <= maxD @@ -83,18 +75,14 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { def testDistribution(dist: RealDistribution, stdv: Double): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) + val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) testTDvsDist(td, dist, stdv) && testSamplingPDF(td, dist) } def testMonotoneCDF(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) + val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) val (xmin, xmax) = (td.cent(0), td.cent(td.nclusters - 1)) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(x => td.cdf(x)).sliding(2).map(w => w(1) - w(0)).min @@ -105,9 +93,7 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { def testMonotoneCDFI(dist: RealDistribution): Boolean = { dist.reseedRandomGenerator(seed) - val td = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) + val td = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) val (xmin, xmax) = (0.0, 1.0) val step = (xmax - xmin) / 100000 val t = (xmin to xmax by step).iterator.map(q => td.cdfInverse(q)).sliding(2).map(w => w(1) - w(0)).min @@ -144,12 +130,8 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val td1 = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) - val td2 = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) + val td1 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) + val td2 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) testTDvsDist(TDigest.merge(td1, td2), dist, math.sqrt(dist.getNumericalVariance())) should be(true) } @@ -184,9 +166,7 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { "respect maxDiscrete parameter over merge" in { import org.apache.commons.math3.distribution.GeometricDistribution val gd = new GeometricDistribution(0.33) - val tdvec = Vector.fill(10) { - TDigest.sketch(gd.sample(100000).map(_.toDouble), delta, 50) - } + val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000).map(_.toDouble), delta, 50) } val td = tdvec.reduce((a, b) => TDigest.merge(a, b)) val clust = td.cent clust.map(_.toInt).map(_.toDouble).toVector should beEqSeq(clust.toVector) @@ -203,9 +183,7 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val data = Array.fill(ss) { - dist.sample - } + val data = Array.fill(ss) { dist.sample } val td1 = TDigest.sketch(data, delta) val td2 = new TDigest(td1) (td2.equals(td1)) should be(true) @@ -241,9 +219,7 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val data = Array.fill(ss) { - dist.sample - } + val data = Array.fill(ss) { dist.sample } // test constructing empty t-digests val td1 = new TDigest(0.5, 0, Array.empty[Double], Array.empty[Double]) @@ -287,9 +263,7 @@ class JavaTDigestTest extends AsyncWordSpec with Matchers { val dist = new NormalDistribution() dist.reseedRandomGenerator(seed) - val tdo = TDigest.sketch(Array.fill(ss) { - dist.sample - }, delta) + val tdo = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) val tdi = roundTripSerDe(tdo)