Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
init

init

init

init

init

init

init

init

init

init

use nativeBLAS for dense input

add py

refactor

refactor

refactor

nit

revert BLAS.ger

revert BLAS.ger

revert BLAS.ger

nit

nit

simplify
  • Loading branch information
zhengruifeng committed May 6, 2020
1 parent ebdf41d commit 473e5a2
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ private[spark] object BLAS extends Serializable {
}

/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
* Adds alpha * v * v.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
* @param U the upper triangular part of the matrix packed in an array (column major)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class MultivariateGaussian @Since("2.0.0") (
*/
@transient private lazy val tuple = {
val (rootSigmaInv, u) = calculateCovarianceConstants
val rootSigmaInvMat = Matrices.fromBreeze(rootSigmaInv)
val rootSigmaInvMat = Matrices.fromBreeze(rootSigmaInv).toDense
val rootSigmaInvMulMu = rootSigmaInvMat.multiply(mean)
(rootSigmaInvMat, u, rootSigmaInvMulMu)
}
Expand All @@ -81,6 +81,43 @@ class MultivariateGaussian @Since("2.0.0") (
u - 0.5 * BLAS.dot(v, v)
}

/**
 * Evaluates the density for every row of `X`, allocating the intermediate
 * buffer needed by the two-argument overload.
 *
 * @param X matrix of input points; the scratch buffer is sized to match its
 *          row and column counts
 * @return the vector produced by `pdf(X, mat)` for a freshly allocated `mat`
 */
private[ml] def pdf(X: Matrix): Vector = {
  val rows = X.numRows
  val cols = X.numCols
  // Zero-initialised, non-transposed scratch space with the same shape as X.
  val scratch = new Array[Double](rows * cols)
  pdf(X, new DenseMatrix(rows, cols, scratch))
}

/**
 * Evaluates the density for every row of `X`, using `mat` as a caller-supplied
 * scratch buffer that is overwritten.
 *
 * The product `X * rootSigmaInvMat.t` is written into `mat`; for each row the
 * squared distance to `rootSigmaInvMulMu` is accumulated and the result is
 * `exp(u - 0.5 * squaredDistance)`.
 *
 * @param X   matrix of input points, one point per row
 * @param mat output buffer for the matrix product; must not be transposed
 *            because its backing array is walked with a stride of `numRows`
 * @return a dense vector with one entry per row of `mat`
 */
private[ml] def pdf(X: Matrix, mat: DenseMatrix): Vector = {
  require(!mat.isTransposed)
  // Snapshot the members into locals before entering the hot loop.
  val logConst = u
  val rootInv = rootSigmaInvMat
  val shift = rootSigmaInvMulMu.toArray

  // mat := 1.0 * X * rootInv.t + 0.0 * mat
  BLAS.gemm(1.0, X, rootInv.transpose, 0.0, mat)
  val numRows = mat.numRows
  val numCols = mat.numCols
  val projected = mat.values

  val densities = new Array[Double](numRows)
  var row = 0
  while (row < numRows) {
    var acc = 0.0
    var col = 0
    while (col < numCols) {
      // Element (row, col) sits at row + col * numRows in the backing array.
      val diff = projected(row + col * numRows) - shift(col)
      acc += diff * diff
      col += 1
    }
    densities(row) = math.exp(logConst - 0.5 * acc)
    row += 1
  }

  Vectors.dense(densities)
}

/**
* Calculate distribution dependent components used for the density function:
* pdf(x) = (2*pi)^(-k/2)^ * det(sigma)^(-1/2)^ * exp((-1/2) * (x-mu).t * inv(sigma) * (x-mu))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class MultivariateGaussianSuite extends SparkMLFunSuite {
test("univariate") {
val x1 = Vectors.dense(0.0)
val x2 = Vectors.dense(1.5)
val mat = Matrices.fromVectors(Seq(x1, x2))

val mu = Vectors.dense(0.0)
val sigma1 = Matrices.dense(1, 1, Array(1.0))
Expand All @@ -35,18 +36,21 @@ class MultivariateGaussianSuite extends SparkMLFunSuite {
assert(dist1.logpdf(x2) ~== -2.0439385332046727 absTol 1E-5)
assert(dist1.pdf(x1) ~== 0.39894 absTol 1E-5)
assert(dist1.pdf(x2) ~== 0.12952 absTol 1E-5)
assert(dist1.pdf(mat) ~== Vectors.dense(0.39894, 0.12952) absTol 1E-5)

val sigma2 = Matrices.dense(1, 1, Array(4.0))
val dist2 = new MultivariateGaussian(mu, sigma2)
assert(dist2.logpdf(x1) ~== -1.612085713764618 absTol 1E-5)
assert(dist2.logpdf(x2) ~== -1.893335713764618 absTol 1E-5)
assert(dist2.pdf(x1) ~== 0.19947 absTol 1E-5)
assert(dist2.pdf(x2) ~== 0.15057 absTol 1E-5)
assert(dist2.pdf(mat) ~== Vectors.dense(0.19947, 0.15057) absTol 1E-5)
}

test("multivariate") {
val x1 = Vectors.dense(0.0, 0.0)
val x2 = Vectors.dense(1.0, 1.0)
val mat = Matrices.fromVectors(Seq(x1, x2))

val mu = Vectors.dense(0.0, 0.0)
val sigma1 = Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0))
Expand All @@ -55,28 +59,33 @@ class MultivariateGaussianSuite extends SparkMLFunSuite {
assert(dist1.logpdf(x2) ~== -2.8378770664093453 absTol 1E-5)
assert(dist1.pdf(x1) ~== 0.15915 absTol 1E-5)
assert(dist1.pdf(x2) ~== 0.05855 absTol 1E-5)
assert(dist1.pdf(mat) ~== Vectors.dense(0.15915, 0.05855) absTol 1E-5)

val sigma2 = Matrices.dense(2, 2, Array(4.0, -1.0, -1.0, 2.0))
val dist2 = new MultivariateGaussian(mu, sigma2)
assert(dist2.logpdf(x1) ~== -2.810832140937002 absTol 1E-5)
assert(dist2.logpdf(x2) ~== -3.3822607123655732 absTol 1E-5)
assert(dist2.pdf(x1) ~== 0.060155 absTol 1E-5)
assert(dist2.pdf(x2) ~== 0.033971 absTol 1E-5)
assert(dist2.pdf(mat) ~== Vectors.dense(0.060155, 0.033971) absTol 1E-5)
}

test("multivariate degenerate") {
  // The all-ones 2x2 covariance is singular, so this exercises the degenerate path.
  val mean = Vectors.dense(0.0, 0.0)
  val covariance = Matrices.dense(2, 2, Array(1.0, 1.0, 1.0, 1.0))
  val gaussian = new MultivariateGaussian(mean, covariance)

  val origin = Vectors.dense(0.0, 0.0)
  val ones = Vectors.dense(1.0, 1.0)
  // Both points stacked into one matrix to check the batched pdf against the per-vector one.
  val batch = Matrices.fromVectors(Seq(origin, ones))

  assert(gaussian.pdf(origin) ~== 0.11254 absTol 1E-5)
  assert(gaussian.pdf(ones) ~== 0.068259 absTol 1E-5)
  assert(gaussian.pdf(batch) ~== Vectors.dense(0.11254, 0.068259) absTol 1E-5)
}

test("SPARK-11302") {
val x = Vectors.dense(629, 640, 1.7188, 618.19)
val mat = Matrices.fromVectors(Seq(x))
val mu = Vectors.dense(
1055.3910505836575, 1070.489299610895, 1.39020554474708, 1040.5907503867697)
val sigma = Matrices.dense(4, 4, Array(
Expand All @@ -87,5 +96,6 @@ class MultivariateGaussianSuite extends SparkMLFunSuite {
val dist = new MultivariateGaussian(mu, sigma)
// Agrees with R's dmvnorm: 7.154782e-05
assert(dist.pdf(x) ~== 7.154782224045512E-5 absTol 1E-9)
assert(dist.pdf(mat) ~== Vectors.dense(7.154782224045512E-5) absTol 1E-5)
}
}
Loading

0 comments on commit 473e5a2

Please sign in to comment.