Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

type safety improved #10

Merged
merged 7 commits into from
Jul 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
*.jl.*.mem
.DS_Store
Manifest.toml
results.*.json
benchmark.md
40 changes: 2 additions & 38 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
@@ -1,40 +1,6 @@
using BenchmarkTools

# Add parallel workers and make package available to workers
using Distributed, CSV, GLM, Distributions, Random, SparseArrays
import HurdleDMR
addprocs(Sys.CPU_THREADS-2)
@everywhere using HurdleDMR

# Clamp `x` into the closed interval [xmin, xmax].
# Base.clamp(x, lo, hi) computes exactly max(lo, min(hi, x)) — use the
# standard-library idiom instead of the hand-rolled min/max nest.
pminmax(x, xmin, xmax) = clamp(x, xmin, xmax)

"""
    bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)

Simulate a benchmark dataset with `n` observations, `p` covariates and `d`
count categories.

Returns the tuple `(covars, counts)`: `covars` is an `n×p` matrix of
Uniform(0,1) draws, and `counts` is an `n×d` `SparseMatrixCSC{Float64,Int}`
whose row `i` is a multinomial draw whose probabilities depend on
`covars[i,:]`.

# Keywords
- `seed`: seed for the global RNG, so repeated calls reproduce the same data.
- `qmin`, `qmax`: bounds used to chop extreme category probabilities.
"""
function bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)
Random.seed!(seed)

# total count per observation: 1 + Poisson(λ0*log(d)), so it is always ≥ 1
λ0 = 500
m = 1 .+ rand(Poisson(λ0*log(d)),n)
covars = rand(n,p)
# unnormalized category intensities for a single covariate row `vi`
ηfn(vi) = exp.([0 + λ0/d*log(1+i)*sum(vi) for i=1:d])

# raw category probabilities
q = [ηfn(covars[i,:]) for i=1:n]

# rescale once and chop extremes
for i=1:n
q[i] = pminmax.(q[i]/sum(q[i]), qmin, qmax)
end

# rescale again to make sure sums to 1
for i=1:n
q[i] ./= sum(q[i])
end

# one Multinomial(m[i], q[i]) draw per observation; hcat builds d×n, the
# trailing ' transposes so rows are observations, then convert sparsifies
counts = convert(SparseMatrixCSC{Float64,Int},hcat(broadcast((qi,mi)->rand(Multinomial(mi, qi)),q,m)...)')
# projdir = p

covars, counts
end

include("helpers.jl")

const SUITE = BenchmarkGroup()

Expand All @@ -44,9 +10,7 @@ BenchmarkTools.DEFAULT_PARAMETERS.seconds = 60

n = 300
p = 3
# covars, counts = bdata(n, p, 10^4)
# sum(all(iszero.(counts), dims=1))
# @btime m = hdmr(covars, counts)
covars, counts = bdata(n, p, 10^2)

for d in 10 .^ (1:4)
SUITE[d] = BenchmarkGroup()
Expand Down
34 changes: 34 additions & 0 deletions benchmark/helpers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Add parallel workers and make package available to workers
using Distributed, CSV, GLM, Distributions, Random, SparseArrays
import HurdleDMR
addprocs(Sys.CPU_THREADS-2)
@everywhere using HurdleDMR

# Clamp `x` into the closed interval [xmin, xmax].
# Base.clamp(x, lo, hi) computes exactly max(lo, min(hi, x)) — use the
# standard-library idiom instead of the hand-rolled min/max nest.
pminmax(x, xmin, xmax) = clamp(x, xmin, xmax)

"""
    bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)

Simulate a benchmark dataset with `n` observations, `p` covariates and `d`
count categories.

Returns the tuple `(covars, counts)`: `covars` is an `n×p` matrix of
Uniform(0,1) draws, and `counts` is an `n×d` `SparseMatrixCSC{Float64,Int}`
whose row `i` is a multinomial draw whose probabilities depend on
`covars[i,:]`.

# Keywords
- `seed`: seed for the global RNG, so repeated calls reproduce the same data.
- `qmin`, `qmax`: bounds used to chop extreme category probabilities.
"""
function bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)
Random.seed!(seed)

# total count per observation: 1 + Poisson(λ0*log(d)), so it is always ≥ 1
λ0 = 500
m = 1 .+ rand(Poisson(λ0*log(d)),n)
covars = rand(n,p)
# unnormalized category intensities for a single covariate row `vi`
ηfn(vi) = exp.([0 + λ0/d*log(1+i)*sum(vi) for i=1:d])

# raw category probabilities
q = [ηfn(covars[i,:]) for i=1:n]

# rescale once and chop extremes
for i=1:n
q[i] = pminmax.(q[i]/sum(q[i]), qmin, qmax)
end

# rescale again to make sure sums to 1
for i=1:n
q[i] ./= sum(q[i])
end

# one Multinomial(m[i], q[i]) draw per observation; hcat builds d×n, the
# trailing ' transposes so rows are observations, then convert sparsifies
counts = convert(SparseMatrixCSC{Float64,Int},hcat(broadcast((qi,mi)->rand(Multinomial(mi, qi)),q,m)...)')
# projdir = p

covars, counts
end
17 changes: 11 additions & 6 deletions benchmark/judge.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# A short example script comparing benchmark results across two branches.
# NOTE(review): this span appears to interleave lines from two revisions of
# the script (an old `benchmark`/`target` flow and a new `current`/`baseline`
# flow, including two `export_markdown` calls) — confirm against the
# repository which single flow is intended before running.
using BenchmarkTools, PkgBenchmark
import HurdleDMR

cd(@__DIR__)

# checkout master
benchmark = benchmarkpkg("HurdleDMR", "master")
#writeresults("results.master.json", benchmark)
#benchmark = readresults("results.master.json")
# presumably leaves some cores free for the benchmark workers — TODO confirm
env = Dict("JULIA_NUM_THREADS" => Sys.CPU_THREADS-2)
# path to the root of the installed HurdleDMR package
pkg = joinpath(dirname(pathof(HurdleDMR)),"..")

# now switch branches and run
target = benchmarkpkg("HurdleDMR", "typesafe_segselect")
# benchmark the currently checked-out revision with the chosen environment
current = benchmarkpkg(pkg, BenchmarkConfig(env = env))
writeresults("results.current.json", current)

# checkout master
baseline = benchmarkpkg(pkg, BenchmarkConfig(id = "master", env = env))
writeresults("results.baseline.json", baseline)
#baseline = readresults("results.baseline.json")

export_markdown("benchmark.md", judge(target, benchmark))
export_markdown("benchmark.md", judge(current, baseline))
43 changes: 43 additions & 0 deletions benchmark/malloc.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
##############################
# memory allocation
##############################

# # first start julia with:
# julia --track-allocation=user malloc.jl <args>
# where args is e.g. true

# Whether hdmr should run on a local cluster, taken from the first CLI
# argument. NOTE(review): throws if ARGS is empty or not parseable as Bool.
local_cluster = parse(Bool, ARGS[1])
@info "tracking memory allocation with local_cluster=$local_cluster"

using BenchmarkTools, Profile, Traceur, InteractiveUtils, Lasso

# brings bdata() into scope (and runs its worker-process setup)
include("helpers.jl")

# keyword arguments splatted into both hdmr calls below
kwargs = Dict(:verbose=>false)

# small problem size so the allocation report stays readable
n = 300
p = 3
covars, counts = bdata(n, p, 10)
@assert sum(all(iszero.(counts), dims=1)) == 0 "some columns are always zero"

@info "First run is to ensure that everything is compiled (because compilation allocates memory)."
hdmr(covars, counts; local_cluster=local_cluster, kwargs...)

@info "clear stuff contaminated by compilation"
Profile.clear_malloc_data()

@info "Run your commands again"
hdmr(covars, counts; local_cluster=local_cluster, kwargs...)

@info "done tracking memory allocation"

@info "Quit julia!"
exit()

# Finally, navigate to the directory holding your source code.
# Start julia (without command-line flags), and analyze the results using

# using Coverage
# analyze_malloc(".") # could be "." for the current directory, or "src", etc.

# This will return a vector of MallocInfo objects, specifying the number of
# bytes allocated, the file name, and the line number. These are sorted in
# increasing order of allocation size.
Loading