Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

type safety improved #10

Merged
merged 7 commits into from
Jul 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
*.jl.*.mem
.DS_Store
Manifest.toml
results.*.json
benchmark.md
40 changes: 2 additions & 38 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
@@ -1,40 +1,6 @@
using BenchmarkTools

# Add parallel workers and make package available to workers
using Distributed, CSV, GLM, Distributions, Random, SparseArrays
import HurdleDMR
addprocs(Sys.CPU_THREADS-2)
@everywhere using HurdleDMR

# Clamp `x` into the closed interval [xmin, xmax].
# Base.clamp(x, lo, hi) computes exactly max(lo, min(hi, x)) — use the
# standard-library idiom instead of the hand-rolled min/max nest.
pminmax(x, xmin, xmax) = clamp(x, xmin, xmax)

"""
    bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)

Simulate a benchmark dataset with `n` observations, `p` covariates and `d`
count categories.

Returns the tuple `(covars, counts)`: `covars` is an `n×p` matrix of
Uniform(0,1) draws, and `counts` is an `n×d` `SparseMatrixCSC{Float64,Int}`
whose row `i` is a multinomial draw whose probabilities depend on
`covars[i,:]`.

# Keywords
- `seed`: seed for the global RNG, so repeated calls reproduce the same data.
- `qmin`, `qmax`: bounds used to chop extreme category probabilities.
"""
function bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)
Random.seed!(seed)

# total count per observation: 1 + Poisson(λ0*log(d)), so it is always ≥ 1
λ0 = 500
m = 1 .+ rand(Poisson(λ0*log(d)),n)
covars = rand(n,p)
# unnormalized category intensities for a single covariate row `vi`
ηfn(vi) = exp.([0 + λ0/d*log(1+i)*sum(vi) for i=1:d])

# raw category probabilities
q = [ηfn(covars[i,:]) for i=1:n]

# rescale once and chop extremes
for i=1:n
q[i] = pminmax.(q[i]/sum(q[i]), qmin, qmax)
end

# rescale again to make sure sums to 1
for i=1:n
q[i] ./= sum(q[i])
end

# one Multinomial(m[i], q[i]) draw per observation; hcat builds d×n, the
# trailing ' transposes so rows are observations, then convert sparsifies
counts = convert(SparseMatrixCSC{Float64,Int},hcat(broadcast((qi,mi)->rand(Multinomial(mi, qi)),q,m)...)')
# projdir = p

covars, counts
end

include("helpers.jl")

const SUITE = BenchmarkGroup()

Expand All @@ -44,9 +10,7 @@ BenchmarkTools.DEFAULT_PARAMETERS.seconds = 60

n = 300
p = 3
# covars, counts = bdata(n, p, 10^4)
# sum(all(iszero.(counts), dims=1))
# @btime m = hdmr(covars, counts)
covars, counts = bdata(n, p, 10^2)

for d in 10 .^ (1:4)
SUITE[d] = BenchmarkGroup()
Expand Down
34 changes: 34 additions & 0 deletions benchmark/helpers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Add parallel workers and make package available to workers
using Distributed, CSV, GLM, Distributions, Random, SparseArrays
import HurdleDMR
addprocs(Sys.CPU_THREADS-2)
@everywhere using HurdleDMR

# Clamp `x` into the closed interval [xmin, xmax].
# Base.clamp(x, lo, hi) computes exactly max(lo, min(hi, x)) — use the
# standard-library idiom instead of the hand-rolled min/max nest.
pminmax(x, xmin, xmax) = clamp(x, xmin, xmax)

"""
    bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)

Simulate a benchmark dataset with `n` observations, `p` covariates and `d`
count categories.

Returns the tuple `(covars, counts)`: `covars` is an `n×p` matrix of
Uniform(0,1) draws, and `counts` is an `n×d` `SparseMatrixCSC{Float64,Int}`
whose row `i` is a multinomial draw whose probabilities depend on
`covars[i,:]`.

# Keywords
- `seed`: seed for the global RNG, so repeated calls reproduce the same data.
- `qmin`, `qmax`: bounds used to chop extreme category probabilities.
"""
function bdata(n, p, d; seed=13, qmin=1e-4, qmax=1.0-1e-4)
Random.seed!(seed)

# total count per observation: 1 + Poisson(λ0*log(d)), so it is always ≥ 1
λ0 = 500
m = 1 .+ rand(Poisson(λ0*log(d)),n)
covars = rand(n,p)
# unnormalized category intensities for a single covariate row `vi`
ηfn(vi) = exp.([0 + λ0/d*log(1+i)*sum(vi) for i=1:d])

# raw category probabilities
q = [ηfn(covars[i,:]) for i=1:n]

# rescale once and chop extremes
for i=1:n
q[i] = pminmax.(q[i]/sum(q[i]), qmin, qmax)
end

# rescale again to make sure sums to 1
for i=1:n
q[i] ./= sum(q[i])
end

# one Multinomial(m[i], q[i]) draw per observation; hcat builds d×n, the
# trailing ' transposes so rows are observations, then convert sparsifies
counts = convert(SparseMatrixCSC{Float64,Int},hcat(broadcast((qi,mi)->rand(Multinomial(mi, qi)),q,m)...)')
# projdir = p

covars, counts
end
17 changes: 11 additions & 6 deletions benchmark/judge.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# A short example script comparing benchmark results across two branches.
# NOTE(review): this span appears to interleave lines from two revisions of
# the script (an old `benchmark`/`target` flow and a new `current`/`baseline`
# flow, including two `export_markdown` calls) — confirm against the
# repository which single flow is intended before running.
using BenchmarkTools, PkgBenchmark
import HurdleDMR

cd(@__DIR__)

# checkout master
benchmark = benchmarkpkg("HurdleDMR", "master")
#writeresults("results.master.json", benchmark)
#benchmark = readresults("results.master.json")
# presumably leaves some cores free for the benchmark workers — TODO confirm
env = Dict("JULIA_NUM_THREADS" => Sys.CPU_THREADS-2)
# path to the root of the installed HurdleDMR package
pkg = joinpath(dirname(pathof(HurdleDMR)),"..")

# now switch branches and run
target = benchmarkpkg("HurdleDMR", "typesafe_segselect")
# benchmark the currently checked-out revision with the chosen environment
current = benchmarkpkg(pkg, BenchmarkConfig(env = env))
writeresults("results.current.json", current)

# checkout master
baseline = benchmarkpkg(pkg, BenchmarkConfig(id = "master", env = env))
writeresults("results.baseline.json", baseline)
#baseline = readresults("results.baseline.json")

export_markdown("benchmark.md", judge(target, benchmark))
export_markdown("benchmark.md", judge(current, baseline))
43 changes: 43 additions & 0 deletions benchmark/malloc.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
##############################
# memory allocation
##############################

# # first start julia with:
# julia --track-allocation=user malloc.jl <args>
# where args is e.g. true

# Whether hdmr should run on a local cluster, taken from the first CLI
# argument. NOTE(review): throws if ARGS is empty or not parseable as Bool.
local_cluster = parse(Bool, ARGS[1])
@info "tracking memory allocation with local_cluster=$local_cluster"

using BenchmarkTools, Profile, Traceur, InteractiveUtils, Lasso

# brings bdata() into scope (and runs its worker-process setup)
include("helpers.jl")

# keyword arguments splatted into both hdmr calls below
kwargs = Dict(:verbose=>false)

# small problem size so the allocation report stays readable
n = 300
p = 3
covars, counts = bdata(n, p, 10)
@assert sum(all(iszero.(counts), dims=1)) == 0 "some columns are always zero"

@info "First run is to ensure that everything is compiled (because compilation allocates memory)."
hdmr(covars, counts; local_cluster=local_cluster, kwargs...)

@info "clear stuff contaminated by compilation"
Profile.clear_malloc_data()

@info "Run your commands again"
hdmr(covars, counts; local_cluster=local_cluster, kwargs...)

@info "done tracking memory allocation"

@info "Quit julia!"
exit()

# Finally, navigate to the directory holding your source code.
# Start julia (without command-line flags), and analyze the results using

# using Coverage
# analyze_malloc(".") # could be "." for the current directory, or "src", etc.

# This will return a vector of MallocInfo objects, specifying the number of
# bytes allocated, the file name, and the line number. These are sorted in
# increasing order of allocation size.
Loading