diff --git a/src/glmfit.jl b/src/glmfit.jl index 236ef5ba..58cc4321 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -66,11 +66,16 @@ function wrkwt(r::GlmResp) end type GlmMod <: LinPredModel - fr::ModelFrame rr::GlmResp pp::LinPred - ff::Formula fit::Bool + + # optional + fr::ModelFrame + ff::Formula + + GlmMod(rr::GlmResp, pp::LinPred, fit::Bool, fr::ModelFrame, ff::Formula) = new(rr, pp, fit, fr, ff) + GlmMod(rr::GlmResp, pp::LinPred, fit::Bool) = new(rr, pp, fit) end function coeftable(mm::GlmMod) @@ -79,7 +84,7 @@ function coeftable(mm::GlmMod) zz = cc ./ se CoefTable(hcat(cc,se,zz,2.0 * ccdf(Normal(), abs(zz))), ["Estimate","Std.Error","z value", "Pr(>|z|)"], - coefnames(mm.fr), 4) + isdefined(mm, :fr) ? coefnames(mm.fr) : ["x$i" for i = 1:size(mm.pp.X, 2)], 4) end function confint(obj::GlmMod, level::Real) @@ -95,7 +100,7 @@ function fit(m::GlmMod; verbose::Bool=false, maxIter::Integer=30, minStepFac::Re 0 < minStepFac < 1 || error("minStepFac must be in (0, 1)") cvg = false; p = m.pp; r = m.rr - scratch = similar(p.X.m) + scratch = similar(p.X) devold = updatemu!(r, linpred(delbeta!(p, wrkresp(r), wrkwt(r), scratch))) installbeta!(p) for i=1:maxIter @@ -116,7 +121,34 @@ function fit(m::GlmMod; verbose::Bool=false, maxIter::Integer=30, minStepFac::Re m end -function glm(f::Formula, df::AbstractDataFrame, d::UnivariateDistribution, l::Link; dofit::Bool=true, wts=Float64[], offset=Float64[]) +function fit(m::GlmMod, y; wts=nothing, offset=nothing, args...) + r = m.rr + V = typeof(r.y) + r.y = isa(y, V) ? copy(y) : convert(V, y) + wts == nothing || (r.wts = isa(wts, V) ? copy(wts) : convert(V, wts)) + offset == nothing || (r.offset = isa(offset, V) ? copy(offset) : convert(V, offset)) + m.fit = false + fit(m; args...) +end + +function glm{T<:FloatingPoint,V<:FPStoredVector}(X::Matrix{T}, y::V, d::UnivariateDistribution, + l::Link=canonicallink(d); + dofit::Bool=true, + wts::V=fill!(similar(y), one(eltype(y))), + offset::V=similar(y, 0)) + size(X, 1) == size(y, 1) || DimensionMismatch("number of rows in X and y must match") + n = length(y) + length(wts) == n || error("length(wts) = $lw should be 0 or $n") + mu = mustart(d, y, wts) + rr = GlmResp{typeof(y)}(y, d, l, linkfun!(l, similar(mu), mu), mu, offset, wts) + res = GlmMod(rr, DensePredChol(X), false) + dofit ? fit(res) : res +end + +glm(X::Matrix, y::AbstractVector, args...; kwargs...) = glm(float(X), float(y), args...; kwargs...) + +function glm(f::Formula, df::AbstractDataFrame, d::UnivariateDistribution, l::Link=canonicallink(d); + dofit::Bool=true, wts=Float64[], offset=Float64[]) mf = ModelFrame(f, df) mm = ModelMatrix(mf) y = model_response(mf); T = eltype(y); @@ -130,13 +162,12 @@ function glm(f::Formula, df::AbstractDataFrame, d::UnivariateDistribution, l::Li mu = mustart(d, y, w) off = T <: Float64 ? copy(offset) : convert(Vector{T}, offset) rr = GlmResp{typeof(y)}(y, d, l, linkfun!(l, similar(mu), mu), mu, off, w) - res = GlmMod(mf, rr, DensePredChol(mm), f, false) + res = GlmMod(rr, DensePredChol(mm.m), false, mf, f) dofit ? fit(res) : res end glm(e::Expr, df::AbstractDataFrame, d::UnivariateDistribution, l::Link) = glm(Formula(e),df,d,l) glm(e::Expr, df::AbstractDataFrame, d::UnivariateDistribution) = glm(Formula(e),df,d,canonicallink(d)) -glm(f::Formula, df::AbstractDataFrame, d::UnivariateDistribution) = glm(f, df, d, canonicallink(d)) glm(s::String, df::AbstractDataFrame, d::UnivariateDistribution) = glm(Formula(parse(s)[1]), df, d) ## scale(m) -> estimate, s, of the scale parameter diff --git a/src/linpred.jl b/src/linpred.jl index bdb3f7fb..e15ae00b 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,7 +1,7 @@ ## Return the linear predictor vector -linpred(p::LinPred, f::Real=1.) = p.X.m * (f == 0. ? p.beta0 : fma(p.beta0, p.delbeta, f)) +linpred(p::LinPred, f::Real=1.) = p.X * (f == 0. ? p.beta0 : fma(p.beta0, p.delbeta, f)) ## Install beta0 + f*delbeta as beta0 and zero out delbeta function installbeta!(p::LinPred, f::Real=1.) @@ -13,45 +13,45 @@ end typealias BlasReal Union(Float32,Float64) type DensePredQR{T<:BlasReal} <: DensePred - X::ModelMatrix{T} # model matrix + X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment qr::QRCompactWY{T} - function DensePredQR(X::ModelMatrix{T}, beta0::Vector{T}) + function DensePredQR(X::Matrix{T}, beta0::Vector{T}) n,p = size(X); length(beta0) == p || error("dimension mismatch") - new(X, beta0, zeros(T,p), qrfact(X.m)) + new(X, beta0, zeros(T,p), qrfact(X)) end end -DensePredQR{T<:BlasReal}(X::ModelMatrix{T}) = DensePredQR{T}(X, zeros(T,size(X,2))) +DensePredQR{T<:BlasReal}(X::Matrix{T}) = DensePredQR{T}(X, zeros(T,size(X,2))) cholfact{T<:FP}(p::DensePredQR{T}) = Cholesky{T}(p.qr[:R],'U') delbeta!{T<:BlasReal}(p::DensePredQR{T}, r::Vector{T}) = (p.delbeta = p.qr\r; p) type DensePredChol{T<:BlasReal} <: DensePred - X::ModelMatrix{T} # model matrix - beta0::Vector{T} # base vector for coefficients - delbeta::Vector{T} # coefficient increment + X::Matrix{T} # model matrix + beta0::Vector{T} # base vector for coefficients + delbeta::Vector{T} # coefficient increment chol::Cholesky{T} - function DensePredChol(X::ModelMatrix{T}, beta0::Vector{T}) + function DensePredChol(X::Matrix{T}, beta0::Vector{T}) n,p = size(X); length(beta0) == p || error("dimension mismatch") - new(X, beta0, zeros(T,p), cholfact(X.m'X.m)) + new(X, beta0, zeros(T,p), cholfact(X'X)) end end -DensePredChol{T<:BlasReal}(X::ModelMatrix{T}) = DensePredChol{T}(X, zeros(T,size(X,2))) +DensePredChol{T<:BlasReal}(X::Matrix{T}) = DensePredChol{T}(X, zeros(T,size(X,2))) solve!{T<:BlasReal}(C::Cholesky{T}, B::StridedVecOrMat{T}) = potrs!(C.uplo, C.UL, B) cholfact{T<:FP}(p::DensePredChol{T}) = (c = p.chol; Cholesky{T}(copy(c.UL),c.uplo)) function delbeta!{T<:BlasReal}(p::DensePredChol{T}, r::Vector{T}) - solve!(p.chol, gemv!('T', 1.0, p.X.m, r, 0.0, p.delbeta)) + solve!(p.chol, gemv!('T', 1.0, p.X, r, 0.0, p.delbeta)) p end function delbeta!{T<:BlasReal}(p::DensePredChol{T}, r::Vector{T}, wt::Vector{T}, scr::Matrix{T}) - vbroadcast!(Multiply(), scr, p.X.m, wt, 1) - fac, info = potrf!('U', gemm!('T', 'N', 1.0, scr, p.X.m, 0.0, p.chol.UL)) + vbroadcast!(Multiply(), scr, p.X, wt, 1) + fac, info = potrf!('U', gemm!('T', 'N', 1.0, scr, p.X, 0.0, p.chol.UL)) info == 0 || error("Singularity detected at column $info of weighted model matrix") solve!(p.chol, gemv!('T', 1.0, scr, r, 0.0, p.delbeta)) p @@ -73,8 +73,8 @@ cor(x::LinPredModel) = (invstd = map(RcpFun(),stderr(x)); scale!(invstd,scale!(v stderr(x::LinPredModel) = sqrt(diag(vcov(x))) function show(io::IO, obj::LinPredModel) - println(io, "$(obj.ff)\n\nCoefficients:") - print(io, coeftable(obj)) + println(io, isdefined(obj, :ff) ? obj.ff : "Explicitly specified design matrix", + "\n\nCoefficients:", coeftable(obj)) end ## function show(io::IO, obj::GlmMod) diff --git a/src/lm.jl b/src/lm.jl index 86082ec5..fb7e0158 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -37,7 +37,7 @@ cholfact(x::LmMod) = cholfact(x.pp) function lm(f::Formula, df::AbstractDataFrame) mf = ModelFrame(f, df); mm = ModelMatrix(mf) - rr = LmResp(model_response(mf)); pp = DensePredQR(mm) + rr = LmResp(model_response(mf)); pp = DensePredQR(mm.m) installbeta!(delbeta!(pp, rr.y)); updatemu!(rr, linpred(pp,0.)) LmMod(mf, rr, pp, f) end @@ -46,7 +46,7 @@ lm(f::String, df::AbstractDataFrame) = lm(Formula(parse(f)[1]), df) function lmc(f::Formula, df::AbstractDataFrame) mf = ModelFrame(f, df); mm = ModelMatrix(mf) - rr = LmResp(model_response(mf)); pp = DensePredChol(mm) + rr = LmResp(model_response(mf)); pp = DensePredChol(mm.m) installbeta!(delbeta!(pp, rr.y)); updatemu!(rr, linpred(pp,0.)) LmMod(mf, rr, pp, f) end diff --git a/test/glmFit.jl b/test/glmFit.jl index e2967262..a262864f 100644 --- a/test/glmFit.jl +++ b/test/glmFit.jl @@ -29,3 +29,15 @@ gm4 = glm(admit ~ gre + gpa + rank, df, Binomial(), CauchitLink()) gm5 = glm(admit ~ gre + gpa + rank, df, Binomial(), CloglogLink()) @test_approx_eq deviance(gm5) 458.89439629612616 +mf = ModelFrame(admit ~ gre + gpa + rank, df) +X = ModelMatrix(mf).m +gm6 = glm(X, model_response(mf), Binomial()) +@test_approx_eq deviance(gm2) 458.5174924758994 +@test_approx_eq coef(gm2) [-3.9899786606380734,0.0022644256521549043,0.8040374535155766,-0.6754428594116577,-1.3402038117481079,-1.5514636444657492] + +y = rand(0:1, size(X, 1)) +fit(gm6, y) + +gm7 = glm(X, y, Binomial()) +@test_approx_eq_eps deviance(gm6) deviance(gm7) 1e-6 +@test_approx_eq_eps coef(gm6) coef(gm7) 1e-6