Skip to content

Commit

Permalink
Batch size fixes (#547)
Browse files Browse the repository at this point in the history
* Add tests that need to pass

* Batchsize fixes

* Upper bound Mooncake version

* Don't test on 1.11

* Docs on 1.10

* Test on pre

* DIT
  • Loading branch information
gdalle authored Oct 8, 2024
1 parent 88c48c1 commit e52dddf
Show file tree
Hide file tree
Showing 20 changed files with 145 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: '1'
version: '1.10' # TODO: 1
- uses: julia-actions/cache@v1
- name: Install dependencies
run: julia --project=${{ matrix.pkg.dir}}/docs/ -e '
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/Test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
fail-fast: true
matrix:
version:
- "1"
- "1.10" # TODO: 1 (as of 2024.10.08, 1 means 1.11 and we're not ready yet)
- "lts"
- "pre"
group:
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
fail-fast: true
matrix:
version:
- "1"
- "1.10" # TODO: 1
- "lts"
- "pre"
group:
Expand Down
4 changes: 2 additions & 2 deletions DifferentiationInterface/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DifferentiationInterface"
uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
authors = ["Guillaume Dalle", "Adrian Hill"]
version = "0.6.6"
version = "0.6.7"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Expand Down Expand Up @@ -56,7 +56,7 @@ ForwardDiff = "0.10.36"
LinearAlgebra = "<0.0.1,1"
Mooncake = "0.4.0"
PackageExtensionCompat = "1.0.2"
PolyesterForwardDiff = "0.1.1"
PolyesterForwardDiff = "0.1.2"
ReverseDiff = "1.15.1"
SparseArrays = "<0.0.1,1"
SparseConnectivityTracer = "0.5.0,0.6"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ using DifferentiationInterface:
JacobianPrep,
PullbackPrep,
PushforwardPrep,
SecondDerivativePrep,
dense_ad
SecondDerivativePrep
using FastDifferentiation:
derivative,
hessian,
Expand All @@ -33,6 +32,9 @@ monovec(x::Number) = [x]
# `myvec` turns its argument into a vector:
# a number goes through `monovec`, an array is flattened with `vec`.
function myvec(x::Number)
    return monovec(x)
end

function myvec(x::AbstractArray)
    return vec(x)
end

# Local `dense_ad`: a plain FastDifferentiation backend is returned as-is,
# while an `AutoSparse`-wrapped one is delegated to `ADTypes.dense_ad`.
function dense_ad(backend::AutoFastDifferentiation)
    return backend
end

function dense_ad(backend::AutoSparse{<:AutoFastDifferentiation})
    return ADTypes.dense_ad(backend)
end

include("onearg.jl")
include("twoarg.jl")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,22 @@ using LinearAlgebra: dot, mul!

# This extension reports the ForwardDiff backend as available.
function DI.check_available(::AutoForwardDiff)
    return true
end

DI.pick_batchsize(::AutoForwardDiff{C}, dimension::Integer) where {C} = Val(C)
# The chunk size is part of the backend type, so it is returned directly as a `Val`;
# `dimension` is not consulted in this method.
function DI.pick_batchsize(
    ::AutoForwardDiff{chunksize}, dimension::Integer
) where {chunksize}
    batchsize = Val{chunksize}()
    return batchsize
end

# No chunk size in the backend type: ask ForwardDiff to pick one from `dimension`.
function DI.pick_batchsize(::AutoForwardDiff{nothing}, dimension::Integer)
    # type-unstable: the `Val` parameter comes from a runtime value
    picked = ForwardDiff.pickchunksize(dimension)
    return Val(picked)
end

function DI.threshold_batchsize(backend::AutoForwardDiff{C1}, C2::Integer) where {C1}
C = (C1 === nothing) ? nothing : min(C1, C2)
return AutoForwardDiff(; chunksize=C, tag=backend.tag)
# Rebuild the backend with its chunk size capped at `chunksize2`.
# A backend without a chunk size (`nothing`) keeps `nothing`; the tag is carried over.
function DI.threshold_batchsize(
    backend::AutoForwardDiff{chunksize1}, chunksize2::Integer
) where {chunksize1}
    if chunksize1 === nothing
        chunksize = nothing
    else
        chunksize = min(chunksize1, chunksize2)
    end
    return AutoForwardDiff(; chunksize=chunksize, tag=backend.tag)
end

include("utils.jl")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# No chunk size in the backend type: build the chunk from `x`.
function choose_chunk(::AutoForwardDiff{nothing}, x)
    return Chunk(x)
end
choose_chunk(::AutoForwardDiff{C}, x) where {C} = Chunk{C}()
# Chunk size given by the backend type: use it directly, `x` is not inspected.
function choose_chunk(::AutoForwardDiff{chunksize}, x) where {chunksize}
    return Chunk{chunksize}()
end

tag_type(f, ::AutoForwardDiff{C,T}, x) where {C,T} = T
tag_type(f, ::AutoForwardDiff{C,Nothing}, x) where {C} = typeof(Tag(f, eltype(x)))
# Tag type parameter `T` carried by the backend type: return it as-is.
function tag_type(f, ::AutoForwardDiff{chunksize,T}, x) where {chunksize,T}
    return T
end

# Tag type parameter is `Nothing`: build a tag from `f` and the element type of `x`,
# and return the type of that tag.
function tag_type(f, ::AutoForwardDiff{chunksize,Nothing}, x) where {chunksize}
    tag = Tag(f, eltype(x))
    return typeof(tag)
end

function make_dual_similar(::Type{T}, x::Number, tx::NTuple{B}) where {T,B}
return Dual{T}(x, tx...)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ using PolyesterForwardDiff: threaded_gradient!, threaded_jacobian!
using PolyesterForwardDiff.ForwardDiff: Chunk
using PolyesterForwardDiff.ForwardDiff.DiffResults: DiffResults

function single_threaded(backend::AutoPolyesterForwardDiff{C,T}) where {C,T}
return AutoForwardDiff{C,T}(backend.tag)
# Build the `AutoForwardDiff` backend with the same chunk size, tag type and tag
# as the given Polyester backend.
function single_threaded(
    backend::AutoPolyesterForwardDiff{chunksize,T}
) where {chunksize,T}
    tag = backend.tag
    return AutoForwardDiff{chunksize,T}(tag)
end

# This extension reports the PolyesterForwardDiff backend as available.
function DI.check_available(::AutoPolyesterForwardDiff)
    return true
end
Expand All @@ -33,10 +33,10 @@ function DI.pick_batchsize(backend::AutoPolyesterForwardDiff, dimension::Integer
end

function DI.threshold_batchsize(
backend::AutoPolyesterForwardDiff{C1}, C2::Integer
) where {C1}
C = (C1 === nothing) ? nothing : min(C1, C2)
return AutoPolyesterForwardDiff(; chunksize=C, tag=backend.tag)
backend::AutoPolyesterForwardDiff{chunksize1}, chunksize2::Integer
) where {chunksize1}
chunksize = (chunksize1 === nothing) ? nothing : min(chunksize1, chunksize2)
return AutoPolyesterForwardDiff(; chunksize, tag=backend.tag)
end

include("onearg.jl")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,109 +107,113 @@ end

## Gradient

# Gradient preparation result for `AutoPolyesterForwardDiff`.
# Holds the `Chunk` selected in `DI.prepare_gradient`, which the gradient operators
# pass to `threaded_gradient!`.
struct PolyesterForwardDiffGradientPrep{chunksize} <: GradientPrep
# chunk object whose size is recorded in the type parameter `chunksize`
chunk::Chunk{chunksize}
end

function DI.prepare_gradient(
f, backend::AutoPolyesterForwardDiff, x, contexts::Vararg{Context,C}
) where {C}
return DI.prepare_gradient(f, single_threaded(backend), x, contexts...)
f, ::AutoPolyesterForwardDiff{chunksize}, x, contexts::Vararg{Context,C}
) where {chunksize,C}
if isnothing(chunksize)
chunk = Chunk(x)
else
chunk = Chunk{chunksize}()
end
return PolyesterForwardDiffGradientPrep(chunk)
end

function DI.value_and_gradient!(
f,
grad,
::GradientPrep,
::AutoPolyesterForwardDiff{K},
x::AbstractVector,
prep::PolyesterForwardDiffGradientPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {K,C}
) where {C}
fc = with_contexts(f, contexts...)
threaded_gradient!(fc, grad, x, Chunk{K}())
threaded_gradient!(fc, grad, x, prep.chunk)
return fc(x), grad
end

function DI.gradient!(
f,
grad,
::GradientPrep,
::AutoPolyesterForwardDiff{K},
x::AbstractVector,
prep::PolyesterForwardDiffGradientPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {K,C}
) where {C}
fc = with_contexts(f, contexts...)
threaded_gradient!(fc, grad, x, Chunk{K}())
threaded_gradient!(fc, grad, x, prep.chunk)
return grad
end

function DI.value_and_gradient!(
function DI.value_and_gradient(
f,
grad,
prep::GradientPrep,
prep::PolyesterForwardDiffGradientPrep,
backend::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {C}
return DI.value_and_gradient!(f, grad, prep, single_threaded(backend), x, contexts...)
return DI.value_and_gradient!(f, similar(x), prep, backend, x, contexts...)
end

function DI.gradient!(
function DI.gradient(
f,
grad,
prep::GradientPrep,
prep::PolyesterForwardDiffGradientPrep,
backend::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {C}
return DI.gradient!(f, grad, prep, single_threaded(backend), x, contexts...)
end

function DI.value_and_gradient(
f, prep::GradientPrep, backend::AutoPolyesterForwardDiff, x, contexts::Vararg{Context,C}
) where {C}
return DI.value_and_gradient!(f, similar(x), prep, backend, x, contexts...)
end

function DI.gradient(
f, prep::GradientPrep, backend::AutoPolyesterForwardDiff, x, contexts::Vararg{Context,C}
) where {C}
return DI.gradient!(f, similar(x), prep, backend, x, contexts...)
end

## Jacobian

# Jacobian preparation result for `AutoPolyesterForwardDiff` with a one-argument `f`.
# Holds the `Chunk` selected in `DI.prepare_jacobian`, which the Jacobian operators
# pass to `threaded_jacobian!`.
struct PolyesterForwardDiffOneArgJacobianPrep{chunksize} <: JacobianPrep
# chunk object whose size is recorded in the type parameter `chunksize`
chunk::Chunk{chunksize}
end

function DI.prepare_jacobian(
f, ::AutoPolyesterForwardDiff, x, contexts::Vararg{Context,C}
) where {C}
return NoJacobianPrep()
f, ::AutoPolyesterForwardDiff{chunksize}, x, contexts::Vararg{Context,C}
) where {chunksize,C}
if isnothing(chunksize)
chunk = Chunk(x)
else
chunk = Chunk{chunksize}()
end
return PolyesterForwardDiffOneArgJacobianPrep(chunk)
end

function DI.value_and_jacobian!(
f,
jac::AbstractMatrix,
::NoJacobianPrep,
::AutoPolyesterForwardDiff{K},
x::AbstractArray,
jac,
prep::PolyesterForwardDiffOneArgJacobianPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {K,C}
) where {C}
fc = with_contexts(f, contexts...)
return fc(x), threaded_jacobian!(fc, jac, x, Chunk{K}())
return fc(x), threaded_jacobian!(fc, jac, x, prep.chunk)
end

function DI.jacobian!(
f,
jac::AbstractMatrix,
::NoJacobianPrep,
::AutoPolyesterForwardDiff{K},
x::AbstractArray,
jac,
prep::PolyesterForwardDiffOneArgJacobianPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {K,C}
) where {C}
fc = with_contexts(f, contexts...)
return threaded_jacobian!(fc, jac, x, Chunk{K}())
return threaded_jacobian!(fc, jac, x, prep.chunk)
end

function DI.value_and_jacobian(
f,
prep::NoJacobianPrep,
prep::PolyesterForwardDiffOneArgJacobianPrep,
backend::AutoPolyesterForwardDiff,
x::AbstractArray,
x,
contexts::Vararg{Context,C},
) where {C}
y = f(x, map(unwrap, contexts)...)
Expand All @@ -220,9 +224,9 @@ end

function DI.jacobian(
f,
prep::NoJacobianPrep,
prep::PolyesterForwardDiffOneArgJacobianPrep,
backend::AutoPolyesterForwardDiff,
x::AbstractArray,
x,
contexts::Vararg{Context,C},
) where {C}
y = f(x, map(unwrap, contexts)...)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,32 @@ end

## Jacobian

# Jacobian preparation result for `AutoPolyesterForwardDiff` with an in-place `f!(y, x)`.
# Holds the `Chunk` selected in `DI.prepare_jacobian`, which the Jacobian operators
# pass to `threaded_jacobian!`.
struct PolyesterForwardDiffTwoArgJacobianPrep{chunksize} <: JacobianPrep
# chunk object whose size is recorded in the type parameter `chunksize`
chunk::Chunk{chunksize}
end

function DI.prepare_jacobian(
f!, y, ::AutoPolyesterForwardDiff, x, contexts::Vararg{Context,C}
) where {C}
return NoJacobianPrep()
f!, y, ::AutoPolyesterForwardDiff{chunksize}, x, contexts::Vararg{Context,C}
) where {chunksize,C}
if isnothing(chunksize)
chunk = Chunk(x)
else
chunk = Chunk{chunksize}()
end
return PolyesterForwardDiffTwoArgJacobianPrep(chunk)
end

function DI.value_and_jacobian(
f!, y, ::NoJacobianPrep, ::AutoPolyesterForwardDiff{K}, x, contexts::Vararg{Context,C}
f!,
y,
prep::PolyesterForwardDiffTwoArgJacobianPrep,
::AutoPolyesterForwardDiff{K},
x,
contexts::Vararg{Context,C},
) where {K,C}
fc! = with_contexts(f!, contexts...)
jac = similar(y, length(y), length(x))
threaded_jacobian!(fc!, y, jac, x, Chunk{K}())
threaded_jacobian!(fc!, y, jac, x, prep.chunk)
fc!(y, x)
return y, jac
end
Expand All @@ -138,36 +152,41 @@ function DI.value_and_jacobian!(
f!,
y,
jac,
::NoJacobianPrep,
prep::PolyesterForwardDiffTwoArgJacobianPrep,
::AutoPolyesterForwardDiff{K},
x,
contexts::Vararg{Context,C},
) where {K,C}
fc! = with_contexts(f!, contexts...)
threaded_jacobian!(fc!, y, jac, x, Chunk{K}())
threaded_jacobian!(fc!, y, jac, x, prep.chunk)
fc!(y, x)
return y, jac
end

function DI.jacobian(
f!, y, ::NoJacobianPrep, ::AutoPolyesterForwardDiff{K}, x, contexts::Vararg{Context,C}
) where {K,C}
f!,
y,
prep::PolyesterForwardDiffTwoArgJacobianPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {C}
fc! = with_contexts(f!, contexts...)
jac = similar(y, length(y), length(x))
threaded_jacobian!(fc!, y, jac, x, Chunk{K}())
threaded_jacobian!(fc!, y, jac, x, prep.chunk)
return jac
end

function DI.jacobian!(
f!,
y,
jac,
::NoJacobianPrep,
::AutoPolyesterForwardDiff{K},
prep::PolyesterForwardDiffTwoArgJacobianPrep,
::AutoPolyesterForwardDiff,
x,
contexts::Vararg{Context,C},
) where {K,C}
) where {C}
fc! = with_contexts(f!, contexts...)
threaded_jacobian!(fc!, y, jac, x, Chunk{K}())
threaded_jacobian!(fc!, y, jac, x, prep.chunk)
return jac
end
Loading

0 comments on commit e52dddf

Please sign in to comment.