From 605b2c9cc3eb02f3697d0861ab610c18b4474ae4 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Fri, 6 Dec 2024 16:17:04 -0600 Subject: [PATCH 1/7] [CUFFT] Preallocate a buffer for complex-to-real FFT --- lib/cufft/fft.jl | 56 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index 8751ae5749..8650f22f64 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -34,6 +34,7 @@ mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N} <: Plan{S} input_size::NTuple{N,Int} # Julia size of input array output_size::NTuple{N,Int} # Julia size of output array region::Any + buffer::CuArray{S} pinv::ScaledPlan{T} # required by AbstractFFTs API, will be defined by AbstractFFTs if needed function CuFFTPlan{T,S,K,inplace,N}(handle::cufftHandle, @@ -41,7 +42,8 @@ mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N} <: Plan{S} ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N} abs(K) == 1 || throw(ArgumentError("FFT direction must be either -1 (forward) or +1 (inverse)")) inplace isa Bool || throw(ArgumentError("FFT inplace argument must be a Bool")) - p = new{T,S,K,inplace,N}(handle, context(), stream(), input_size, output_size, region) + buffer = CuArray{S}(0) + p = new{T,S,K,inplace,N}(handle, context(), stream(), input_size, output_size, region, buffer) finalizer(unsafe_free!, p) p end @@ -50,7 +52,8 @@ end function CuFFTPlan{T,S,K,inplace,N}(handle::cufftHandle, X::DenseCuArray{S,N}, sizey::NTuple{N,Int}, region, ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N} - CuFFTPlan{T,S,K,inplace,N}(handle, size(X), sizey, region) + buffer = CuArray{S}(0) + CuFFTPlan{T,S,K,inplace,N}(handle, size(X), sizey, region, buffer) end function CUDA.unsafe_free!(plan::CuFFTPlan) @@ -60,6 +63,7 @@ function CUDA.unsafe_free!(plan::CuFFTPlan) end plan.handle = C_NULL end + CUDA.unsafe_free!(plan.buffer) end function showfftdims(io, sz, T) @@ -157,7 +161,8 @@ function plan_fft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region) + buffer = CuArray{T}(undef, 0) + CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) end @@ -170,7 +175,8 @@ function plan_bfft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region) + buffer = CuArray{T}(undef, 0) + CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) end # out-of-place complex @@ -183,7 +189,8 @@ function plan_fft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region) + buffer = CuArray{T}(undef, 0) + CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) end function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} @@ -195,7 +202,8 @@ function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N}(handle, size(X), size(X), region) + buffer = CuArray{T}(undef, 0) + CuFFTPlan{T,T,K,inplace,N}(handle, size(X), size(X), region, buffer) end # out-of-place real-to-complex @@ -213,7 +221,8 @@ function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} ydims = collect(size(X)) ydims[region[1]] = div(ydims[region[1]],2)+1 - CuFFTPlan{complex(T),T,K,inplace,N}(handle, size(X), (ydims...,), region) + buffer = CuArray{T}(undef, size(X)) + CuFFTPlan{complex(T),T,K,inplace,N}(handle, size(X), (ydims...,), region, buffer) end function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region::Any) where {T<:cufftComplexes,N} @@ -226,7 +235,8 @@ function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region::Any) where {T<:cuf handle = cufftGetPlan(real(T), T, (ydims...,), region) - CuFFTPlan{real(T),T,K,inplace,N}(handle, size(X), (ydims...,), region) + buffer = CuArray{T}(undef, 0) + CuFFTPlan{real(T),T,K,inplace,N}(handle, size(X), (ydims...,), region, buffer) end @@ -309,10 +319,14 @@ function LinearAlgebra.mul!(y::DenseCuArray{T}, p::CuFFTPlan{T,S,K,inplace}, x:: ) where {T,S,K,inplace} assert_applicable(p, x, y) if !inplace && T<:Real - # Out-of-place complex-to-real FFT will always overwrite input buffer. - x = copy(x) + # Out-of-place complex-to-real FFT will always overwrite input x. + # We copy the input x in an auxiliary buffer. + z = p.buffer + copyto!(z, x) + else + z = x end - unsafe_execute_trailing!(p, x, y) + unsafe_execute_trailing!(p, z, y) y end @@ -323,13 +337,21 @@ function Base.:(*)(p::CuFFTPlan{T,S,K,true}, x::DenseCuArray{S}) where {T,S,K} end function Base.:(*)(p::CuFFTPlan{T,S,K,false}, x::DenseCuArray{S1,M}) where {T,S,K,S1,M} - if S1 != S || T<:Real - # Convert to the expected input type. Also, - # Out-of-place complex-to-real FFT will always overwrite input buffer. - x = copy1(S, x) + if T<:Real + # Out-of-place complex-to-real FFT will always overwrite input x. + # We copy the input x in an auxiliary buffer. + z = p.buffer + copyto!(z, x) + else + if S1 != S + # Convert to the expected input type. + z = copy1(S, x) + else + z = x + end end - assert_applicable(p, x) + assert_applicable(p, z) y = CuArray{T,M}(undef, p.output_size) - unsafe_execute_trailing!(p, x, y) + unsafe_execute_trailing!(p, z, y) y end From 06d482eb7d3bfccd7fc3b8f3db6cb31388e61b00 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 12 Dec 2024 18:33:46 -0600 Subject: [PATCH 2/7] Update cufft.jl --- lib/cufft/fft.jl | 74 ++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index 8650f22f64..d212057c4c 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -25,7 +25,7 @@ Base.:(*)(p::ScaledPlan, x::DenseCuArray) = rmul!(p.p * x, p.scale) # N is the number of dimensions -mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N} <: Plan{S} +mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} <: Plan{S} # handle to Cuda low level plan. Note that this plan sometimes has lower dimensions # to handle more transform cases such as individual directions handle::cufftHandle @@ -33,27 +33,26 @@ mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N} <: Plan{S} stream::CuStream input_size::NTuple{N,Int} # Julia size of input array output_size::NTuple{N,Int} # Julia size of output array - region::Any - buffer::CuArray{S} + region::NTuple{N,Int} + buffer::B # buffer for out-of-place complex-to-real FFT pinv::ScaledPlan{T} # required by AbstractFFTs API, will be defined by AbstractFFTs if needed - function CuFFTPlan{T,S,K,inplace,N}(handle::cufftHandle, - input_size::NTuple{N,Int}, output_size::NTuple{N,Int}, region - ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N} + function CuFFTPlan{T,S,K,inplace,N,B}(handle::cufftHandle, + input_size::NTuple{N,Int}, output_size::NTuple{N,Int}, + region::NTuple{N,Int}, buffer::B + ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} abs(K) == 1 || throw(ArgumentError("FFT direction must be either -1 (forward) or +1 (inverse)")) inplace isa Bool || throw(ArgumentError("FFT inplace argument must be a Bool")) - buffer = CuArray{S}(0) - p = new{T,S,K,inplace,N}(handle, context(), stream(), input_size, output_size, region, buffer) + p = new{T,S,K,inplace,N,B}(handle, context(), stream(), input_size, output_size, region, buffer) finalizer(unsafe_free!, p) p end end -function CuFFTPlan{T,S,K,inplace,N}(handle::cufftHandle, X::DenseCuArray{S,N}, - sizey::NTuple{N,Int}, region, - ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N} - buffer = CuArray{S}(0) - CuFFTPlan{T,S,K,inplace,N}(handle, size(X), sizey, region, buffer) +function CuFFTPlan{T,S,K,inplace,N,B}(handle::cufftHandle, X::DenseCuArray{S,N}, + sizey::NTuple{N,Int}, region::NTuple{N,Int}, buffer::B + ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} + CuFFTPlan{T,S,K,inplace,N,B}(handle, size(X), sizey, region, buffer) end function CUDA.unsafe_free!(plan::CuFFTPlan) @@ -155,65 +154,59 @@ end function plan_fft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_FORWARD inplace = true - region = Tuple(region) + region = NTuple{N,Int}(region) md = plan_max_dims(region, size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - buffer = CuArray{T}(undef, 0) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) end - function plan_bfft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = true - region = Tuple(region) + region = NTuple{N,Int}(region) md = plan_max_dims(region, size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - buffer = CuArray{T}(undef, 0) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, buffer, nothing) end # out-of-place complex function plan_fft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_FORWARD inplace = false - region = Tuple(region) + region = NTuple{N,Int}(region) md = plan_max_dims(region,size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - buffer = CuArray{T}(undef, 0) - CuFFTPlan{T,T,K,inplace,N}(handle, X, size(X), region, buffer) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, buffer, nothing) end function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = false - region = Tuple(region) + region = NTuple{N,Int}(region) md = plan_max_dims(region,size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - buffer = CuArray{T}(undef, 0) - CuFFTPlan{T,T,K,inplace,N}(handle, size(X), size(X), region, buffer) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, size(X), size(X), region, buffer, nothing) end # out-of-place real-to-complex function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} K = CUFFT_FORWARD inplace = false - region = Tuple(region) + region = NTuple{N,Int}(region) md = plan_max_dims(region,size(X)) - # X = front_view(X, md) sizex = size(X)[1:md] handle = cufftGetPlan(complex(T), T, sizex, region) @@ -221,43 +214,44 @@ function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} ydims = collect(size(X)) ydims[region[1]] = div(ydims[region[1]],2)+1 - buffer = CuArray{T}(undef, size(X)) - CuFFTPlan{complex(T),T,K,inplace,N}(handle, size(X), (ydims...,), region, buffer) + buffer = similar(X) + B = typeof(buffer) + + CuFFTPlan{complex(T),T,K,inplace,N,B}(handle, size(X), (ydims...,), region, buffer) end -function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region::Any) where {T<:cufftComplexes,N} +function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = false - region = Tuple(region) + region = NTuple{N,Int}(region) ydims = collect(size(X)) ydims[region[1]] = d handle = cufftGetPlan(real(T), T, (ydims...,), region) - buffer = CuArray{T}(undef, 0) - CuFFTPlan{real(T),T,K,inplace,N}(handle, size(X), (ydims...,), region, buffer) + CuFFTPlan{real(T),T,K,inplace,N,Nothing}(handle, size(X), (ydims...,), region, nothing) end # FIXME: plan_inv methods allocate needlessly (to provide type parameters) # Perhaps use FakeArray types to avoid this. -function plan_inv(p::CuFFTPlan{T,S,CUFFT_INVERSE,inplace,N} - ) where {T<:cufftNumber,S<:cufftNumber,N,inplace} +function plan_inv(p::CuFFTPlan{T,S,CUFFT_INVERSE,inplace,N,B} + ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,B} md_osz = plan_max_dims(p.region, p.output_size) sz_X = p.output_size[1:md_osz] handle = cufftGetPlan(S, T, sz_X, p.region) - ScaledPlan(CuFFTPlan{S,T,CUFFT_FORWARD,inplace,N}(handle, p.output_size, p.input_size, p.region), + ScaledPlan(CuFFTPlan{S,T,CUFFT_FORWARD,inplace,N,B}(handle, p.output_size, p.input_size, p.region, p.buffer), normalization(real(T), p.output_size, p.region)) end -function plan_inv(p::CuFFTPlan{T,S,CUFFT_FORWARD,inplace,N} - ) where {T<:cufftNumber,S<:cufftNumber,N,inplace} +function plan_inv(p::CuFFTPlan{T,S,CUFFT_FORWARD,inplace,N,B} + ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,B} md_isz = plan_max_dims(p.region, p.input_size) sz_Y = p.input_size[1:md_isz] handle = cufftGetPlan(S, T, sz_Y, p.region) - ScaledPlan(CuFFTPlan{S,T,CUFFT_INVERSE,inplace,N}(handle, p.output_size, p.input_size, p.region), + ScaledPlan(CuFFTPlan{S,T,CUFFT_INVERSE,inplace,N,B}(handle, p.output_size, p.input_size, p.region, p.buffer), normalization(real(S), p.input_size, p.region)) end From e346447e825db81f9cd95b6d5de789dc345b5de6 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 12 Dec 2024 18:39:03 -0600 Subject: [PATCH 3/7] Fix new errors in fft.jl --- lib/cufft/fft.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index d212057c4c..02f3d341e9 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -172,7 +172,7 @@ function plan_bfft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, buffer, nothing) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) end # out-of-place complex @@ -185,7 +185,7 @@ function plan_fft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, buffer, nothing) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) end function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} @@ -197,7 +197,7 @@ function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, size(X), size(X), region, buffer, nothing) + CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, size(X), size(X), region, nothing) end # out-of-place real-to-complex From 9952a3e29c0cf6b9d190ac86c6fb3ee286c695e1 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 12 Dec 2024 19:17:18 -0600 Subject: [PATCH 4/7] More fixes in fft.jl --- lib/cufft/fft.jl | 68 +++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index 02f3d341e9..6955776254 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -25,7 +25,7 @@ Base.:(*)(p::ScaledPlan, x::DenseCuArray) = rmul!(p.p * x, p.scale) # N is the number of dimensions -mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} <: Plan{S} +mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N,R,B} <: Plan{S} # handle to Cuda low level plan. Note that this plan sometimes has lower dimensions # to handle more transform cases such as individual directions handle::cufftHandle @@ -33,26 +33,26 @@ mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} <: Plan{S} stream::CuStream input_size::NTuple{N,Int} # Julia size of input array output_size::NTuple{N,Int} # Julia size of output array - region::NTuple{N,Int} + region::NTuple{R,Int} buffer::B # buffer for out-of-place complex-to-real FFT pinv::ScaledPlan{T} # required by AbstractFFTs API, will be defined by AbstractFFTs if needed - function CuFFTPlan{T,S,K,inplace,N,B}(handle::cufftHandle, - input_size::NTuple{N,Int}, output_size::NTuple{N,Int}, - region::NTuple{N,Int}, buffer::B - ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} + function CuFFTPlan{T,S,K,inplace,N,R,B}(handle::cufftHandle, + input_size::NTuple{N,Int}, output_size::NTuple{N,Int}, + region::NTuple{R,Int}, buffer::B + ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,R,B} abs(K) == 1 || throw(ArgumentError("FFT direction must be either -1 (forward) or +1 (inverse)")) inplace isa Bool || throw(ArgumentError("FFT inplace argument must be a Bool")) - p = new{T,S,K,inplace,N,B}(handle, context(), stream(), input_size, output_size, region, buffer) + p = new{T,S,K,inplace,N,R,B}(handle, context(), stream(), input_size, output_size, region, buffer) finalizer(unsafe_free!, p) p end end -function CuFFTPlan{T,S,K,inplace,N,B}(handle::cufftHandle, X::DenseCuArray{S,N}, - sizey::NTuple{N,Int}, region::NTuple{N,Int}, buffer::B - ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,B} - CuFFTPlan{T,S,K,inplace,N,B}(handle, size(X), sizey, region, buffer) +function CuFFTPlan{T,S,K,inplace,N,R,B}(handle::cufftHandle, X::DenseCuArray{S,N}, + sizey::NTuple{N,Int}, region::NTuple{R,Int}, buffer::B + ) where {T<:cufftNumber,S<:cufftNumber,K,inplace,N,R,B} + CuFFTPlan{T,S,K,inplace,N,R,B}(handle, size(X), sizey, region, buffer) end function CUDA.unsafe_free!(plan::CuFFTPlan) @@ -62,7 +62,9 @@ function CUDA.unsafe_free!(plan::CuFFTPlan) end plan.handle = C_NULL end - CUDA.unsafe_free!(plan.buffer) + if !isnothing(plan.buffer) + CUDA.unsafe_free!(plan.buffer) + end end function showfftdims(io, sz, T) @@ -154,57 +156,62 @@ end function plan_fft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_FORWARD inplace = true - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) md = plan_max_dims(region, size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) + CuFFTPlan{T,T,K,inplace,N,R,Nothing}(handle, X, size(X), region, nothing) end function plan_bfft!(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = true - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) md = plan_max_dims(region, size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) + CuFFTPlan{T,T,K,inplace,N,R,Nothing}(handle, X, size(X), region, nothing) end # out-of-place complex function plan_fft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_FORWARD inplace = false - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) md = plan_max_dims(region,size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, X, size(X), region, nothing) + CuFFTPlan{T,T,K,inplace,N,R,Nothing}(handle, X, size(X), region, nothing) end function plan_bfft(X::DenseCuArray{T,N}, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = false - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) md = plan_max_dims(region,size(X)) sizex = size(X)[1:md] handle = cufftGetPlan(T, T, sizex, region) - CuFFTPlan{T,T,K,inplace,N,Nothing}(handle, size(X), size(X), region, nothing) + CuFFTPlan{T,T,K,inplace,N,R,Nothing}(handle, size(X), size(X), region, nothing) end # out-of-place real-to-complex function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} K = CUFFT_FORWARD inplace = false - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) md = plan_max_dims(region,size(X)) sizex = size(X)[1:md] @@ -217,41 +224,42 @@ function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} buffer = similar(X) B = typeof(buffer) - CuFFTPlan{complex(T),T,K,inplace,N,B}(handle, size(X), (ydims...,), region, buffer) + CuFFTPlan{complex(T),T,K,inplace,N,R,B}(handle, size(X), (ydims...,), region, buffer) end function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = false - region = NTuple{N,Int}(region) + R = length(region) + region = NTuple{R,Int}(region) ydims = collect(size(X)) ydims[region[1]] = d handle = cufftGetPlan(real(T), T, (ydims...,), region) - CuFFTPlan{real(T),T,K,inplace,N,Nothing}(handle, size(X), (ydims...,), region, nothing) + CuFFTPlan{real(T),T,K,inplace,N,R,Nothing}(handle, size(X), (ydims...,), region, nothing) end # FIXME: plan_inv methods allocate needlessly (to provide type parameters) # Perhaps use FakeArray types to avoid this. -function plan_inv(p::CuFFTPlan{T,S,CUFFT_INVERSE,inplace,N,B} - ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,B} +function plan_inv(p::CuFFTPlan{T,S,CUFFT_INVERSE,inplace,N,R,B} + ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,R,B} md_osz = plan_max_dims(p.region, p.output_size) sz_X = p.output_size[1:md_osz] handle = cufftGetPlan(S, T, sz_X, p.region) - ScaledPlan(CuFFTPlan{S,T,CUFFT_FORWARD,inplace,N,B}(handle, p.output_size, p.input_size, p.region, p.buffer), + ScaledPlan(CuFFTPlan{S,T,CUFFT_FORWARD,inplace,N,R,B}(handle, p.output_size, p.input_size, p.region, p.buffer), normalization(real(T), p.output_size, p.region)) end -function plan_inv(p::CuFFTPlan{T,S,CUFFT_FORWARD,inplace,N,B} - ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,B} +function plan_inv(p::CuFFTPlan{T,S,CUFFT_FORWARD,inplace,N,R,B} + ) where {T<:cufftNumber,S<:cufftNumber,inplace,N,R,B} md_isz = plan_max_dims(p.region, p.input_size) sz_Y = p.input_size[1:md_isz] handle = cufftGetPlan(S, T, sz_Y, p.region) - ScaledPlan(CuFFTPlan{S,T,CUFFT_INVERSE,inplace,N,B}(handle, p.output_size, p.input_size, p.region, p.buffer), + ScaledPlan(CuFFTPlan{S,T,CUFFT_INVERSE,inplace,N,R,B}(handle, p.output_size, p.input_size, p.region, p.buffer), normalization(real(S), p.input_size, p.region)) end From 4ab92e7f6cefc9fc7427d1a258bcc5e0fa18cf1d Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 12 Dec 2024 19:41:11 -0600 Subject: [PATCH 5/7] Allocate a buffer in both plan_rfft and plan_brfft --- lib/cufft/fft.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index 6955776254..ff1fefbc1b 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -221,12 +221,15 @@ function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} ydims = collect(size(X)) ydims[region[1]] = div(ydims[region[1]],2)+1 + # The buffer is not needed for real-to-complex (`mul!`), + # but it’s required for complex-to-real (`ldiv!`). buffer = similar(X) B = typeof(buffer) CuFFTPlan{complex(T),T,K,inplace,N,R,B}(handle, size(X), (ydims...,), region, buffer) end +# out-of-place complex-to-real function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region) where {T<:cufftComplexes,N} K = CUFFT_INVERSE inplace = false @@ -238,7 +241,10 @@ function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region) where {T<:cufftCom handle = cufftGetPlan(real(T), T, (ydims...,), region) - CuFFTPlan{real(T),T,K,inplace,N,R,Nothing}(handle, size(X), (ydims...,), region, nothing) + buffer = similar(X) + B = typeof(buffer) + + CuFFTPlan{real(T),T,K,inplace,N,R,B}(handle, size(X), (ydims...,), region, buffer) end From bfb0a4552e083166de76308dbf1deefaa2593dd9 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 12 Dec 2024 20:00:56 -0600 Subject: [PATCH 6/7] Allocate a buffer in both plan_rfft and plan_brfft --- lib/cufft/fft.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index ff1fefbc1b..ce6bdea746 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -219,11 +219,11 @@ function plan_rfft(X::DenseCuArray{T,N}, region) where {T<:cufftReals,N} handle = cufftGetPlan(complex(T), T, sizex, region) ydims = collect(size(X)) - ydims[region[1]] = div(ydims[region[1]],2)+1 + ydims[region[1]] = div(ydims[region[1]], 2) + 1 # The buffer is not needed for real-to-complex (`mul!`), # but it’s required for complex-to-real (`ldiv!`). - buffer = similar(X) + buffer = CuArray{complex(T)}(undef, ydims...) B = typeof(buffer) CuFFTPlan{complex(T),T,K,inplace,N,R,B}(handle, size(X), (ydims...,), region, buffer) @@ -241,7 +241,7 @@ function plan_brfft(X::DenseCuArray{T,N}, d::Integer, region) where {T<:cufftCom handle = cufftGetPlan(real(T), T, (ydims...,), region) - buffer = similar(X) + buffer = CuArray{T}(undef, size(X)) B = typeof(buffer) CuFFTPlan{real(T),T,K,inplace,N,R,B}(handle, size(X), (ydims...,), region, buffer) From 737b35c81a3f06428df83fbc6d2c005e56f16f89 Mon Sep 17 00:00:00 2001 From: Alexis Montoison <35051714+amontoison@users.noreply.github.com> Date: Sat, 14 Dec 2024 05:30:41 -0600 Subject: [PATCH 7/7] Update lib/cufft/fft.jl Co-authored-by: Tim Besard --- lib/cufft/fft.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index ce6bdea746..c235ac2b93 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -34,7 +34,7 @@ mutable struct CuFFTPlan{T<:cufftNumber,S<:cufftNumber,K,inplace,N,R,B} <: Plan{ input_size::NTuple{N,Int} # Julia size of input array output_size::NTuple{N,Int} # Julia size of output array region::NTuple{R,Int} - buffer::B # buffer for out-of-place complex-to-real FFT + buffer::B # buffer for out-of-place complex-to-real FFT, or `nothing` if not needed pinv::ScaledPlan{T} # required by AbstractFFTs API, will be defined by AbstractFFTs if needed function CuFFTPlan{T,S,K,inplace,N,R,B}(handle::cufftHandle,