Toeplitz gram (#57)

* replace zeros with undef array in the constructor; speedup: 500x with trj = rand(3, 72960) .- 0.5 and shape = (192,192,192) * Moved the caluclation to a separate function for reusability; added multihreading; some minor tweaks to improve speed * add non-allocating constructor * Bump SpecialFunctions compat * Implement ND Toeplitz constructors + non-allocating apply function * test Toeplitz * export functions * add docstrings * typo * calculateToeplitzKernel! -> in place writing of the kernel * changed compat and CI to Julia 1.6 and up * bumped julia in the appveyor CI as well... * bugfix
JuliaMath · Dec 31, 2021 · 101d9d9 · 101d9d9
1 parent 29e5bee
commit 101d9d9
Show file tree

Hide file tree

Showing 11 changed files with 496 additions and 93 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
 *.DS_Store
 docs/build/*
 *.h5
-
+settings.json
 Manifest.toml
diff --git a/Project.toml b/Project.toml
@@ -19,20 +19,20 @@ ThreadedSparseCSR = "39591389-4884-462a-89e7-9edfce40b6eb"
 
 [compat]
 CUDA = "1.3.3, 2.3, 3.1"
+DataFrames = "1.3.1"
 FFTW = "0.2, 1"
 Graphics = "0.4, 1.0"
 LoopVectorization = "0.12"
 Polyester = "0.5.5, 0.6"
 SpecialFunctions = "0.8, 0.10, 1, 2"
 ThreadedSparseCSR = "0.1.1"
 julia = "1.6"
-DataFrames = "1.3.1"
 
 [extras]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-NFFT3 = "53104703-03e8-40a5-ab01-812303a44cae"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+NFFT3 = "53104703-03e8-40a5-ab01-812303a44cae"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
 test = ["Test", "BenchmarkTools", "NFFT3", "DataFrames"]
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,14 +1,24 @@
 using Documenter, NFFT
 
-makedocs(
+DocMeta.setdocmeta!(NFFT, :DocTestSetup, :(using NFFT); recursive=true)
+
+makedocs(;
+    doctest = false,
     modules = [NFFT],
     sitename = "NFFT",
-    authors = "Tobias Knopp,...",
+    authors = "Tobias Knopp and contributors",
+    repo="https://github.com/tknopp/NFFT.jl/blob/{commit}{path}#{line}",
+    format=Documenter.HTML(;
+        prettyurls=get(ENV, "CI", "false") == "true",
+        canonical="https://tknopp.github.io/NFFT.jl",
+        assets=String[],
+    ),
     pages = [
         "Home" => "index.md",
         "Overview" => "overview.md",
         "Directional" => "directional.md",
-        "Density" => "density.md"
+        "Density" => "density.md",
+        "API" => "api.md",
     ]
 )
 

diff --git a/docs/src/api.md b/docs/src/api.md
@@ -0,0 +1,15 @@
+```@meta
+Author = "Tobias Knopp and contributors"
+CurrentModule = NFFT
+```
+
+# API
+
+In the following, you find the documentation of some, and hopefully soon all, exported functions of the [NFFT.jl](https://github.com/tknopp/NFFT.jl) package:
+
+```@index
+```
+
+```@autodocs
+Modules = [NFFT]
+```
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -34,3 +34,4 @@ the [issue tracker](https://github.com/tknopp/NFFT.jl/issues) to contact us.
 ## Contributors
 
 * [Tobias Knopp](https://www.tuhh.de/ibi/people/tobias-knopp-head-of-institute.html)
+* [Jakob Assländer](https://med.nyu.edu/faculty/jakob-asslaender)
diff --git a/src/CpuNFFT.jl b/src/CpuNFFT.jl
@@ -1,3 +1,4 @@
+using Polyester
 
 #=
 Some internal documentation (especially for people familiar with the nfft)
@@ -38,34 +39,42 @@ mutable struct NFFTPlan{D,DIM,T} <: AbstractNFFTPlan{D,DIM,T}
     tmpVec::Array{Complex{T},D}
     B::SparseMatrixCSC{T,Int64}
 end
-    
+
 function Base.copy(p::NFFTPlan{D,0,T}) where {D,T}
     tmpVec = similar(p.tmpVec)
-
-    FP = plan_fft!(tmpVec; flags=p.forwardFFT.flags)
-    BP = plan_bfft!(tmpVec; flags=p.backwardFFT.flags)
-
-    return  NFFTPlan{D,0,T}(p.N, p.M, p.x, p.m, p.sigma, p.n, p.K, p.windowLUT,
-                    p.windowHatInvLUT, FP, BP , tmpVec, p.B)
+    windowLUT = copy(p.windowLUT)
+    windowHatInvLUT = copy(p.windowHatInvLUT)
+    B = copy(p.B)
+    x = copy(p.x)
+
+    FP = plan_fft!(tmpVec; flags = p.forwardFFT.flags)
+    BP = plan_bfft!(tmpVec; flags = p.backwardFFT.flags)
+
+    return NFFTPlan{D,0,T}(p.N, p.M, x, p.m, p.sigma, p.n, p.K, windowLUT,
+        windowHatInvLUT, FP, BP, tmpVec, B)
 end
-    
+
 function Base.copy(p::NFFTPlan{D,DIM,T}) where {D,DIM,T}
     tmpVec = similar(p.tmpVec)
-
-    FP = plan_fft!(tmpVec, DIM; flags=p.forwardFFT.flags)
-    BP = plan_bfft!(tmpVec, DIM; flags=p.backwardFFT.flags)
-
-    return  NFFTPlan{D,DIM,T}(p.N, p.M, p.x, p.m, p.sigma, p.n, p.K, p.windowLUT,
-                    p.windowHatInvLUT, FP, BP , tmpVec, p.B)
+    windowLUT = copy(p.windowLUT)
+    windowHatInvLUT = copy(p.windowHatInvLUT)
+    B = copy(p.B)
+    x = copy(p.x)
+
+    FP = plan_fft!(tmpVec, DIM; flags = p.forwardFFT.flags)
+    BP = plan_bfft!(tmpVec, DIM; flags = p.backwardFFT.flags)
+
+    return NFFTPlan{D,DIM,T}(p.N, p.M, x, p.m, p.sigma, p.n, p.K, windowLUT,
+        windowHatInvLUT, FP, BP, tmpVec, B)
 end
-    
-@inline dim(::NFFTPlan{D,DIM}) where {D, DIM} = DIM
-    
+
+@inline dim(::NFFTPlan{D,DIM}) where {D,DIM} = DIM
+
 
 ##############
 # constructors
 ##############
-function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m=4, sigma=2.0,
+function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m = 4, sigma = 2.0,
                 window=:kaiser_bessel, K=2000;
                 precompute::PrecomputeFlags=LUT, sortNodes=false, kwargs...) where {D,T}
 
@@ -82,41 +91,43 @@ function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m=4, sigma=2.0,
 
     tmpVec = Array{Complex{T}}(undef, n)
 
-    M = size(x,2)
+    M = size(x, 2)
 
     FP = plan_fft!(tmpVec; kwargs...)
     BP = plan_bfft!(tmpVec; kwargs...)
 
     # Sort nodes in lexicographic way
     if sortNodes
-      x .= sortslices(x,dims=2)
-    end
+        x .= sortslices(x,dims=2)
+      end
 
-    # Create lookup table
-    win, win_hat = getWindow(window)
+    windowLUT, windowHatInvLUT, B = calcLookUpTable(x, N, n, m, sigma, window, K; precompute=precompute)
 
-    windowLUT = Vector{Vector{T}}(undef,D)
-    windowHatInvLUT = Vector{Vector{T}}(undef,D)
-    for d=1:D
-        windowHatInvLUT[d] = zeros(T, N[d])
-        for k=1:N[d]
-            windowHatInvLUT[d][k] = 1. / win_hat(k-1-N[d]÷2, n[d], m, sigma)
-        end
-    end
+    NFFTPlan{D,0,T}(N, M, x, m, sigma, n, K, windowLUT, windowHatInvLUT, FP, BP, tmpVec, B)
+end
 
-    if precompute == LUT
-        precomputeLUT(win, windowLUT, n, m, sigma, K, T)
-        B = sparse([],[],T[])
-    elseif precompute == FULL
-        B = precomputeB(win, x, N, n, m, M, sigma, K, T)
-    elseif precompute == FULL_LUT
-        precomputeLUT(win, windowLUT, n, m, sigma, K, T)
-        B = precomputeB(windowLUT, x, N, n, m, M, sigma, K, T)
+function NFFTPlan!(p::AbstractNFFTPlan{D,DIM,T}, x::Matrix{T}, window = :kaiser_bessel; sortNodes=false) where {D,DIM,T}
+
+    if isempty(p.B)
+        precompute = LUT
     else
-        error("precompute = $precompute not supported by NFFT.jl!")
+        precompute = FULL
     end
 
-    NFFTPlan{D,0,T}(N, M, x, m, sigma, n, K, windowLUT, windowHatInvLUT, FP, BP, tmpVec, B )
+    # Sort nodes in lexicographic way
+    if sortNodes
+        x .= sortslices(x,dims=2)
+      end
+
+    windowLUT, windowHatInvLUT, B = calcLookUpTable(x, p.N, p.n, p.m, p.sigma, window, p.K; precompute=precompute)
+
+    p.M = size(x, 2)
+    p.windowLUT = windowLUT
+    p.windowHatInvLUT = windowHatInvLUT
+    p.B = B
+    p.x = x
+
+    return p
 end
 
 # Directional NFFT
@@ -141,37 +152,20 @@ function NFFTPlan(x::AbstractVector{T}, dim::Integer, N::NTuple{D,Int64}, m=4,
 
     sz = [N...]
     sz[dim] = n[dim]
-    tmpVec = Array{Complex{T}}(undef,sz...)
+    tmpVec = Array{Complex{T}}(undef, sz...)
 
     M = length(x)
 
     FP = plan_fft!(tmpVec, dim; kwargs...)
     BP = plan_bfft!(tmpVec, dim; kwargs...)
 
-    # Create lookup table
-    win, win_hat = getWindow(window)
+    windowLUT, windowHatInvLUT, B = calcLookUpTable(x, (N[dim],), (n[dim],), m, sigma, window, K, precompute=LUT)
 
-    windowLUT = Vector{Vector{T}}(undef,1)
-    Z = round(Int, 3*K/2)
-    windowLUT[1] = zeros(T, Z)
-    for l = 1:Z
-        y = ((l-1) / (K-1)) * m/n[dim]
-        windowLUT[1][l] = win(y, n[dim], m, sigma)
-    end
-
-    windowHatInvLUT = Vector{Vector{T}}(undef,1)
-    windowHatInvLUT[1] = zeros(T, N[dim])
-    for k = 1:N[dim]
-        windowHatInvLUT[1][k] = 1. / win_hat(k-1-N[dim]/2, n[dim], m, sigma)
-    end
-
-    B = sparse([],[],T[])
-
-    NFFTPlan{D,dim,T}(N, M, reshape(x,1,M), m, sigma, n, K, windowLUT,
-                      windowHatInvLUT, FP, BP, tmpVec, B)
+    NFFTPlan{D,dim,T}(N, M, reshape(x, 1, M), m, sigma, n, K, windowLUT,
+        windowHatInvLUT, FP, BP, tmpVec, B)
 end
 
-function Base.show(io::IO, p::NFFTPlan{D,0}) where D
+function Base.show(io::IO, p::NFFTPlan{D,0}) where {D}
     print(io, "NFFTPlan with ", p.M, " sampling points for ", p.N, " array")
 end
 
@@ -182,8 +176,42 @@ end
 size(p::NFFTPlan) = p.N
 numFourierSamples(p::NFFTPlan) = p.M
 
+
+################
+# helper function
+################
+function calcLookUpTable(x::Union{Matrix{T},Vector{T}}, N::NTuple{D,Int}, n, m = 4, sigma = 2.0, window = :kaiser_bessel, K = 2000; precompute::PrecomputeFlags = LUT) where {T,D}
+
+    win, win_hat = getWindow(window)
+    M = size(x, 2)
+
+    windowLUT = Vector{Vector{T}}(undef, D)
+    windowHatInvLUT = Vector{Vector{T}}(undef, D)
+    for d = 1:D
+        windowHatInvLUT[d] = Vector{T}(undef, N[d])
+        @batch for k = 1:N[d]
+            windowHatInvLUT[d][k] = 1 / win_hat(k - 1 - N[d] / 2, n[d], m, sigma)
+        end
+    end
+
+    if precompute == LUT
+        precomputeLUT(win, windowLUT, n, m, sigma, K, T)
+        B = sparse([],[],T[])
+    elseif precompute == FULL
+        B = precomputeB(win, x, N, n, m, M, sigma, K, T)
+    elseif precompute == FULL_LUT
+        precomputeLUT(win, windowLUT, n, m, sigma, K, T)
+        B = precomputeB(windowLUT, x, N, n, m, M, sigma, K, T)
+    else
+        error("precompute = $precompute not supported by NFFT.jl!")
+    end
+    return (windowLUT, windowHatInvLUT, B)
+end
+
+
+
 ################
-# nfft functions 
+# nfft functions
 ################
 """
         nfft!(p, f, fHat) -> fHat
@@ -192,7 +220,7 @@ Calculate the NFFT of `f` with plan `p` and store the result in `fHat`.
 
 Both `f` and `fHat` must be complex arrays.
 """
-function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray; 
+function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray;
                verbose=false, timing::Union{Nothing,TimingStats} = nothing) where {D,DIM,T}
     consistencyCheck(p, f, fHat)
 
@@ -205,7 +233,7 @@ function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray;
     end
     t3 = @elapsed @inbounds convolve!(p, p.tmpVec, fHat)
     if verbose
-      @info "Timing: apod=$t1 fft=$t2 conv=$t3"
+        @info "Timing: apod=$t1 fft=$t2 conv=$t3"
     end
     if timing != nothing
       timing.conv = t3
@@ -222,7 +250,7 @@ Calculate the adjoint NFFT of `fHat` and store the result in `f`.
 
 Both `f` and `fHat` must be complex arrays.
 """
-function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray; 
+function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray;
                        verbose=false, timing::Union{Nothing,TimingStats} = nothing)
     consistencyCheck(p, f, fHat)
 
@@ -234,7 +262,7 @@ function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray;
     end
     t3 = @elapsed @inbounds apodization_adjoint!(p, p.tmpVec, f)
     if verbose
-      @info "Timing: conv=$t1 fft=$t2 apod=$t3"
+        @info "Timing: conv=$t1 fft=$t2 apod=$t3"
     end
     if timing != nothing
       timing.conv_adjoint = t1

diff --git a/src/NFFT.jl b/src/NFFT.jl
@@ -12,9 +12,10 @@ using ThreadedSparseCSR
 import ThreadedSparseCSR: SparseMatrixCSR, sparsecsr
 using CUDA
 using Graphics: @mustimplement
+import Base.size
 
 export AbstractNFFTPlan, plan_nfft, nfft, nfft_adjoint, ndft, ndft_adjoint, TimingStats
-import Base.size
+export calculateToeplitzKernel, calculateToeplitzKernel!, convolveToeplitzKernel!
 
 @enum PrecomputeFlags begin
   LUT = 1
@@ -57,10 +58,10 @@ end
 
 compute a plan for the NFFT of a size-`N` array at the nodes contained in `x`.
 
-The computing device (CPU or GPU) can be set using the keyworkd argument `device` 
+The computing device (CPU or GPU) can be set using the keyworkd argument `device`
 to NFFT.CPU or NFFT.CUDAGPU
 """
-function plan_nfft(x::Matrix{T}, N::NTuple{D,Int}, rest...; device::Device=CPU, 
+function plan_nfft(x::Matrix{T}, N::NTuple{D,Int}, rest...; device::Device=CPU,
                    timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
   t = @elapsed begin
     if device==CPU
@@ -86,7 +87,7 @@ and along the direction `dim`.
 The computing device (CPU or GPU) can be set using the keyworkd argument `device`.
 Currently only NFFT.CPU is supported.
 """
-function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU, 
+function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
                    timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
   t = @elapsed begin
     if device==CPU
@@ -103,7 +104,7 @@ function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device
   return p
 end
 
-function plan_nfft(x::Matrix{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU, 
+function plan_nfft(x::Matrix{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
                    timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
   t = @elapsed begin
     if size(x,1) != 1 && size(x,2) != 1
@@ -158,6 +159,7 @@ include("directional.jl")
 include("multidimensional.jl")
 include("samplingDensity.jl")
 include("NDFT.jl")
+include("Toeplitz.jl")
 
 
 function __init__()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,3 +34,4 @@ the [issue tracker](https://github.com/tknopp/NFFT.jl/issues) to contact us.
		## Contributors

		* [Tobias Knopp](https://www.tuhh.de/ibi/people/tobias-knopp-head-of-institute.html)
		* [Jakob Assländer](https://med.nyu.edu/faculty/jakob-asslaender)