Skip to content

Commit

Permalink
Toeplitz gram (#57)
Browse files Browse the repository at this point in the history
* replace zeros with undef array in the constructor; speedup: 500x with trj = rand(3, 72960) .- 0.5 and shape = (192,192,192)

* Moved the caluclation to a separate function for reusability; added multihreading; some minor tweaks to improve speed

* add non-allocating constructor

* Bump SpecialFunctions compat

* Implement ND Toeplitz constructors + non-allocating apply function

* test Toeplitz

* export functions

* add docstrings

* typo

* calculateToeplitzKernel! -> in place writing of the kernel

* changed compat and CI to Julia 1.6 and up

* bumped julia in the appveyor CI as well...

* bugfix
  • Loading branch information
JakobAsslaender authored Dec 31, 2021
1 parent 29e5bee commit 101d9d9
Show file tree
Hide file tree
Showing 11 changed files with 496 additions and 93 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
*.DS_Store
docs/build/*
*.h5

settings.json
Manifest.toml
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ ThreadedSparseCSR = "39591389-4884-462a-89e7-9edfce40b6eb"

[compat]
CUDA = "1.3.3, 2.3, 3.1"
DataFrames = "1.3.1"
FFTW = "0.2, 1"
Graphics = "0.4, 1.0"
LoopVectorization = "0.12"
Polyester = "0.5.5, 0.6"
SpecialFunctions = "0.8, 0.10, 1, 2"
ThreadedSparseCSR = "0.1.1"
julia = "1.6"
DataFrames = "1.3.1"

[extras]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
NFFT3 = "53104703-03e8-40a5-ab01-812303a44cae"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
NFFT3 = "53104703-03e8-40a5-ab01-812303a44cae"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "BenchmarkTools", "NFFT3", "DataFrames"]
16 changes: 13 additions & 3 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
using Documenter, NFFT

makedocs(
DocMeta.setdocmeta!(NFFT, :DocTestSetup, :(using NFFT); recursive=true)

makedocs(;
doctest = false,
modules = [NFFT],
sitename = "NFFT",
authors = "Tobias Knopp,...",
authors = "Tobias Knopp and contributors",
repo="https://github.com/tknopp/NFFT.jl/blob/{commit}{path}#{line}",
format=Documenter.HTML(;
prettyurls=get(ENV, "CI", "false") == "true",
canonical="https://tknopp.github.io/NFFT.jl",
assets=String[],
),
pages = [
"Home" => "index.md",
"Overview" => "overview.md",
"Directional" => "directional.md",
"Density" => "density.md"
"Density" => "density.md",
"API" => "api.md",
]
)

Expand Down
15 changes: 15 additions & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
```@meta
Author = "Tobias Knopp and contributors"
CurrentModule = NFFT
```

# API

In the following, you find the documentation of some, and hopefully soon all, exported functions of the [NFFT.jl](https://github.com/tknopp/NFFT.jl) package:

```@index
```

```@autodocs
Modules = [NFFT]
```
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ the [issue tracker](https://github.com/tknopp/NFFT.jl/issues) to contact us.
## Contributors

* [Tobias Knopp](https://www.tuhh.de/ibi/people/tobias-knopp-head-of-institute.html)
* [Jakob Assländer](https://med.nyu.edu/faculty/jakob-asslaender)
164 changes: 96 additions & 68 deletions src/CpuNFFT.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Polyester

#=
Some internal documentation (especially for people familiar with the nfft)
Expand Down Expand Up @@ -38,34 +39,42 @@ mutable struct NFFTPlan{D,DIM,T} <: AbstractNFFTPlan{D,DIM,T}
tmpVec::Array{Complex{T},D}
B::SparseMatrixCSC{T,Int64}
end

function Base.copy(p::NFFTPlan{D,0,T}) where {D,T}
tmpVec = similar(p.tmpVec)

FP = plan_fft!(tmpVec; flags=p.forwardFFT.flags)
BP = plan_bfft!(tmpVec; flags=p.backwardFFT.flags)

return NFFTPlan{D,0,T}(p.N, p.M, p.x, p.m, p.sigma, p.n, p.K, p.windowLUT,
p.windowHatInvLUT, FP, BP , tmpVec, p.B)
windowLUT = copy(p.windowLUT)
windowHatInvLUT = copy(p.windowHatInvLUT)
B = copy(p.B)
x = copy(p.x)

FP = plan_fft!(tmpVec; flags = p.forwardFFT.flags)
BP = plan_bfft!(tmpVec; flags = p.backwardFFT.flags)

return NFFTPlan{D,0,T}(p.N, p.M, x, p.m, p.sigma, p.n, p.K, windowLUT,
windowHatInvLUT, FP, BP, tmpVec, B)
end

function Base.copy(p::NFFTPlan{D,DIM,T}) where {D,DIM,T}
tmpVec = similar(p.tmpVec)

FP = plan_fft!(tmpVec, DIM; flags=p.forwardFFT.flags)
BP = plan_bfft!(tmpVec, DIM; flags=p.backwardFFT.flags)

return NFFTPlan{D,DIM,T}(p.N, p.M, p.x, p.m, p.sigma, p.n, p.K, p.windowLUT,
p.windowHatInvLUT, FP, BP , tmpVec, p.B)
windowLUT = copy(p.windowLUT)
windowHatInvLUT = copy(p.windowHatInvLUT)
B = copy(p.B)
x = copy(p.x)

FP = plan_fft!(tmpVec, DIM; flags = p.forwardFFT.flags)
BP = plan_bfft!(tmpVec, DIM; flags = p.backwardFFT.flags)

return NFFTPlan{D,DIM,T}(p.N, p.M, x, p.m, p.sigma, p.n, p.K, windowLUT,
windowHatInvLUT, FP, BP, tmpVec, B)
end
@inline dim(::NFFTPlan{D,DIM}) where {D, DIM} = DIM

@inline dim(::NFFTPlan{D,DIM}) where {D,DIM} = DIM


##############
# constructors
##############
function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m=4, sigma=2.0,
function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m = 4, sigma = 2.0,
window=:kaiser_bessel, K=2000;
precompute::PrecomputeFlags=LUT, sortNodes=false, kwargs...) where {D,T}

Expand All @@ -82,41 +91,43 @@ function NFFTPlan(x::Matrix{T}, N::NTuple{D,Int}, m=4, sigma=2.0,

tmpVec = Array{Complex{T}}(undef, n)

M = size(x,2)
M = size(x, 2)

FP = plan_fft!(tmpVec; kwargs...)
BP = plan_bfft!(tmpVec; kwargs...)

# Sort nodes in lexicographic way
if sortNodes
x .= sortslices(x,dims=2)
end
x .= sortslices(x,dims=2)
end

# Create lookup table
win, win_hat = getWindow(window)
windowLUT, windowHatInvLUT, B = calcLookUpTable(x, N, n, m, sigma, window, K; precompute=precompute)

windowLUT = Vector{Vector{T}}(undef,D)
windowHatInvLUT = Vector{Vector{T}}(undef,D)
for d=1:D
windowHatInvLUT[d] = zeros(T, N[d])
for k=1:N[d]
windowHatInvLUT[d][k] = 1. / win_hat(k-1-N[d]÷2, n[d], m, sigma)
end
end
NFFTPlan{D,0,T}(N, M, x, m, sigma, n, K, windowLUT, windowHatInvLUT, FP, BP, tmpVec, B)
end

if precompute == LUT
precomputeLUT(win, windowLUT, n, m, sigma, K, T)
B = sparse([],[],T[])
elseif precompute == FULL
B = precomputeB(win, x, N, n, m, M, sigma, K, T)
elseif precompute == FULL_LUT
precomputeLUT(win, windowLUT, n, m, sigma, K, T)
B = precomputeB(windowLUT, x, N, n, m, M, sigma, K, T)
function NFFTPlan!(p::AbstractNFFTPlan{D,DIM,T}, x::Matrix{T}, window = :kaiser_bessel; sortNodes=false) where {D,DIM,T}

if isempty(p.B)
precompute = LUT
else
error("precompute = $precompute not supported by NFFT.jl!")
precompute = FULL
end

NFFTPlan{D,0,T}(N, M, x, m, sigma, n, K, windowLUT, windowHatInvLUT, FP, BP, tmpVec, B )
# Sort nodes in lexicographic way
if sortNodes
x .= sortslices(x,dims=2)
end

windowLUT, windowHatInvLUT, B = calcLookUpTable(x, p.N, p.n, p.m, p.sigma, window, p.K; precompute=precompute)

p.M = size(x, 2)
p.windowLUT = windowLUT
p.windowHatInvLUT = windowHatInvLUT
p.B = B
p.x = x

return p
end

# Directional NFFT
Expand All @@ -141,37 +152,20 @@ function NFFTPlan(x::AbstractVector{T}, dim::Integer, N::NTuple{D,Int64}, m=4,

sz = [N...]
sz[dim] = n[dim]
tmpVec = Array{Complex{T}}(undef,sz...)
tmpVec = Array{Complex{T}}(undef, sz...)

M = length(x)

FP = plan_fft!(tmpVec, dim; kwargs...)
BP = plan_bfft!(tmpVec, dim; kwargs...)

# Create lookup table
win, win_hat = getWindow(window)
windowLUT, windowHatInvLUT, B = calcLookUpTable(x, (N[dim],), (n[dim],), m, sigma, window, K, precompute=LUT)

windowLUT = Vector{Vector{T}}(undef,1)
Z = round(Int, 3*K/2)
windowLUT[1] = zeros(T, Z)
for l = 1:Z
y = ((l-1) / (K-1)) * m/n[dim]
windowLUT[1][l] = win(y, n[dim], m, sigma)
end

windowHatInvLUT = Vector{Vector{T}}(undef,1)
windowHatInvLUT[1] = zeros(T, N[dim])
for k = 1:N[dim]
windowHatInvLUT[1][k] = 1. / win_hat(k-1-N[dim]/2, n[dim], m, sigma)
end

B = sparse([],[],T[])

NFFTPlan{D,dim,T}(N, M, reshape(x,1,M), m, sigma, n, K, windowLUT,
windowHatInvLUT, FP, BP, tmpVec, B)
NFFTPlan{D,dim,T}(N, M, reshape(x, 1, M), m, sigma, n, K, windowLUT,
windowHatInvLUT, FP, BP, tmpVec, B)
end

function Base.show(io::IO, p::NFFTPlan{D,0}) where D
function Base.show(io::IO, p::NFFTPlan{D,0}) where {D}
print(io, "NFFTPlan with ", p.M, " sampling points for ", p.N, " array")
end

Expand All @@ -182,8 +176,42 @@ end
size(p::NFFTPlan) = p.N
numFourierSamples(p::NFFTPlan) = p.M


################
# helper function
################
function calcLookUpTable(x::Union{Matrix{T},Vector{T}}, N::NTuple{D,Int}, n, m = 4, sigma = 2.0, window = :kaiser_bessel, K = 2000; precompute::PrecomputeFlags = LUT) where {T,D}

win, win_hat = getWindow(window)
M = size(x, 2)

windowLUT = Vector{Vector{T}}(undef, D)
windowHatInvLUT = Vector{Vector{T}}(undef, D)
for d = 1:D
windowHatInvLUT[d] = Vector{T}(undef, N[d])
@batch for k = 1:N[d]
windowHatInvLUT[d][k] = 1 / win_hat(k - 1 - N[d] / 2, n[d], m, sigma)
end
end

if precompute == LUT
precomputeLUT(win, windowLUT, n, m, sigma, K, T)
B = sparse([],[],T[])
elseif precompute == FULL
B = precomputeB(win, x, N, n, m, M, sigma, K, T)
elseif precompute == FULL_LUT
precomputeLUT(win, windowLUT, n, m, sigma, K, T)
B = precomputeB(windowLUT, x, N, n, m, M, sigma, K, T)
else
error("precompute = $precompute not supported by NFFT.jl!")
end
return (windowLUT, windowHatInvLUT, B)
end



################
# nfft functions
# nfft functions
################
"""
nfft!(p, f, fHat) -> fHat
Expand All @@ -192,7 +220,7 @@ Calculate the NFFT of `f` with plan `p` and store the result in `fHat`.
Both `f` and `fHat` must be complex arrays.
"""
function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray;
function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray;
verbose=false, timing::Union{Nothing,TimingStats} = nothing) where {D,DIM,T}
consistencyCheck(p, f, fHat)

Expand All @@ -205,7 +233,7 @@ function nfft!(p::NFFTPlan{D,DIM,T}, f::AbstractArray, fHat::StridedArray;
end
t3 = @elapsed @inbounds convolve!(p, p.tmpVec, fHat)
if verbose
@info "Timing: apod=$t1 fft=$t2 conv=$t3"
@info "Timing: apod=$t1 fft=$t2 conv=$t3"
end
if timing != nothing
timing.conv = t3
Expand All @@ -222,7 +250,7 @@ Calculate the adjoint NFFT of `fHat` and store the result in `f`.
Both `f` and `fHat` must be complex arrays.
"""
function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray;
function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray;
verbose=false, timing::Union{Nothing,TimingStats} = nothing)
consistencyCheck(p, f, fHat)

Expand All @@ -234,7 +262,7 @@ function nfft_adjoint!(p::NFFTPlan, fHat::AbstractArray, f::StridedArray;
end
t3 = @elapsed @inbounds apodization_adjoint!(p, p.tmpVec, f)
if verbose
@info "Timing: conv=$t1 fft=$t2 apod=$t3"
@info "Timing: conv=$t1 fft=$t2 apod=$t3"
end
if timing != nothing
timing.conv_adjoint = t1
Expand Down
12 changes: 7 additions & 5 deletions src/NFFT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ using ThreadedSparseCSR
import ThreadedSparseCSR: SparseMatrixCSR, sparsecsr
using CUDA
using Graphics: @mustimplement
import Base.size

export AbstractNFFTPlan, plan_nfft, nfft, nfft_adjoint, ndft, ndft_adjoint, TimingStats
import Base.size
export calculateToeplitzKernel, calculateToeplitzKernel!, convolveToeplitzKernel!

@enum PrecomputeFlags begin
LUT = 1
Expand Down Expand Up @@ -57,10 +58,10 @@ end
compute a plan for the NFFT of a size-`N` array at the nodes contained in `x`.
The computing device (CPU or GPU) can be set using the keyworkd argument `device`
The computing device (CPU or GPU) can be set using the keyworkd argument `device`
to NFFT.CPU or NFFT.CUDAGPU
"""
function plan_nfft(x::Matrix{T}, N::NTuple{D,Int}, rest...; device::Device=CPU,
function plan_nfft(x::Matrix{T}, N::NTuple{D,Int}, rest...; device::Device=CPU,
timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
t = @elapsed begin
if device==CPU
Expand All @@ -86,7 +87,7 @@ and along the direction `dim`.
The computing device (CPU or GPU) can be set using the keyworkd argument `device`.
Currently only NFFT.CPU is supported.
"""
function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
t = @elapsed begin
if device==CPU
Expand All @@ -103,7 +104,7 @@ function plan_nfft(x::Vector{T}, dim::Integer, N::NTuple{D,Int}, rest...; device
return p
end

function plan_nfft(x::Matrix{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
function plan_nfft(x::Matrix{T}, dim::Integer, N::NTuple{D,Int}, rest...; device::Device=CPU,
timing::Union{Nothing,TimingStats} = nothing, kargs...) where {T,D}
t = @elapsed begin
if size(x,1) != 1 && size(x,2) != 1
Expand Down Expand Up @@ -158,6 +159,7 @@ include("directional.jl")
include("multidimensional.jl")
include("samplingDensity.jl")
include("NDFT.jl")
include("Toeplitz.jl")


function __init__()
Expand Down
Loading

0 comments on commit 101d9d9

Please sign in to comment.