Skip to content

Commit 26680c9

Browse files
authored
Merge pull request #116 from FourierFlows/gpu
Towards full GPU functionality
2 parents 1a1e23c + fbbe85b commit 26680c9

18 files changed

+456
-321
lines changed

REQUIRE

Lines changed: 0 additions & 4 deletions
This file was deleted.

src/CuFourierFlows.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using .CuArrays
2+
3+
# Discard `effort` argument for CuArrays
4+
plan_flows_fft(a::CuArray, effort) = plan_fft(a)
5+
plan_flows_rfft(a::CuArray, effort) = plan_rfft(a)
6+
7+
OneDGrid(dev::GPU, args...; kwargs...) = OneDGrid(args...; ArrayType=CuArray, kwargs...)
8+
TwoDGrid(dev::GPU, args...; kwargs...) = TwoDGrid(args...; ArrayType=CuArray, kwargs...)
9+
10+
function Base.zeros(::GPU, T, dims)
11+
a = CuArray{T}(undef, dims...)
12+
a .= 0
13+
return a
14+
end
15+
16+
ArrayType(::GPU, T, dim) = CuArray{T, dim}
17+
ArrayType(::GPU) = CuArray
18+
19+
supersize(a::CuArray) = size(a)
20+
21+
getetdcoeffs(dt, L::CuArray; kwargs...) =
22+
(CuArray(ζ) for ζ in getetdcoeffs(dt, Array(L); kwargs...))
23+
24+
makefilter(K::CuArray; kwargs...) = CuArray(makefilter(Array(K); kwargs...))
25+
26+
function makefilter(g::AbstractGrid{Tg, <:CuArray}, T, sz; kwargs...) where Tg
27+
CuArray(ones(T, sz)) .* makefilter(g; realvars=sz[1]==g.nkr, kwargs...)
28+
end

src/FourierFlows.jl

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
module FourierFlows
22

33
export
4+
# Helper variables and macros for determining if machine is CUDA-enabled.
5+
HAVE_CUDA,
6+
@hascuda,
7+
8+
Device,
9+
CPU,
10+
GPU,
11+
ArrayType,
12+
413
cxtype,
514
fltype,
615
innereltype,
@@ -27,8 +36,10 @@ export
2736
savediagnostic,
2837

2938
@zeros,
39+
@devzeros,
3040
@createarrays,
3141
@superzeros,
42+
devzeros,
3243
superzeros,
3344
supersize,
3445

@@ -47,22 +58,25 @@ using
4758
FFTW,
4859
JLD2,
4960
Statistics,
50-
Interpolations
61+
Interpolations,
62+
Requires
5163

5264
import Base: resize!, getindex, setindex!, lastindex, push!, append!
5365

5466
using Base: fieldnames
5567

5668
using LinearAlgebra: mul!, ldiv!
5769

58-
abstract type AbstractGrid{T} end
59-
abstract type AbstractTwoDGrid{T} <: AbstractGrid{T} end
60-
abstract type AbstractOneDGrid{T} <: AbstractGrid{T} end
70+
abstract type AbstractGrid{T, Ta} end
6171
abstract type AbstractTimeStepper{T} end
6272
abstract type AbstractParams end
6373
abstract type AbstractVars end
6474
abstract type AbstractDiagnostic end
6575

76+
abstract type Device end
77+
struct CPU <: Device end
78+
struct GPU <: Device end
79+
6680
# The main show
6781
include("problem.jl")
6882
include("domains.jl")
@@ -74,4 +88,21 @@ include("timesteppers.jl")
7488
# Physics
7589
include("diffusion.jl")
7690

91+
# Import CUDA utilities if cuda is detected.
92+
const HAVE_CUDA = try
93+
using CuArrays
94+
true
95+
catch
96+
false
97+
end
98+
99+
macro hascuda(ex)
100+
return HAVE_CUDA ? :($(esc(ex))) : :(nothing)
101+
end
102+
103+
104+
function __init__()
105+
@require CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("CuFourierFlows.jl")
106+
end
107+
77108
end # module

src/diffusion.jl

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,48 +25,55 @@ function Problem(;
2525
kappa = 0,
2626
dt = 0.01,
2727
stepper = "RK4",
28-
T = Float64
28+
T = Float64,
29+
dev = CPU()
2930
)
3031

31-
grid = OneDGrid(nx, Lx; T=T)
32-
params = Params(kappa)
33-
vars = Vars(grid)
34-
eqn = Equation(kappa, grid)
32+
grid = OneDGrid(dev, nx, Lx; T=T)
33+
params = Params(dev, kappa)
34+
vars = Vars(dev, grid)
35+
eqn = DiffusionEquation(dev, kappa, grid)
3536

36-
FourierFlows.Problem(eqn, stepper, dt, grid, vars, params)
37+
FourierFlows.Problem(eqn, stepper, dt, grid, vars, params, dev)
3738
end
3839

3940
struct Params{T} <: AbstractParams
4041
kappa::T
4142
end
4243

44+
Params(dev, kappa::Number) = Params(kappa)
45+
Params(dev, kappa::AbstractArray) = Params(ArrayType(dev)(kappa))
46+
4347
"""
4448
Equation(p, g)
4549
4650
Returns the equation for constant diffusivity problem with params p and grid g.
4751
"""
48-
function Equation(kappa::T, g) where T<:Number
49-
FourierFlows.Equation(-kappa*g.kr.^2, calcN!, g)
52+
function DiffusionEquation(dev::Device, kappa::T, grid) where T<:Number
53+
L = zeros(dev, T, grid.nkr)
54+
@. L = -kappa * grid.kr^2
55+
FourierFlows.Equation(L, calcN!, grid)
5056
end
5157

52-
function Equation(kappa::T, g::AbstractGrid{Tg}) where {T<:AbstractArray,Tg}
53-
FourierFlows.Equation(0, calcN!, g; dims=(g.nkr,), T=cxtype(Tg))
58+
function DiffusionEquation(dev::Device, kappa::T, grid::AbstractGrid{Tg}) where {T<:AbstractArray, Tg}
59+
FourierFlows.Equation(0, calcN!, grid; dims=(grid.nkr,), T=cxtype(Tg))
5460
end
5561

56-
# Construct Vars types
57-
const physicalvars = [:c, :cx]
58-
const fouriervars = [:ch, :cxh]
59-
60-
eval(varsexpression(:Vars, physicalvars, fouriervars))
62+
struct Vars{Aphys, Atrans} <: AbstractVars
63+
c :: Aphys
64+
cx :: Aphys
65+
ch :: Atrans
66+
cxh :: Atrans
67+
end
6168

6269
"""
63-
Vars(g)
70+
Vars(dev, grid)
6471
6572
Returns the vars for constant diffusivity problem on grid g.
6673
"""
67-
function Vars(g::AbstractGrid{T}) where T
68-
@zeros T g.nx c cx
69-
@zeros Complex{T} g.nkr ch cxh
74+
function Vars(::Dev, grid::AbstractGrid{T}) where {Dev, T}
75+
@devzeros Dev T grid.nx c cx
76+
@devzeros Dev Complex{T} grid.nkr ch cxh
7077
Vars(c, cx, ch, cxh)
7178
end
7279

src/domains.jl

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
plan_flows_fft(a::Array, effort) = plan_fft(a; flags=effort)
2+
plan_flows_rfft(a::Array, effort) = plan_rfft(a; flags=effort)
3+
14
"""
25
ZeroDGrid()
36
47
Constructs a placeholder grid object for "0D" problems (in other words, systems of ODEs).
58
"""
6-
struct ZeroDGrid{T} <: AbstractGrid{T} end
9+
struct ZeroDGrid{T, Ta} <: AbstractGrid{T, Ta} end
710

811
function getaliasedwavenumbers(nk, nkr, aliasfraction)
912
# Index endpoints for aliased i, j wavenumbers
@@ -28,65 +31,67 @@ Constructs a OneDGrid object with size `Lx`, resolution `nx`, and leftmost
2831
position `x0`. FFT plans are generated for `nthreads` CPUs using
2932
FFTW flag `effort`.
3033
"""
31-
struct OneDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractOneDGrid{T}
32-
nx::Int
33-
nk::Int
34-
nkr::Int
34+
struct OneDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractGrid{T, Ta}
35+
nx :: Int
36+
nk :: Int
37+
nkr :: Int
3538

36-
dx::T
37-
Lx::T
39+
dx :: T
40+
Lx :: T
3841

39-
x::Ta
40-
k::Ta
41-
kr::Ta
42-
invksq::Ta
43-
invkrsq::Ta
42+
x :: Ta
43+
k :: Ta
44+
kr :: Ta
45+
invksq :: Ta
46+
invkrsq :: Ta
4447

45-
fftplan::Tfft
46-
rfftplan::Trfft
48+
fftplan :: Tfft
49+
rfftplan :: Trfft
4750

4851
# Range objects that access the aliased part of the wavenumber range
49-
kalias::UnitRange{Int}
50-
kralias::UnitRange{Int}
52+
kalias :: UnitRange{Int}
53+
kralias :: UnitRange{Int}
5154
end
5255

53-
function OneDGrid(nx, Lx; x0=-Lx/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64, dealias=1/3)
56+
function OneDGrid(nx, Lx; x0=-Lx/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64, dealias=1/3,
57+
ArrayType=Array)
5458

5559
dx = Lx/nx
56-
x = Array{T}(range(x0, step=dx, length=nx))
60+
x = ArrayType{T}(range(x0, step=dx, length=nx))
5761

5862
nk = nx
5963
nkr = Int(nx/2+1)
6064

6165
i₁ = 0:Int(nx/2)
6266
i₂ = Int(-nx/2+1):-1
63-
k = Array{T}(2π/Lx*cat(i₁, i₂; dims=1))
64-
kr = Array{T}(2π/Lx*cat(i₁; dims=1))
67+
k = ArrayType{T}(2π/Lx*cat(i₁, i₂; dims=1))
68+
kr = ArrayType{T}(2π/Lx*cat(i₁; dims=1))
6569

6670
invksq = @. 1/k^2
6771
invkrsq = @. 1/kr^2
6872
invksq[1] = 0
6973
invkrsq[1] = 0
7074

7175
FFTW.set_num_threads(nthreads)
72-
fftplan = plan_fft(Array{Complex{T},1}(undef, nx); flags=effort)
73-
rfftplan = plan_rfft(Array{T,1}(undef, nx); flags=effort)
76+
fftplan = plan_flows_fft(ArrayType{Complex{T}, 1}(undef, nx), effort)
77+
rfftplan = plan_flows_rfft(ArrayType{T, 1}(undef, nx), effort)
7478

7579
kalias, kralias = getaliasedwavenumbers(nk, nkr, dealias)
7680

7781
Ta = typeof(x)
7882
Tfft = typeof(fftplan)
7983
Trfft = typeof(rfftplan)
8084

81-
OneDGrid{T, Ta, Tfft, Trfft}(nx, nk, nkr, dx, Lx, x, k, kr, invksq, invkrsq, fftplan, rfftplan, kalias, kralias)
85+
OneDGrid{T, Ta, Tfft, Trfft}(nx, nk, nkr, dx, Lx, x, k, kr,
86+
invksq, invkrsq, fftplan, rfftplan, kalias, kralias)
8287
end
8388

8489
"""
8590
TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE)
8691
8792
Constructs a TwoDGrid object.
8893
"""
89-
struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractTwoDGrid{T}
94+
struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractGrid{T, Ta}
9095
nx::Int
9196
ny::Int
9297
nk::Int
@@ -117,8 +122,9 @@ struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractTwo
117122
lalias::UnitRange{Int}
118123
end
119124

120-
function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64,
121-
dealias=1/3)
125+
function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS,
126+
effort=FFTW.MEASURE, T=Float64, dealias=1/3, ArrayType=Array)
127+
122128
dx = Lx/nx
123129
dy = Ly/ny
124130

@@ -127,18 +133,18 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
127133
nkr = Int(nx/2+1)
128134

129135
# Physical grid
130-
x = Array{T}(reshape(range(x0, step=dx, length=nx), (nx, 1)))
131-
y = Array{T}(reshape(range(y0, step=dy, length=ny), (1, ny)))
136+
x = ArrayType{T}(reshape(range(x0, step=dx, length=nx), (nx, 1)))
137+
y = ArrayType{T}(reshape(range(y0, step=dy, length=ny), (1, ny)))
132138

133139
# Wavenubmer grid
134140
i₁ = 0:Int(nx/2)
135141
i₂ = Int(-nx/2+1):-1
136142
j₁ = 0:Int(ny/2)
137143
j₂ = Int(-ny/2+1):-1
138144

139-
k = Array{T}(reshape(2π/Lx*cat(i₁, i₂, dims=1), (nk, 1)))
140-
l = Array{T}(reshape(2π/Ly*cat(j₁, j₂, dims=1), (1, nl)))
141-
kr = Array{T}(reshape(2π/Lx*cat(i₁, dims=1), (nkr, 1)))
145+
k = ArrayType{T}(reshape(2π/Lx*cat(i₁, i₂, dims=1), (nk, 1)))
146+
l = ArrayType{T}(reshape(2π/Ly*cat(j₁, j₂, dims=1), (1, nl)))
147+
kr = ArrayType{T}(reshape(2π/Lx*cat(i₁, dims=1), (nkr, 1)))
142148

143149
Ksq = @. k^2 + l^2
144150
invKsq = @. 1/Ksq
@@ -150,8 +156,8 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
150156

151157
# FFT plans
152158
FFTW.set_num_threads(nthreads)
153-
fftplan = plan_fft(Array{Complex{T},2}(undef, nx, ny); flags=effort)
154-
rfftplan = plan_rfft(Array{T,2}(undef, nx, ny); flags=effort)
159+
fftplan = plan_flows_fft(ArrayType{Complex{T}, 2}(undef, nx, ny), effort)
160+
rfftplan = plan_flows_rfft(ArrayType{T, 2}(undef, nx, ny), effort)
155161

156162
# Index endpoints for aliasfrac i, j wavenumbers
157163
kalias, kralias = getaliasedwavenumbers(nk, nkr, dealias)
@@ -165,15 +171,18 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
165171
fftplan, rfftplan, kalias, kralias, lalias)
166172
end
167173

174+
OneDGrid(dev::CPU, args...; kwargs...) = OneDGrid(args...; ArrayType=Array, kwargs...)
175+
TwoDGrid(dev::CPU, args...; kwargs...) = TwoDGrid(args...; ArrayType=Array, kwargs...)
176+
168177
"""
169178
gridpoints(g)
170179
171180
Returns the collocation points of the grid `g` in 2D arrays `X, Y`.
172181
"""
173-
function gridpoints(g)
182+
function gridpoints(g::AbstractGrid{T, A}) where {T, A}
174183
X = [ g.x[i] for i=1:g.nx, j=1:g.ny]
175184
Y = [ g.y[j] for i=1:g.nx, j=1:g.ny]
176-
X, Y
185+
A(X), A(Y)
177186
end
178187

179188
"""
@@ -211,7 +220,7 @@ for K>innerK, thus removing high-wavenumber content from a spectrum it is multip
211220
The decay rate is determined by order and outerK determines the outer wavenumber at which
212221
the filter is smaller than Float64 machine precision.
213222
"""
214-
function makefilter(K::AbstractArray; order=4, innerK=0.65, outerK=1)
223+
function makefilter(K::Array; order=4, innerK=0.65, outerK=1)
215224
TK = typeof(K)
216225
K = Array(K)
217226
decay = 15*log(10) / (outerK-innerK)^order # decay rate for filtering function
@@ -220,16 +229,16 @@ function makefilter(K::AbstractArray; order=4, innerK=0.65, outerK=1)
220229
TK(filt)
221230
end
222231

223-
function makefilter(g::AbstractTwoDGrid; realvars=true, kwargs...)
232+
function makefilter(g::TwoDGrid; realvars=true, kwargs...)
224233
K = realvars ?
225234
@.(sqrt((g.kr*g.dx/π)^2 + (g.l*g.dy/π)^2)) : @.(sqrt((g.k*g.dx/π)^2 + (g.l*g.dy/π)^2))
226235
makefilter(K; kwargs...)
227236
end
228237

229-
function makefilter(g::AbstractOneDGrid; realvars=true, kwargs...)
238+
function makefilter(g::OneDGrid; realvars=true, kwargs...)
230239
K = realvars ? g.kr*g.dx/π : @.(abs(g.k*g.dx/π))
231240
makefilter(K; kwargs...)
232241
end
233242

234-
makefilter(g, T, sz; kwargs...) = ones(T, sz).*makefilter(g; realvars=sz[1]==g.nkr, kwargs...)
243+
makefilter(g, T, sz; kwargs...) = ones(T, sz) .* makefilter(g; realvars=sz[1]==g.nkr, kwargs...)
235244
makefilter(eq) = makefilter(eq.grid, fltype(eq.T), eq.dims)

0 commit comments

Comments
 (0)