FourierFlows
diff --git a/‎REQUIRE
Lines changed: 0 additions & 4 deletions b/‎REQUIRE
Lines changed: 0 additions & 4 deletions
diff --git a/‎src/CuFourierFlows.jl
Lines changed: 28 additions & 0 deletions b/‎src/CuFourierFlows.jl
Lines changed: 28 additions & 0 deletions
diff --git a/‎src/FourierFlows.jl
Lines changed: 35 additions & 4 deletions b/‎src/FourierFlows.jl
Lines changed: 35 additions & 4 deletions
diff --git a/‎src/diffusion.jl
Lines changed: 26 additions & 19 deletions b/‎src/diffusion.jl
Lines changed: 26 additions & 19 deletions
diff --git a/‎src/domains.jl
Lines changed: 48 additions & 39 deletions b/‎src/domains.jl
Lines changed: 48 additions & 39 deletions
@@ -0,0 +1,28 @@
+using .CuArrays
+
+# GPU FFT planning: `effort` is accepted so the signature matches the `Array`
+# methods, but it is not forwarded to the planner.
+function plan_flows_fft(a::CuArray, effort)
+    return plan_fft(a)
+end
+
+function plan_flows_rfft(a::CuArray, effort)
+    return plan_rfft(a)
+end
+
+# Device-dispatched grid constructors: a `GPU` device selects `CuArray` storage.
+# Caller keywords are splatted after `ArrayType`.
+function OneDGrid(::GPU, args...; kwargs...)
+    return OneDGrid(args...; ArrayType=CuArray, kwargs...)
+end
+
+function TwoDGrid(::GPU, args...; kwargs...)
+    return TwoDGrid(args...; ArrayType=CuArray, kwargs...)
+end
+
+# Allocate a `CuArray` with element type `T` and dimensions `dims`, filled with zeros.
+function Base.zeros(::GPU, T, dims)
+    buffer = CuArray{T}(undef, dims...)
+    return fill!(buffer, 0)
+end
+
+# Array type associated with the `GPU` device, optionally parameterised by element
+# type `T` and number of dimensions `dim`.
+function ArrayType(::GPU, T, dim)
+    return CuArray{T, dim}
+end
+
+function ArrayType(::GPU)
+    return CuArray
+end
+
+# For a plain `CuArray`, `supersize` is simply `size`.
+function supersize(a::CuArray)
+    return size(a)
+end
+
+# ETD coefficients for a device-resident linear operator `L`. The coefficients are
+# computed by the `getetdcoeffs` method that handles a host copy of `L`, and each
+# one is then copied to the GPU. The result is materialised as a `Tuple` instead of
+# being returned as a lazy generator, so the host-to-device copies happen exactly
+# once no matter how often the caller iterates or destructures the result.
+function getetdcoeffs(dt, L::CuArray; kwargs...)
+    hostcoeffs = getetdcoeffs(dt, Array(L); kwargs...)
+    return Tuple(CuArray(ζ) for ζ in hostcoeffs)
+end
+
+# Build the filter on the host from a copy of `K`, then move the result to the GPU.
+function makefilter(K::CuArray; kwargs...)
+    hostfilter = makefilter(Array(K); kwargs...)
+    return CuArray(hostfilter)
+end
+
+# Filter of element type `T` and size `sz` for a grid with `CuArray` storage.
+# `realvars` is set when the first entry of `sz` equals `g.nkr`.
+function makefilter(g::AbstractGrid{Tg, <:CuArray}, T, sz; kwargs...) where Tg
+    userealvars = sz[1] == g.nkr
+    unit = CuArray(ones(T, sz))
+    gridfilter = makefilter(g; realvars=userealvars, kwargs...)
+    return unit .* gridfilter
+end
@@ -1,6 +1,15 @@
 module FourierFlows
 
 export
+  # Helper variables and macros for determining if machine is CUDA-enabled.
+  HAVE_CUDA,
+  @hascuda,
+  
+  Device,
+  CPU,
+  GPU,
+  ArrayType,
+
   cxtype,
   fltype,
   innereltype,
@@ -27,8 +36,10 @@ export
   savediagnostic,
 
   @zeros,
+  @devzeros,
   @createarrays,
   @superzeros,
+  devzeros,
   superzeros,
   supersize,
 
@@ -47,22 +58,25 @@ using
   FFTW,
   JLD2,
   Statistics,
-  Interpolations
+  Interpolations,
+  Requires
 
 import Base: resize!, getindex, setindex!, lastindex, push!, append!
 
 using Base: fieldnames
 
 using LinearAlgebra: mul!, ldiv!
 
-abstract type AbstractGrid{T} end
-abstract type AbstractTwoDGrid{T} <: AbstractGrid{T} end
-abstract type AbstractOneDGrid{T} <: AbstractGrid{T} end
+# Grid supertype. The concrete grids in domains.jl subtype it with `T` as their
+# floating-point type and `Ta` as the type of their coordinate arrays.
+abstract type AbstractGrid{T, Ta} end
 abstract type AbstractTimeStepper{T} end
 abstract type AbstractParams end
 abstract type AbstractVars end
 abstract type AbstractDiagnostic end
 
+# Device tags used for dispatch: `CPU` selects `Array` storage and `GPU` selects
+# `CuArray` storage in the grid constructors.
+abstract type Device end
+struct CPU <: Device end
+struct GPU <: Device end
+
 # The main show
 include("problem.jl")
 include("domains.jl")
@@ -74,4 +88,21 @@ include("timesteppers.jl")
 # Physics
 include("diffusion.jl")
 
+# Import CuArrays if it can be loaded. `HAVE_CUDA` records the outcome: `true` when
+# `using CuArrays` succeeds, `false` when it throws (the exception is discarded).
+const HAVE_CUDA = try
+    using CuArrays
+    true
+catch
+    false
+end
+
+# Expand to `ex` (escaped, so it is evaluated in the caller's scope) when `HAVE_CUDA`
+# is true at macro-expansion time; otherwise expand to `nothing`.
+macro hascuda(ex)
+    if HAVE_CUDA
+        return esc(ex)
+    end
+    return :(nothing)
+end
+
+
+# Module initialiser: registers a Requires.jl hook keyed on the CuArrays package
+# UUID, so that CuFourierFlows.jl is included when CuArrays is loaded.
+function __init__()
+    @require CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("CuFourierFlows.jl")
+end
+
 end # module
@@ -25,48 +25,55 @@ function Problem(;
          kappa = 0,
             dt = 0.01,
        stepper = "RK4",
-             T = Float64
+             T = Float64,
+           dev = CPU()
   )
 
-    grid = OneDGrid(nx, Lx; T=T)
-  params = Params(kappa)
-    vars = Vars(grid)
-     eqn = Equation(kappa, grid)
+    grid = OneDGrid(dev, nx, Lx; T=T)
+  params = Params(dev, kappa)
+    vars = Vars(dev, grid)
+     eqn = DiffusionEquation(dev, kappa, grid)
 
-  FourierFlows.Problem(eqn, stepper, dt, grid, vars, params)
+  FourierFlows.Problem(eqn, stepper, dt, grid, vars, params, dev)
 end
 
 struct Params{T} <: AbstractParams
   kappa::T
 end
 
+# Device-aware constructors: a scalar `kappa` is stored unchanged, while an array
+# `kappa` is first converted with the array type returned by `ArrayType(dev)`.
+function Params(dev, kappa::Number)
+  return Params(kappa)
+end
+
+function Params(dev, kappa::AbstractArray)
+  devarray = ArrayType(dev)
+  return Params(devarray(kappa))
+end
+
 """
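-    Equation(p, g)
+    DiffusionEquation(dev, kappa, grid)
 
-Returns the equation for constant diffusivity problem with params p and grid g.
+Returns the equation for the constant diffusivity problem with diffusivity `kappa` on
+grid `grid`, with the linear coefficient allocated on device `dev`.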
 """
-function Equation(kappa::T, g) where T<:Number
-  FourierFlows.Equation(-kappa*g.kr.^2, calcN!, g)
+# Scalar diffusivity: the linear coefficient is `L = -kappa * grid.kr^2`, allocated
+# on `dev` with `grid.nkr` entries.
+function DiffusionEquation(dev::Device, kappa::T, grid) where T<:Number
+  # Allocate with the promoted element type. Using `T = typeof(kappa)` directly would
+  # build an integer array for an integer `kappa`, and the in-place broadcast below
+  # would then throw `InexactError` on the non-integer values of `kappa * grid.kr^2`.
+  TL = promote_type(T, eltype(grid.kr))
+  L = zeros(dev, TL, grid.nkr)
+  @. L = -kappa * grid.kr^2
+  FourierFlows.Equation(L, calcN!, grid)
 end
 
-function Equation(kappa::T, g::AbstractGrid{Tg}) where {T<:AbstractArray,Tg}
-  FourierFlows.Equation(0, calcN!, g; dims=(g.nkr,), T=cxtype(Tg))
+# Array-valued diffusivity: the linear coefficient passed on is 0, and the dimensions
+# `(grid.nkr,)` and element type `cxtype(Tg)` are given to `FourierFlows.Equation`
+# explicitly.
+function DiffusionEquation(dev::Device, kappa::AbstractArray, grid::AbstractGrid{Tg}) where Tg
+  eqdims = (grid.nkr,)
+  eqtype = cxtype(Tg)
+  return FourierFlows.Equation(0, calcN!, grid; dims=eqdims, T=eqtype)
 end
 
-# Construct Vars types
-const physicalvars = [:c, :cx]
-const  fouriervars = [:ch, :cxh]
-
-eval(varsexpression(:Vars, physicalvars, fouriervars))
+# Variables of the diffusion problem. `c` and `cx` share the array type `Aphys`;
+# `ch` and `cxh` share the array type `Atrans`.
+struct Vars{Aphys, Atrans} <: AbstractVars
+  c::Aphys
+  cx::Aphys
+  ch::Atrans
+  cxh::Atrans
+end
 
 """
-    Vars(g)
+    Vars(dev, grid)
 
-Returns the vars for constant diffusivity problem on grid g.
+Returns the vars for the constant diffusivity problem on device `dev` and grid `grid`.
 """
-function Vars(g::AbstractGrid{T}) where T
-  @zeros T g.nx c cx
-  @zeros Complex{T} g.nkr ch cxh
+function Vars(::Dev, grid::AbstractGrid{T}) where {Dev, T}
+  # NOTE(review): `@devzeros` is defined outside this hunk; presumably it binds each
+  # listed name to a zero array of the given element type and size for device type
+  # `Dev` -- confirm against its definition.
+  @devzeros Dev T grid.nx c cx
+  @devzeros Dev Complex{T} grid.nkr ch cxh
   Vars(c, cx, ch, cxh)
 end
 
 
@@ -1,9 +1,12 @@
+# CPU FFT planning: `effort` is forwarded to the planner as the `flags` keyword.
+function plan_flows_fft(a::Array, effort)
+  return plan_fft(a; flags=effort)
+end
+
+function plan_flows_rfft(a::Array, effort)
+  return plan_rfft(a; flags=effort)
+end
+
 """
     ZeroDGrid()
 
 Constructs a placeholder grid object for "0D" problems (in other words, systems of ODEs).
 """
-struct ZeroDGrid{T} <: AbstractGrid{T} end
+struct ZeroDGrid{T, Ta} <: AbstractGrid{T, Ta} end  # no fields; `T` and `Ta` are type parameters only
 
 function getaliasedwavenumbers(nk, nkr, aliasfraction)
   # Index endpoints for aliased i, j wavenumbers
@@ -28,65 +31,67 @@ Constructs a OneDGrid object with size `Lx`, resolution `nx`, and leftmost
 position `x0`. FFT plans are generated for `nthreads` CPUs using
 FFTW flag `effort`.
 """
-struct OneDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractOneDGrid{T}
-  nx::Int
-  nk::Int
-  nkr::Int
+struct OneDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractGrid{T, Ta}
+  # Number of grid points, of wavenumbers (`nk = nx`), and of real-transform
+  # wavenumbers (`nkr = nx/2 + 1`), as set by the constructor.
+        nx :: Int
+        nk :: Int
+       nkr :: Int
 
-  dx::T
-  Lx::T
+  # Grid spacing (`Lx/nx`) and domain length.
+        dx :: T
+        Lx :: T
 
-  x::Ta
-  k::Ta
-  kr::Ta
-  invksq::Ta
-  invkrsq::Ta
+  # Grid points, wavenumbers `k` and `kr`, and their inverse squares (whose first
+  # entry is set to 0 by the constructor).
+         x :: Ta
+         k :: Ta
+        kr :: Ta
+    invksq :: Ta
+   invkrsq :: Ta
 
-  fftplan::Tfft
-  rfftplan::Trfft
+  # Plans created by `plan_flows_fft` and `plan_flows_rfft` for length-`nx` arrays.
+   fftplan :: Tfft
+  rfftplan :: Trfft
 
   # Range objects that access the aliased part of the wavenumber range
-  kalias::UnitRange{Int}
-  kralias::UnitRange{Int}
+    kalias :: UnitRange{Int}
+   kralias :: UnitRange{Int}
 end
 
-function OneDGrid(nx, Lx; x0=-Lx/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64, dealias=1/3)
+# `ArrayType` selects the array type used for the coordinate and wavenumber arrays
+# and for the arrays from which the FFT plans are created.
+function OneDGrid(nx, Lx; x0=-Lx/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64, dealias=1/3,
+                  ArrayType=Array)
 
   dx = Lx/nx
-  x = Array{T}(range(x0, step=dx, length=nx))
+  x = ArrayType{T}(range(x0, step=dx, length=nx))
 
   nk = nx
   nkr = Int(nx/2+1)
 
   i₁ = 0:Int(nx/2)
   i₂ = Int(-nx/2+1):-1
-   k = Array{T}(2π/Lx*cat(i₁, i₂; dims=1))
-  kr = Array{T}(2π/Lx*cat(i₁; dims=1))
+   k = ArrayType{T}(2π/Lx*cat(i₁, i₂; dims=1))
+  kr = ArrayType{T}(2π/Lx*cat(i₁; dims=1))
 
-   invksq = @. 1/k^2
-  invkrsq = @. 1/kr^2
-   invksq[1] = 0
-  invkrsq[1] = 0
+  # Inverse squared wavenumbers with the zero-wavenumber entry set to 0. A fused
+  # broadcast replaces `invksq[1] = 0`, which is a scalar write into the array and
+  # therefore element-wise device access when `ArrayType` is a GPU array type.
+  zeroT = zero(T)
+   invksq = @. ifelse(k == 0, zeroT, 1/k^2)
+  invkrsq = @. ifelse(kr == 0, zeroT, 1/kr^2)
 
   FFTW.set_num_threads(nthreads)
-   fftplan = plan_fft(Array{Complex{T},1}(undef, nx); flags=effort)
-  rfftplan = plan_rfft(Array{T,1}(undef, nx); flags=effort)
+   fftplan = plan_flows_fft(ArrayType{Complex{T}, 1}(undef, nx), effort)
+  rfftplan = plan_flows_rfft(ArrayType{T, 1}(undef, nx), effort)
 
   kalias, kralias = getaliasedwavenumbers(nk, nkr, dealias)
 
   Ta = typeof(x)
   Tfft = typeof(fftplan)
   Trfft = typeof(rfftplan)
 
-  OneDGrid{T, Ta, Tfft, Trfft}(nx, nk, nkr, dx, Lx, x, k, kr, invksq, invkrsq, fftplan, rfftplan, kalias, kralias)
+  OneDGrid{T, Ta, Tfft, Trfft}(nx, nk, nkr, dx, Lx, x, k, kr, 
+                               invksq, invkrsq, fftplan, rfftplan, kalias, kralias)
 end
 
 """
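-    TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE)
+    TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS,
+             effort=FFTW.MEASURE, T=Float64, dealias=1/3, ArrayType=Array)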
 
 Constructs a TwoDGrid object.
 """
-struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractTwoDGrid{T}
+struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractGrid{T, Ta}
   nx::Int
   ny::Int
   nk::Int
@@ -117,8 +122,9 @@ struct TwoDGrid{T<:AbstractFloat, Ta<:AbstractArray, Tfft, Trfft} <: AbstractTwo
   lalias::UnitRange{Int}
 end
 
-function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS, effort=FFTW.MEASURE, T=Float64,
-                  dealias=1/3)
+function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THREADS, 
+                  effort=FFTW.MEASURE, T=Float64, dealias=1/3, ArrayType=Array)
+                  
   dx = Lx/nx
   dy = Ly/ny
 
@@ -127,18 +133,18 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
   nkr = Int(nx/2+1)
 
   # Physical grid
-  x = Array{T}(reshape(range(x0, step=dx, length=nx), (nx, 1)))
-  y = Array{T}(reshape(range(y0, step=dy, length=ny), (1, ny)))
+  x = ArrayType{T}(reshape(range(x0, step=dx, length=nx), (nx, 1)))
+  y = ArrayType{T}(reshape(range(y0, step=dy, length=ny), (1, ny)))
 
-  # Wavenubmer grid
+  # Wavenumber grid
   i₁ = 0:Int(nx/2)
   i₂ = Int(-nx/2+1):-1
   j₁ = 0:Int(ny/2)
   j₂ = Int(-ny/2+1):-1
 
-   k = Array{T}(reshape(2π/Lx*cat(i₁, i₂, dims=1), (nk, 1)))
-   l = Array{T}(reshape(2π/Ly*cat(j₁, j₂, dims=1), (1, nl)))
-  kr = Array{T}(reshape(2π/Lx*cat(i₁, dims=1), (nkr, 1)))
+   k = ArrayType{T}(reshape(2π/Lx*cat(i₁, i₂, dims=1), (nk, 1)))
+   l = ArrayType{T}(reshape(2π/Ly*cat(j₁, j₂, dims=1), (1, nl)))
+  kr = ArrayType{T}(reshape(2π/Lx*cat(i₁, dims=1), (nkr, 1)))
 
      Ksq = @. k^2 + l^2
   invKsq = @. 1/Ksq
@@ -150,8 +156,8 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
 
   # FFT plans
   FFTW.set_num_threads(nthreads)
-  fftplan = plan_fft(Array{Complex{T},2}(undef, nx, ny); flags=effort)
-  rfftplan = plan_rfft(Array{T,2}(undef, nx, ny); flags=effort)
+  fftplan = plan_flows_fft(ArrayType{Complex{T}, 2}(undef, nx, ny), effort)
+  rfftplan = plan_flows_rfft(ArrayType{T, 2}(undef, nx, ny), effort)
 
   # Index endpoints for aliasfrac i, j wavenumbers
   kalias, kralias = getaliasedwavenumbers(nk, nkr, dealias)
@@ -165,15 +171,18 @@ function TwoDGrid(nx, Lx, ny=nx, Ly=Lx; x0=-Lx/2, y0=-Ly/2, nthreads=Sys.CPU_THR
            fftplan, rfftplan, kalias, kralias, lalias)
 end
 
+# Device-dispatched grid constructors: a `CPU` device selects `Array` storage.
+# Caller keywords are splatted after `ArrayType`.
+function OneDGrid(::CPU, args...; kwargs...)
+  return OneDGrid(args...; ArrayType=Array, kwargs...)
+end
+
+function TwoDGrid(::CPU, args...; kwargs...)
+  return TwoDGrid(args...; ArrayType=Array, kwargs...)
+end
+
 """
     gridpoints(g)
 
 Returns the collocation points of the grid `g` in 2D arrays `X, Y`.
 """
-function gridpoints(g)
-  X = [ g.x[i] for i=1:g.nx, j=1:g.ny]
-  Y = [ g.y[j] for i=1:g.nx, j=1:g.ny]
-  X, Y
+function gridpoints(g::AbstractGrid{T, A}) where {T, A}
+  # Work from host copies of the coordinate vectors: the comprehensions below read
+  # one element at a time, which would be `nx*ny` scalar device reads on a GPU grid.
+  x = Array(g.x)
+  y = Array(g.y)
+  X = [ x[i] for i=1:g.nx, j=1:g.ny]
+  Y = [ y[j] for i=1:g.nx, j=1:g.ny]
+  # Convert the host results to the grid's own array type `A`.
+  A(X), A(Y)
 end
 
 """
@@ -211,7 +220,7 @@ for K>innerK, thus removing high-wavenumber content from a spectrum it is multip
 The decay rate is determined by order and outerK determines the outer wavenumber at which
 the filter is smaller than Float64 machine precision.
 """
-function makefilter(K::AbstractArray; order=4, innerK=0.65, outerK=1)
+function makefilter(K::Array; order=4, innerK=0.65, outerK=1)
   TK = typeof(K)
   K = Array(K)
   decay = 15*log(10) / (outerK-innerK)^order # decay rate for filtering function
@@ -220,16 +229,16 @@ function makefilter(K::AbstractArray; order=4, innerK=0.65, outerK=1)
   TK(filt)
 end
 
-function makefilter(g::AbstractTwoDGrid; realvars=true, kwargs...)
+# Filter for a two-dimensional grid: builds the scaled wavenumber magnitude `K`
+# (from `g.kr` when `realvars` is true, from `g.k` otherwise, together with `g.l`)
+# and forwards it with the remaining keywords to `makefilter(K; ...)`.
+function makefilter(g::TwoDGrid; realvars=true, kwargs...)
   K = realvars ?
       @.(sqrt((g.kr*g.dx/π)^2 + (g.l*g.dy/π)^2)) : @.(sqrt((g.k*g.dx/π)^2 + (g.l*g.dy/π)^2))
   makefilter(K; kwargs...)
 end
 
-function makefilter(g::AbstractOneDGrid; realvars=true, kwargs...)
+# Filter for a one-dimensional grid: `K` is `g.kr*g.dx/π` when `realvars` is true and
+# `abs(g.k*g.dx/π)` otherwise; it is forwarded with the remaining keywords to
+# `makefilter(K; ...)`.
+function makefilter(g::OneDGrid; realvars=true, kwargs...)
   K = realvars ? g.kr*g.dx/π : @.(abs(g.k*g.dx/π))
   makefilter(K; kwargs...)
 end
 
-makefilter(g, T, sz; kwargs...) = ones(T, sz).*makefilter(g; realvars=sz[1]==g.nkr, kwargs...)
+# Filter of element type `T` and size `sz` on grid `g`; `realvars` is set when the
+# first entry of `sz` equals `g.nkr`.
+function makefilter(g, T, sz; kwargs...)
+  userealvars = sz[1] == g.nkr
+  return ones(T, sz) .* makefilter(g; realvars=userealvars, kwargs...)
+end
+# Filter for equation `eq`, built from `eq.grid`, `fltype(eq.T)` and `eq.dims`.
 makefilter(eq) = makefilter(eq.grid, fltype(eq.T), eq.dims)