Merge pull request #295 from FluxML/develop

yuehhua · web-flow · commit c306989f3aeb · 2022-05-20T02:17:56.000+08:00
Add SAGEConv
diff --git a/Project.toml b/Project.toml
@@ -21,6 +21,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Word2Vec = "c64b6f0f-98cd-51d1-af78-58ae84944834"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
@@ -37,6 +38,7 @@ NNlib = "0.8"
 NNlibCUDA = "0.2"
 Optimisers = "0.2"
 Reexport = "1.1"
+StatsBase = "0.33"
 Word2Vec = "0.5"
 Zygote = "0.6"
 julia = "1.6"
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
@@ -2,7 +2,7 @@ module GeometricFlux
 
 using DelimitedFiles
 using SparseArrays
-using Statistics: mean
+using Statistics, StatsBase
 using LinearAlgebra
 using Random
 using Reexport
@@ -41,6 +41,9 @@ export
     EdgeConv,
     GINConv,
     CGConv,
+    SAGEConv,
+    MeanAggregator, MeanPoolAggregator, MaxPoolAggregator,
+    LSTMAggregator,
 
     # layer/pool
     GlobalPool,
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -765,3 +765,177 @@ function Base.show(io::IO, l::CGConv)
     edge_dim = d - 2*node_dim
     print(io, "CGConv(node dim=", node_dim, ", edge dim=", edge_dim, ")")
 end
+
+"""
+    SAGEConv(in => out, σ=identity, aggr=mean; normalize=true, project=false,
+             bias=true, num_sample=10, init=glorot_uniform)
+
+SAmple and aggreGatE convolutional layer for GraphSAGE network.
+
+# Arguments
+
+- `in`: The dimension of input features.
+- `out`: The dimension of output features.
+- `σ`: Activation function.
+- `aggr`: An aggregate function applied to the result of message function. `mean`, `max`,
+`LSTM` and `GCNConv` are available.
+- `normalize::Bool`: Whether to L2-normalize output features across all nodes (dimension 2).
+- `project::Bool`: Whether to apply a `Dense(in, in)` projection to messages before aggregation.
+- `bias`: Whether to add a learnable bias.
+- `num_sample::Int`: Number of messages sampled, without replacement, before aggregation.
+- `init`: Weights' initializer.
+
+# Examples
+
+```jldoctest
+julia> SAGEConv(1024=>256, relu)
+SAGEConv(1024 => 256, relu, aggr=mean, normalize=true, #sample=10)
+
+julia> SAGEConv(1024=>256, relu, num_sample=5)
+SAGEConv(1024 => 256, relu, aggr=mean, normalize=true, #sample=5)
+
+julia> MeanAggregator(1024=>256, relu, normalize=false)
+SAGEConv(1024 => 256, relu, aggr=mean, normalize=false, #sample=10)
+
+julia> MeanPoolAggregator(1024=>256, relu)
+SAGEConv(1024 => 256, relu, project=Dense(1024 => 1024), aggr=mean, normalize=true, #sample=10)
+
+julia> MaxPoolAggregator(1024=>256, relu)
+SAGEConv(1024 => 256, relu, project=Dense(1024 => 1024), aggr=max, normalize=true, #sample=10)
+
+julia> LSTMAggregator(1024=>256, relu)
+SAGEConv(1024 => 256, relu, aggr=LSTMCell(1024 => 1024), normalize=true, #sample=10)
+```
+
+See also [`WithGraph`](@ref) for training layer with static graph and [`MeanAggregator`](@ref),
+[`MeanPoolAggregator`](@ref), [`MaxPoolAggregator`](@ref) and [`LSTMAggregator`](@ref).
+"""
+struct SAGEConv{A,B,F,P,O} <: MessagePassing
+    weight1::A
+    weight2::A
+    bias::B
+    σ::F
+    proj::P
+    aggr::O
+    normalize::Bool
+    num_sample::Int
+end
+
+function SAGEConv(ch::Pair{Int,Int}, σ=identity, aggr=mean;
+                  normalize::Bool=true, project::Bool=false, bias::Bool=true,
+                  num_sample::Int=10, init=glorot_uniform)
+    in_dim, out_dim = first(ch), last(ch)
+    W_self = init(out_dim, in_dim)   # multiplied with each node's own features in `update`
+    W_neigh = init(out_dim, in_dim)  # multiplied with the aggregated message in `update`
+    b = Flux.create_bias(W_self, bias, out_dim)
+    projection = project ? Dense(in_dim, in_dim) : identity
+    return SAGEConv(W_self, W_neigh, b, σ, projection, aggr, normalize, num_sample)
+end
+
+@functor SAGEConv
+
+message(l::SAGEConv, x_i, x_j::AbstractArray, e) = l.proj(x_j)  # message is x_j passed through `proj`
+
+function aggregate_neighbors(l::SAGEConv, el::NamedTuple, aggr, E)
+    nbatch = last(size(E))
+    picked = sample_node_index(E, l.num_sample; dims=2)
+    # every dimension is kept whole except dimension 2, which is cut down to the sample
+    selector = ntuple(d -> d == 2 ? picked : Colon(), ndims(E))
+    outsize = (size(E, 1), el.N, nbatch)  # ensure outcome has the same dimension as x in update
+    targets = batched_index(el.xs[picked], nbatch)
+    return _scatter(aggr, E[selector...], targets, outsize)
+end
+
+function aggregate_neighbors(l::SAGEConv, el::NamedTuple, aggr, E::AbstractMatrix)
+    picked = sample_node_index(E, l.num_sample; dims=2)
+    sampled = E[:, picked]  # E is a matrix here, so only its columns are subsampled
+    targets = el.xs[picked]
+    outsize = (size(E, 1), el.N)  # ensure outcome has the same dimension as x in update
+    return _scatter(aggr, sampled, targets, outsize)
+end
+
+aggregate_neighbors(::SAGEConv, el::NamedTuple, lstm::Flux.LSTMCell, E::AbstractArray) =
+    throw(ArgumentError("SAGEConv with LSTM aggregator does not support batch learning."))
+
+function aggregate_neighbors(::SAGEConv, el::NamedTuple, lstm::Flux.LSTMCell, E::AbstractMatrix)
+    sample_idx = sample_node_index(E, el.N; dims=2)  # NOTE(review): draws el.N columns, not `num_sample` — confirm intent
+    idx = ntuple(i -> (i == 2) ? sample_idx : Colon(), ndims(E))
+    state, Ē = lstm(lstm.state0, E[idx...])  # single cell call from `state0`; the returned state is unused
+    return Ē
+end
+
+function update(l::SAGEConv, m::AbstractArray, x::AbstractArray)
+    z = _matmul(l.weight1, x) + _matmul(l.weight2, m)  # node term plus aggregated-message term
+    y = l.σ.(z .+ l.bias)
+    return l.normalize ? l2normalize(y; dims=2) : y  # across all nodes
+end
+
+# For variable graph: reads the node features from `fg` and returns a new featured graph
+function (l::SAGEConv)(fg::AbstractFeaturedGraph)
+    nf = node_feature(fg)
+    GraphSignals.check_num_nodes(fg, nf)
+    _, V, _ = propagate(l, graph(fg), nothing, nf, nothing, l.aggr, nothing, nothing)  # only the second result is used
+    return ConcreteFeaturedGraph(fg, nf=V)
+end
+
+# For static graph: the graph is given as the named tuple `el`, features as the array `x`
+function (l::SAGEConv)(el::NamedTuple, x::AbstractArray)
+    GraphSignals.check_num_nodes(el.N, x)
+    _, V, _ = propagate(l, el, nothing, x, nothing, l.aggr, nothing, nothing)  # only the second result is used
+    return V
+end
+
+function Base.show(io::IO, l::SAGEConv)
+    out_dim, in_dim = size(l.weight1)
+    print(io, "SAGEConv(", in_dim, " => ", out_dim)
+    # the activation and the projection are printed only when they are not `identity`
+    l.σ != identity && print(io, ", ", l.σ)
+    l.proj != identity && print(io, ", project=", l.proj)
+    print(io, ", aggr=", l.aggr, ", normalize=", l.normalize)
+    print(io, ", #sample=", l.num_sample, ")")
+    return nothing
+end
+
+"""
+    MeanAggregator(in => out, σ=identity; normalize=true, project=false,
+                   bias=true, num_sample=10, init=glorot_uniform)
+
+SAGEConv with mean aggregator. `σ` defaults to `identity` when omitted.
+
+See also [`SAGEConv`](@ref).
+"""
+MeanAggregator(ch, σ=identity; kwargs...) = SAGEConv(ch, σ, mean; kwargs...)
+
+"""
+    MeanPoolAggregator(in => out, σ=identity; normalize=true,
+                       bias=true, num_sample=10, init=glorot_uniform)
+
+SAGEConv with meanpool aggregator. `σ` defaults to `identity` when omitted.
+
+See also [`SAGEConv`](@ref).
+"""
+MeanPoolAggregator(ch, σ=identity; kwargs...) = SAGEConv(ch, σ, mean; project=true, kwargs...)
+
+"""
+    MaxPoolAggregator(in => out, σ=identity; normalize=true,
+                      bias=true, num_sample=10, init=glorot_uniform)
+
+SAGEConv with maxpool aggregator. `σ` defaults to `identity` when omitted.
+
+See also [`SAGEConv`](@ref).
+"""
+MaxPoolAggregator(ch, σ=identity; kwargs...) = SAGEConv(ch, σ, max; project=true, kwargs...)
+
+
+"""
+    LSTMAggregator(in => out, σ=identity; normalize=true, project=false,
+                   bias=true, num_sample=10, init=glorot_uniform)
+
+SAGEConv with LSTM aggregator. `σ` defaults to `identity` when omitted.
+
+See also [`SAGEConv`](@ref).
+"""
+function LSTMAggregator(ch, σ=identity; kwargs...)
+    in_ch = first(ch)  # the cell is built as LSTMCell(in, in)
+    return SAGEConv(ch, σ, Flux.LSTMCell(in_ch, in_ch); kwargs...)
+end
diff --git a/src/operation.jl b/src/operation.jl
@@ -22,6 +22,11 @@ aggregate(::typeof(max), X) = maximum(X, dims=2)
 aggregate(::typeof(min), X) = minimum(X, dims=2)
 aggregate(::typeof(mean), X) = mean(X, dims=2)
 
+function l2normalize(X::AbstractArray; dims=1)
+    ϵ = floatmin(real(float(eltype(X))))  # floor so an all-zero slice yields 0, not 0/0 = NaN
+    return X ./ sqrt.(max.(sum(abs2, X, dims=dims), ϵ))  # divide each slice by its L2 norm
+end
+
 function incidence_matrix(xs::AbstractVector{T}, N) where {T}
     A = similar(xs, T, size(xs, 1), N)
     copyto!(A, Array(I(N))[Array(xs), :])
diff --git a/src/sampling.jl b/src/sampling.jl
@@ -53,3 +53,10 @@ function alias_sample(J::AbstractVector{<:Integer}, q::AbstractVector{<:Real})
         return J[small_index]
     end
 end
+
+function sample_node_index(X::AbstractArray, num_sample::Int; dims::Int=1)
+    n = size(X, dims)  # at most n indices can be drawn without replacement, so clamp below
+    return StatsBase.sample(1:n, min(num_sample, n), replace=false)
+end
+
+@non_differentiable sample_node_index(x...)
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
@@ -377,4 +377,51 @@
             @test length(g.grads) == 4
         end
     end
+
+    @testset "SAGEConv" begin
+        aggregators = [MeanAggregator, MeanPoolAggregator, MaxPoolAggregator,
+                       LSTMAggregator]
+        @testset "layer without graph" begin
+            for conv in aggregators
+                l = conv(in_channel=>out_channel, relu, num_sample=3)
+
+                X = rand(T, in_channel, N)
+                fg = FeaturedGraph(adj, nf=X)
+                fg_ = l(fg)
+                @test size(node_feature(fg_)) == (out_channel, N)
+                @test_throws MethodError l(X)  # a bare feature array without a graph is rejected
+
+                g = Zygote.gradient(() -> sum(node_feature(l(fg))), Flux.params(l))
+                if l.proj == identity  # layer built without a projection
+                    if conv == LSTMAggregator
+                        @test length(g.grads) == 10
+                    else
+                        @test length(g.grads) == 5
+                    end
+                else
+                    @test length(g.grads) == 7
+                end
+            end
+        end
+        
+        @testset "layer with static graph" begin
+            for conv in aggregators
+                X = rand(T, in_channel, N, batch_size)
+                l = WithGraph(fg, conv(in_channel=>out_channel, relu, num_sample=3))  # NOTE(review): `fg` is not defined in this testset — presumably from an enclosing scope; confirm
+                if conv == LSTMAggregator
+                    @test_throws ArgumentError l(X)  # the LSTM aggregator rejects batched (3-D) input
+                else
+                    Y = l(X)
+                    @test size(Y) == (out_channel, N, batch_size)
+
+                    g = Zygote.gradient(() -> sum(l(X)), Flux.params(l))
+                    if l.layer.proj == identity  # layer built without a projection
+                        @test length(g.grads) == 3
+                    else
+                        @test length(g.grads) == 5
+                    end
+                end
+            end
+        end
+    end
 end