
Commit 2763c22

Merge pull request #251 from FluxML/examples
Update GCN model
2 parents 84f61a9 + 3faac7b

File tree

2 files changed: +25 -9 lines changed

examples/gcn.jl

Lines changed: 9 additions & 7 deletions
@@ -1,4 +1,5 @@
 using GeometricFlux
+using GraphSignals
 using Flux
 using Flux: onehotbatch, onecold, logitcrossentropy, throttle
 using Flux: @epochs
@@ -7,9 +8,6 @@ using Statistics
 using SparseArrays
 using Graphs.SimpleGraphs
 using CUDA
-using Random
-
-Random.seed!([0x6044b4da, 0xd873e4f9, 0x59d90c0a, 0xde01aa81])
 
 @load "data/cora_features.jld2" features
 @load "data/cora_labels.jld2" labels
@@ -19,21 +17,25 @@ num_nodes = 2708
 num_features = 1433
 hidden = 16
 target_catg = 7
-epochs = 100
+epochs = 200
+λ = 5e-4
 
 ## Preprocessing data
 train_X = Matrix{Float32}(features) |> gpu  # dim: num_features * num_nodes
 train_y = Matrix{Float32}(labels) |> gpu  # dim: target_catg * num_nodes
-fg = FeaturedGraph(g) |> gpu
+fg = FeaturedGraph(g)  # passed to gpu together with the model layers
 
 ## Model
 model = Chain(GCNConv(fg, num_features=>hidden, relu),
              Dropout(0.5),
              GCNConv(fg, hidden=>target_catg),
-             ) |> gpu
+             ) |> gpu;
+# do not show the model architecture; showing a CuSparseMatrix would trigger errors
 
 ## Loss
-loss(x, y) = logitcrossentropy(model(x), y)
+l2norm(x) = sum(abs2, x)
+# cross entropy with L2 regularization on the first layer
+loss(x, y) = logitcrossentropy(model(x), y) + λ*sum(l2norm, Flux.params(model[1]))
 accuracy(x, y) = mean(onecold(softmax(cpu(model(x)))) .== onecold(cpu(y)))
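The training loop itself sits outside the hunks shown above. For context, a minimal sketch of how the updated example can be driven, assuming full-batch training on Cora; the optimizer choice, learning rate, and the names `ps`, `train_data`, `opt`, and `evalcb` are illustrative assumptions, while `epochs`, `loss`, `accuracy`, `@epochs`, and `throttle` come from the example itself:

```julia
## Training (sketch, not part of this diff)
ps = Flux.params(model)            # weights and biases of both GCNConv layers
train_data = [(train_X, train_y)]  # full-batch: a single (features, labels) pair
opt = ADAM(0.01)                   # assumed optimizer and learning rate
evalcb() = @show(accuracy(train_X, train_y))

@epochs epochs Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, 10))
```

Note that the new loss penalizes only `model[1]`, the first `GCNConv`; the dropout layer and the output layer are left unregularized, and λ = 5e-4 scales the squared-norm penalty on the first layer's weight and bias.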

src/layers/conv.jl

Lines changed: 16 additions & 2 deletions
@@ -36,6 +36,8 @@ GCNConv(ch::Pair{Int,Int}, σ = identity; kwargs...) =
 
 @functor GCNConv
 
+Flux.trainable(l::GCNConv) = (l.weight, l.bias)
+
 function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
     Ã = Zygote.ignore() do
         GraphSignals.normalized_adjacency_matrix(fg, eltype(x); selfloop=true)
@@ -87,6 +89,8 @@ ChebConv(ch::Pair{Int,Int}, k::Int; kwargs...) =
 
 @functor ChebConv
 
+Flux.trainable(l::ChebConv) = (l.weight, l.bias)
+
 function (c::ChebConv)(fg::FeaturedGraph, X::AbstractMatrix{T}) where T
     GraphSignals.check_num_nodes(fg, X)
     @assert size(X, 1) == size(c.weight, 2) "Input feature size must match input channel size."
@@ -155,6 +159,8 @@ GraphConv(ch::Pair{Int,Int}, σ=identity, aggr=+; kwargs...) =
 
 @functor GraphConv
 
+Flux.trainable(l::GraphConv) = (l.weight1, l.weight2, l.bias)
+
 message(gc::GraphConv, x_i, x_j::AbstractVector, e_ij) = gc.weight2 * x_j
 
 update(gc::GraphConv, m::AbstractVector, x::AbstractVector) = gc.σ.(gc.weight1*x .+ m .+ gc.bias)
@@ -224,6 +230,8 @@ GATConv(ch::Pair{Int,Int}; kwargs...) = GATConv(NullGraph(), ch; kwargs...)
 
 @functor GATConv
 
+Flux.trainable(l::GATConv) = (l.weight, l.bias, l.a)
+
 # Here the α that has not been softmaxed is the first number of the output message
 function message(gat::GATConv, x_i::AbstractVector, x_j::AbstractVector)
     x_i = reshape(gat.weight*x_i, :, gat.heads)
@@ -319,6 +327,8 @@ GatedGraphConv(out_ch::Int, num_layers::Int; kwargs...) =
 
 @functor GatedGraphConv
 
+Flux.trainable(l::GatedGraphConv) = (l.weight, l.gru)
+
 message(ggc::GatedGraphConv, x_i, x_j::AbstractVector, e_ij) = x_j
 
 update(ggc::GatedGraphConv, m::AbstractVector, x) = m
@@ -376,6 +386,8 @@ EdgeConv(nn; kwargs...) = EdgeConv(NullGraph(), nn; kwargs...)
 
 @functor EdgeConv
 
+Flux.trainable(l::EdgeConv) = (l.nn,)
+
 message(ec::EdgeConv, x_i::AbstractVector, x_j::AbstractVector, e_ij) = ec.nn(vcat(x_i, x_j .- x_i))
 update(ec::EdgeConv, m::AbstractVector, x) = m
 
@@ -423,13 +435,13 @@ function GINConv(nn, eps::Real=0f0)
     GINConv(NullGraph(), nn, eps)
 end
 
+@functor GINConv
+
 Flux.trainable(g::GINConv) = (fg=g.fg, nn=g.nn)
 
 message(g::GINConv, x_i::AbstractVector, x_j::AbstractVector) = x_j
 update(g::GINConv, m::AbstractVector, x) = g.nn((1 + g.eps) * x + m)
 
-@functor GINConv
-
 function (g::GINConv)(fg::FeaturedGraph, X::AbstractMatrix)
     gf = graph(fg)
     GraphSignals.check_num_nodes(gf, X)
@@ -474,6 +486,8 @@ end
 
 @functor CGConv
 
+Flux.trainable(l::CGConv) = (l.Wf, l.Ws, l.bf, l.bs)
+
 function CGConv(fg::G, dims::NTuple{2,Int};
                 init=glorot_uniform, bias=true, as_edge=false) where {G<:AbstractFeaturedGraph}
     node_dim, edge_dim = dims
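The recurring change in this file is a `Flux.trainable` method for each layer type. A minimal sketch of the pattern, using a hypothetical layer (`MyConv` is not from this diff): `@functor` exposes all fields to Flux so that `gpu`/`cpu` can move them, while `Flux.trainable` restricts which fields `Flux.params` collects for optimization, keeping the cached graph out of gradient updates.

```julia
using Flux

# Hypothetical layer illustrating the @functor + trainable split (sketch only)
struct MyConv{W,B,G}
    weight::W
    bias::B
    fg::G   # cached FeaturedGraph: moved by gpu/cpu, but never optimized
end

Flux.@functor MyConv                            # all fields follow gpu/cpu
Flux.trainable(l::MyConv) = (l.weight, l.bias)  # only these get gradients
```

This split is what lets the updated example build `FeaturedGraph(g)` on the CPU and transfer it via `model |> gpu`: the graph travels with the layer, but the optimizer only ever touches `weight` and `bias`.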
