import math
import torch
from torch.nn import Parameter
import torch.nn.functional as F
from typing import Any
from .utils import mask_adjs, mask_x

def glorot(tensor):
    if tensor is not None:
        stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
        tensor.data.uniform_(-stdv, stdv)

def zeros(tensor):
    if tensor is not None:
        tensor.data.fill_(0)

def reset(value: Any):
    if hasattr(value, 'reset_parameters'):
        value.reset_parameters()
    else:
        for child in value.children() if hasattr(value, 'children') else []:
            reset(child)

# -------- GCN layer --------
class DenseGCNConv(torch.nn.Module):
    r"""See :class:`torch_geometric.nn.conv.GCNConv`.
    """
    def __init__(self, in_channels, out_channels, improved=False, bias=True):
        super(DenseGCNConv, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved

        self.weight = Parameter(torch.Tensor(self.in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.weight)
        zeros(self.bias)

    def forward(self, x, adj, mask=None, add_loop=True):
        r"""
        Args:
            x (Tensor): Node feature tensor :math:`\mathbf{X} \in \mathbb{R}^{B
                \times N \times F}`, with batch-size :math:`B`, (maximum)
                number of nodes :math:`N` for each graph, and feature
                dimension :math:`F`.
            adj (Tensor): Adjacency tensor :math:`\mathbf{A} \in \mathbb{R}^{B
                \times N \times N}`. The adjacency tensor is broadcastable in
                the batch dimension, resulting in a shared adjacency matrix for
                the complete batch.
            mask (BoolTensor, optional): Mask matrix
                :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}` indicating
                the valid nodes for each graph. (default: :obj:`None`)
            add_loop (bool, optional): If set to :obj:`False`, the layer will
                not automatically add self-loops to the adjacency matrices.
                (default: :obj:`True`)
        """
        x = x.unsqueeze(0) if x.dim() == 2 else x
        adj = adj.unsqueeze(0) if adj.dim() == 2 else adj
        B, N, _ = adj.size()

        if add_loop:
            adj = adj.clone()
            idx = torch.arange(N, dtype=torch.long, device=adj.device)
            adj[:, idx, idx] = 1 if not self.improved else 2

        out = torch.matmul(x, self.weight)
        deg_inv_sqrt = adj.sum(dim=-1).clamp(min=1).pow(-0.5)

        adj = deg_inv_sqrt.unsqueeze(-1) * adj * deg_inv_sqrt.unsqueeze(-2)
        out = torch.matmul(adj, out)

        if self.bias is not None:
            out = out + self.bias

        if mask is not None:
            out = out * mask.view(B, N, 1).to(x.dtype)

        return out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

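# Shape sketch for DenseGCNConv (an illustrative comment, not part of the
# original file; all sizes below are arbitrary). The layer expects dense,
# batched inputs, adds self-loops, and applies symmetric degree normalization:
#
#   conv = DenseGCNConv(in_channels=16, out_channels=32)
#   x = torch.randn(8, 20, 16)                            # B x N x F node features
#   adj = torch.rand(8, 20, 20)                           # B x N x N dense adjacency
#   out = conv(x, adj)                                    # -> 8 x 20 x 32
#   out = conv(x, adj, mask=adj.new_ones(8, 20).bool())   # zeroes out padded nodes
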
# -------- MLP layer --------
class MLP(torch.nn.Module):
    def __init__(self, num_layers, input_dim, hidden_dim, output_dim, use_bn=False, activate_func=F.relu):
        """
        num_layers: number of layers in the neural network (EXCLUDING the input layer).
            If num_layers=1, this reduces to a linear model.
        input_dim: dimensionality of input features
        hidden_dim: dimensionality of hidden units at ALL layers
        output_dim: dimensionality of the output
        use_bn: whether to apply BatchNorm1d after each hidden layer
        activate_func: activation applied after each hidden layer (default: F.relu)
        """

        super(MLP, self).__init__()

        self.linear_or_not = True  # default is linear model
        self.num_layers = num_layers
        self.use_bn = use_bn
        self.activate_func = activate_func

        if num_layers < 1:
            raise ValueError("number of layers should be positive!")
        elif num_layers == 1:
            # Linear model
            self.linear = torch.nn.Linear(input_dim, output_dim)
        else:
            # Multi-layer model
            self.linear_or_not = False
            self.linears = torch.nn.ModuleList()

            self.linears.append(torch.nn.Linear(input_dim, hidden_dim))
            for layer in range(num_layers - 2):
                self.linears.append(torch.nn.Linear(hidden_dim, hidden_dim))
            self.linears.append(torch.nn.Linear(hidden_dim, output_dim))

            if self.use_bn:
                self.batch_norms = torch.nn.ModuleList()
                for layer in range(num_layers - 1):
                    self.batch_norms.append(torch.nn.BatchNorm1d(hidden_dim))

    def forward(self, x):
        """
        :param x: [batch_size, N, F_i] (or [batch_size, F_i]), batch of node features
        :return: output of shape [..., output_dim]
        """
        if self.linear_or_not:
            # If linear model
            return self.linear(x)
        else:
            # If MLP
            h = x
            for layer in range(self.num_layers - 1):
                h = self.linears[layer](h)
                if self.use_bn:
                    h = self.batch_norms[layer](h)
                h = self.activate_func(h)
            return self.linears[self.num_layers - 1](h)

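# Usage sketch for MLP (an illustrative comment, not part of the original file;
# sizes are arbitrary). With num_layers=1 the module is a single Linear layer;
# otherwise every hidden layer applies `activate_func` and, when use_bn=True,
# a BatchNorm1d that expects a 2D (batch, hidden_dim) input:
#
#   mlp = MLP(num_layers=3, input_dim=16, hidden_dim=64, output_dim=8)
#   out = mlp(torch.randn(32, 20, 16))    # applied node-wise -> 32 x 20 x 8
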
# -------- Graph Multi-Head Attention (GMH) --------
# -------- From Baek et al. (2021) --------
class Attention(torch.nn.Module):
    def __init__(self, in_dim, attn_dim, out_dim, num_heads=4, conv='GCN'):
        super(Attention, self).__init__()
        self.num_heads = num_heads
        self.attn_dim = attn_dim
        self.out_dim = out_dim
        self.conv = conv

        self.gnn_q, self.gnn_k, self.gnn_v = self.get_gnn(in_dim, attn_dim, out_dim, conv)
        self.activation = torch.tanh
        self.softmax_dim = 2

    def forward(self, x, adj, flags, attention_mask=None):
        if self.conv == 'GCN':
            Q = self.gnn_q(x, adj)
            K = self.gnn_k(x, adj)
        else:
            Q = self.gnn_q(x)
            K = self.gnn_k(x)

        V = self.gnn_v(x, adj)
        dim_split = self.attn_dim // self.num_heads
        Q_ = torch.cat(Q.split(dim_split, 2), 0)
        K_ = torch.cat(K.split(dim_split, 2), 0)

        if attention_mask is not None:
            attention_mask = torch.cat([attention_mask for _ in range(self.num_heads)], 0)
            attention_score = Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.out_dim)
            A = self.activation(attention_mask + attention_score)
        else:
            A = self.activation(Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.out_dim))  # (B x num_heads) x N x N

        # -------- (B x num_heads) x N x N --------
        A = A.view(-1, *adj.shape)
        A = A.mean(dim=0)
        A = (A + A.transpose(-1, -2)) / 2

        return V, A

    def get_gnn(self, in_dim, attn_dim, out_dim, conv='GCN'):

        if conv == 'GCN':
            gnn_q = DenseGCNConv(in_dim, attn_dim)
            gnn_k = DenseGCNConv(in_dim, attn_dim)
            gnn_v = DenseGCNConv(in_dim, out_dim)

            return gnn_q, gnn_k, gnn_v

        elif conv == 'MLP':
            num_layers = 2
            gnn_q = MLP(num_layers, in_dim, 2 * attn_dim, attn_dim, activate_func=torch.tanh)
            gnn_k = MLP(num_layers, in_dim, 2 * attn_dim, attn_dim, activate_func=torch.tanh)
            gnn_v = DenseGCNConv(in_dim, out_dim)

            return gnn_q, gnn_k, gnn_v

        else:
            raise NotImplementedError(f'{conv} not implemented.')

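# Usage sketch for Attention (an illustrative comment, not part of the original
# file; sizes are arbitrary). Queries/keys come from DenseGCNConv (conv='GCN')
# or an MLP over node features (conv='MLP'); values always come from
# DenseGCNConv. attn_dim must be divisible by num_heads. `flags` is not used
# inside this forward pass, so None is acceptable here:
#
#   attn = Attention(in_dim=16, attn_dim=32, out_dim=16, num_heads=4, conv='GCN')
#   V, A = attn(torch.randn(8, 20, 16), torch.rand(8, 20, 20), flags=None)
#   # V: 8 x 20 x 16 node values, A: 8 x 20 x 20 symmetrized attention matrix
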
# -------- Layer of ScoreNetworkA --------
class AttentionLayer(torch.nn.Module):
    def __init__(self, num_linears, conv_input_dim, attn_dim, conv_output_dim, input_dim, output_dim,
                 num_heads=4, conv='GCN'):
        super(AttentionLayer, self).__init__()
        self.attn = torch.nn.ModuleList()
        for _ in range(input_dim):
            self.attn_dim = attn_dim
            self.attn.append(Attention(conv_input_dim, self.attn_dim, conv_output_dim,
                                       num_heads=num_heads, conv=conv))

        self.hidden_dim = 2 * max(input_dim, output_dim)
        self.mlp = MLP(num_linears, 2 * input_dim, self.hidden_dim, output_dim, use_bn=False, activate_func=F.elu)
        self.multi_channel = MLP(2, input_dim * conv_output_dim, self.hidden_dim, conv_output_dim,
                                 use_bn=False, activate_func=F.elu)

    def forward(self, x, adj, flags):
        """
        :param x: B x N x F_i
        :param adj: B x C_i x N x N
        :param flags: B x N node flags, forwarded to mask_x / mask_adjs
        :return: x_out: B x N x F_o, adj_out: B x C_o x N x N
        """
        mask_list = []
        x_list = []
        for _ in range(len(self.attn)):
            _x, mask = self.attn[_](x, adj[:, _, :, :], flags)
            mask_list.append(mask.unsqueeze(-1))
            x_list.append(_x)
        x_out = mask_x(self.multi_channel(torch.cat(x_list, dim=-1)), flags)
        x_out = torch.tanh(x_out)

        mlp_in = torch.cat([torch.cat(mask_list, dim=-1), adj.permute(0, 2, 3, 1)], dim=-1)
        shape = mlp_in.shape
        mlp_out = self.mlp(mlp_in.view(-1, shape[-1]))
        _adj = mlp_out.view(shape[0], shape[1], shape[2], -1).permute(0, 3, 1, 2)
        _adj = _adj + _adj.transpose(-1, -2)
        adj_out = mask_adjs(_adj, flags)

        return x_out, adj_out

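# A minimal smoke test (an illustrative sketch, not part of the original file).
# All sizes are arbitrary, and it assumes the `mask_x` / `mask_adjs` helpers
# imported from `.utils` accept a (B, N) node-flag tensor; because of the
# relative import above, run it from inside the package, e.g.
# `python -m <package>.layers`.
if __name__ == "__main__":
    B, N, F_in, F_out, C_in, C_out = 2, 9, 10, 16, 3, 4

    x = torch.randn(B, N, F_in)             # B x N x F_in node features
    adjs = torch.rand(B, C_in, N, N)        # B x C_in x N x N adjacency channels
    flags = torch.ones(B, N)                # all nodes valid

    gcn = DenseGCNConv(F_in, F_out)
    print(gcn(x, adjs[:, 0]).shape)         # torch.Size([2, 9, 16])

    mlp = MLP(num_layers=3, input_dim=F_in, hidden_dim=32, output_dim=F_out)
    print(mlp(x).shape)                     # torch.Size([2, 9, 16])

    attn_layer = AttentionLayer(num_linears=2, conv_input_dim=F_in, attn_dim=16,
                                conv_output_dim=F_out, input_dim=C_in,
                                output_dim=C_out, num_heads=4, conv='GCN')
    x_out, adj_out = attn_layer(x, adjs, flags)
    print(x_out.shape, adj_out.shape)       # torch.Size([2, 9, 16]) torch.Size([2, 4, 9, 9])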