Skip to content

Commit d6c0a33

Browse files
committed
debug test and remove dependency pandas
1 parent 7b1e424 commit d6c0a33

23 files changed

+110
-806
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@
5656
| BFGNNMolecularPredictor | torch-scatter |
5757
| GRINMolecularPredictor | torch-scatter |
5858

59+
**For models that require `torch-scatter`:** Install using the following command: `pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html`, e.g.,
60+
61+
> `pip install torch-scatter -f https://data.pyg.org/whl/torch-2.7.1+cu128.html`
62+
63+
**For models that require `transformers`:** `pip install transformers`
64+
5965
## Usage
6066

6167
Refer to the `tests` folder for more use cases.

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ dependencies = [
1919
"torch>=2.2.0",
2020
"torch-geometric>=2.6.1",
2121
"numpy",
22-
"pandas>=2.2.3",
2322
"click",
2423
"huggingface_hub>=0.22.2",
2524
"joblib>=1.3.2",

tests/encoder/graphmae.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import numpy as np
2-
import pandas as pd
2+
import csv
33
import os
44
from torch_molecule import GraphMAEMolecularEncoder
55

6+
EPOCHS = 5
7+
68
def test_graphmae_encoder():
79
# Load molecules from CSV file
810
data_path = "data/molecule100.csv"
@@ -17,16 +19,20 @@ def test_graphmae_encoder():
1719
"CCN", # Ethylamine
1820
]
1921
else:
20-
df = pd.read_csv(data_path)
21-
molecules = df['smiles'].tolist()[:50] # Use first 50 molecules
22+
molecules = []
23+
with open(data_path, 'r') as file:
24+
csv_reader = csv.DictReader(file)
25+
for i, row in enumerate(csv_reader):
26+
if i >= 50: # Use first 50 molecules
27+
break
28+
molecules.append(row['smiles'])
2229
print(f"Loaded {len(molecules)} molecules from {data_path}")
23-
2430
# Initialize GraphMAE model
2531
model = GraphMAEMolecularEncoder(
2632
num_layer=3,
2733
hidden_size=128,
2834
batch_size=16,
29-
epochs=30, # Small number for testing
35+
epochs=EPOCHS, # Small number for testing
3036
mask_rate=0.15,
3137
verbose=True,
3238
# device="cpu"
@@ -79,16 +85,21 @@ def test_graphmae_with_edge_masking():
7985
"CCN", # Ethylamine
8086
]
8187
else:
82-
df = pd.read_csv(data_path)
83-
molecules = df['smiles'].tolist()[:50] # Use first 50 molecules
88+
molecules = []
89+
with open(data_path, 'r') as file:
90+
csv_reader = csv.DictReader(file)
91+
for i, row in enumerate(csv_reader):
92+
if i >= 50: # Use first 50 molecules
93+
break
94+
molecules.append(row['smiles'])
8495
print(f"Loaded {len(molecules)} molecules from {data_path}")
8596

8697
# Initialize GraphMAE model with edge masking enabled
8798
model = GraphMAEMolecularEncoder(
8899
num_layer=3,
89100
hidden_size=128,
90101
batch_size=16,
91-
epochs=30, # Small number for testing
102+
epochs=EPOCHS, # Small number for testing
92103
mask_rate=0.15,
93104
mask_edge=True, # Enable edge masking
94105
verbose=True,

tests/encoder/hfpretrained.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_hf_pretrained_encoder():
2424
{"repo_id": "DeepChem/ChemBERTa-10M-MTR", "model_name": "ChemBERTa_10M_MTR"},
2525
{"repo_id": "DeepChem/ChemBERTa-10M-MLM", "model_name": "ChemBERTa_10M_MLM"},
2626
{"repo_id": "DeepChem/ChemBERTa-5M-MLM", "model_name": "ChemBERTa_5M_MLM"},
27-
{"repo_id": "DeepChem/ChemBERTa-5M-MTR", "model_name": "ChemBERTa_5M_MTR"}
27+
{"repo_id": "DeepChem/ChemBERTa-5M-MTR", "model_name": "ChemBERTa_5M_MTR"},
2828
{"repo_id": "seyonec/ChemBERTa-zinc-base-v1", "model_name": "ChemBERTa_zinc_base_v1"},
2929
{"repo_id": "unikei/bert-base-smiles", "model_name": "bert-base-smiles"}
3030
]

tests/encoder/test_run_all.py

Lines changed: 0 additions & 181 deletions
This file was deleted.

tests/generator/digress.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from torch_molecule import DigressMolecularGenerator
77
from torch_molecule.utils.search import ParameterType, ParameterSpec
88

9-
EPOCHS = 10
9+
EPOCHS = 5
1010
BATCH_SIZE = 32
1111

1212
def test_digress_generator():
@@ -24,7 +24,6 @@ def test_digress_generator():
2424
model = DigressMolecularGenerator(
2525
hidden_size_X=256,
2626
hidden_size_E=128,
27-
hidden_size_y=128,
2827
num_layer=5,
2928
dropout=0.1,
3029
timesteps=500,

tests/generator/gdss.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import torch
66
from torch_molecule import GDSSMolecularGenerator
77

8-
EPOCHS = 500
8+
EPOCHS = 5
99
BATCH_SIZE = 16
1010

1111
def test_gdss_generator():

tests/generator/graphdit.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import torch
66
from torch_molecule import GraphDITMolecularGenerator
7-
from torch_molecule.utils.search import ParameterType, ParameterSpec
87

98
EPOCHS = 2
109
BATCH_SIZE = 32

tests/generator/jtvae.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import numpy as np
3-
import pandas as pd
3+
import csv
44
from tqdm import tqdm
55

66
import torch
@@ -15,8 +15,12 @@ def test_jtvae_generator():
1515
"data", "polymer100.csv")
1616
print(f"Loading data from: {data_path}")
1717

18-
df = pd.read_csv(data_path)
19-
smiles_list = df['smiles'].tolist()
18+
# Read CSV without pandas
19+
smiles_list = []
20+
with open(data_path, 'r', encoding='utf-8') as csvfile:
21+
reader = csv.DictReader(csvfile)
22+
for row in reader:
23+
smiles_list.append(row['smiles'])
2024

2125
print(f"Loaded {len(smiles_list)} molecules")
2226
print(f"First 3 SMILES: {smiles_list[:3]}")

tests/generator/lstm.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
import csv
23
import numpy as np
3-
import pandas as pd
44
from tqdm import tqdm
55

66
import torch
77
from torch_molecule.generator.lstm import LSTMMolecularGenerator
88

9-
EPOCHS = 1000 # Reduced for faster testing
9+
# EPOCHS = 1000 # Reduced for faster testing
10+
EPOCHS = 5
1011
BATCH_SIZE = 24
1112

1213
def test_lstm_generator():
@@ -15,12 +16,21 @@ def test_lstm_generator():
1516
"data", "polymer100.csv")
1617
print(f"Loading data from: {data_path}")
1718

18-
df = pd.read_csv(data_path)
19-
smiles_list = df['smiles'].tolist()
19+
# Read CSV without pandas
20+
smiles_list = []
21+
properties = []
22+
property_columns = []
2023

21-
# Extract property columns (all columns except 'smiles')
22-
property_columns = [col for col in df.columns if col != 'smiles']
23-
properties = df[property_columns].values.tolist()
24+
with open(data_path, 'r') as csvfile:
25+
reader = csv.DictReader(csvfile)
26+
# Get property columns (all columns except 'smiles')
27+
property_columns = [col for col in reader.fieldnames if col != 'smiles']
28+
29+
for row in reader:
30+
smiles_list.append(row['smiles'])
31+
# Extract property values for this row
32+
row_properties = [float(row[col]) for col in property_columns]
33+
properties.append(row_properties)
2434

2535
print(f"Loaded {len(smiles_list)} molecules with {len(property_columns)} properties")
2636
print(f"Property columns: {property_columns}")

0 commit comments

Comments
 (0)