Skip to content

Commit b7f8ff9

Browse files
bittremieux, melihyilmaz, github-actions[bot]
authored
Prepare for release v4.1.0 (#296)
* Remove `train_from_scratch` config option (#275)
  Instead of having to specify `train_from_scratch` in the config file, training will proceed from an existing model weights file if this is given as an argument to `casanovo train`.
  Fixes #263.
* Stabilize torch.topk() behavior (#290)
  * Add epsilon to index zero
  * Fix typo
  * Use base PyTorch for repeating along the vocabulary size
  * Combine masking steps
  * Lint with updated black version
  * Lint test files
  * Add topk unit test
  * Fix lint
  * Add fixme comment for future
  * Update changelog
  * Generate new screengrabs with rich-codex
  ---------
  Co-authored-by: Wout Bittremieux <wout@bittremieux.be>
  Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
* Update changelog
---------
Co-authored-by: Melih Yilmaz <32707537+melihyilmaz@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent dae8392 commit b7f8ff9

File tree

11 files changed

+113
-198
lines changed

11 files changed

+113
-198
lines changed

CHANGELOG.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,22 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66

77
## [Unreleased]
88

9+
## [4.1.0] - 2024-02-16
10+
11+
### Changed
12+
13+
- Instead of having to specify `train_from_scratch` in the config file, training will proceed from an existing model weights file if this is given as an argument to `casanovo train`.
14+
15+
### Fixed
16+
17+
- Fixed beam search decoding error due to non-deterministic selection of beams with equal scores.
18+
19+
## [4.0.1] - 2023-12-25
20+
21+
### Fixed
22+
23+
- Fix automatic PyPI upload.
24+
925
## [4.0.0] - 2023-12-22
1026

1127
### Added
@@ -217,7 +233,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
217233

218234
- Initial Casanovo version.
219235

220-
[Unreleased]: https://github.com/Noble-Lab/casanovo/compare/v4.0.0...HEAD
236+
[Unreleased]: https://github.com/Noble-Lab/casanovo/compare/v4.1.0...HEAD
237+
[4.1.0]: https://github.com/Noble-Lab/casanovo/compare/v4.0.1...v4.1.0
238+
[4.0.1]: https://github.com/Noble-Lab/casanovo/compare/v4.0.0...v4.0.1
221239
[4.0.0]: https://github.com/Noble-Lab/casanovo/compare/v3.5.0...v4.0.0
222240
[3.5.0]: https://github.com/Noble-Lab/casanovo/compare/v3.4.0...v3.5.0
223241
[3.4.0]: https://github.com/Noble-Lab/casanovo/compare/v3.3.0...v3.4.0

casanovo/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ class Config:
6565
top_match=int,
6666
max_epochs=int,
6767
num_sanity_val_steps=int,
68-
train_from_scratch=bool,
6968
save_top_k=int,
7069
model_save_folder_path=str,
7170
val_check_interval=int,

casanovo/config.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ train_batch_size: 32
9999
max_epochs: 30
100100
# Number of validation steps to run before training begins
101101
num_sanity_val_steps: 0
102-
# Set to "False" to further train a pre-trained Casanovo model
103-
train_from_scratch: True
104102
# Calculate peptide and amino acid precision during training. This
105103
# is expensive, so we recommend against it.
106104
calculate_precision: False

casanovo/denovo/model.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -607,21 +607,17 @@ def _get_topk_beams(
607607
scores[:, step, :, :], "B V S -> B (V S)"
608608
)
609609

610-
# Mask out terminated beams. Include precursor m/z tolerance induced
611-
# termination.
612-
# TODO: `clone()` is necessary to get the correct output with n_beams=1.
613-
# An alternative implementation using base PyTorch instead of einops
614-
# might be more efficient.
615-
finished_mask = einops.repeat(
616-
finished_beams, "(B S) -> B (V S)", S=beam, V=vocab
617-
).clone()
610+
# Find all still active beams by masking out terminated beams.
611+
active_mask = (
612+
~finished_beams.reshape(batch, beam).repeat(1, vocab)
613+
).float()
618614
# Mask out the index '0', i.e. padding token, by default.
619-
finished_mask[:, :beam] = True
615+
# FIXME: Set this to a very small, yet non-zero value, to only
616+
# get padding after stop token.
617+
active_mask[:, :beam] = 1e-8
620618

621619
# Figure out the top K decodings.
622-
_, top_idx = torch.topk(
623-
step_scores.nanmean(dim=1) * (~finished_mask).float(), beam
624-
)
620+
_, top_idx = torch.topk(step_scores.nanmean(dim=1) * active_mask, beam)
625621
v_idx, s_idx = np.unravel_index(top_idx.cpu(), (vocab, beam))
626622
s_idx = einops.rearrange(s_idx, "B S -> (B S)")
627623
b_idx = einops.repeat(torch.arange(batch), "B -> (B S)", S=beam)

casanovo/denovo/model_runner.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -252,16 +252,16 @@ def initialize_model(self, train: bool) -> None:
252252
calculate_precision=self.config.calculate_precision,
253253
)
254254

255-
from_scratch = (
256-
self.config.train_from_scratch,
257-
self.model_filename is None,
258-
)
259-
if train and any(from_scratch):
260-
self.model = Spec2Pep(**model_params)
261-
return
262-
elif self.model_filename is None:
263-
logger.error("A model file must be provided")
264-
raise ValueError("A model file must be provided")
255+
if self.model_filename is None:
256+
# Train a model from scratch if no model file is provided.
257+
if train:
258+
self.model = Spec2Pep(**model_params)
259+
return
260+
# Else we're not training, so a model file must be provided.
261+
else:
262+
logger.error("A model file must be provided")
263+
raise ValueError("A model file must be provided")
264+
# Else a model file is provided (to continue training or for inference).
265265

266266
if not Path(self.model_filename).exists():
267267
logger.error(

casanovo/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Small utility functions."""
1+
"""Small utility functions"""
22

33
import logging
44
import os

docs/images/help.svg

Lines changed: 11 additions & 153 deletions
Loading

tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ def tiny_config(tmp_path):
223223
"weight_decay": 1e-5,
224224
"train_batch_size": 32,
225225
"num_sanity_val_steps": 0,
226-
"train_from_scratch": True,
227226
"calculate_precision": False,
228227
"residues": {
229228
"G": 57.021464,

tests/unit_tests/test_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Test configuration loading."""
1+
"""Test configuration loading"""
22

33
import pytest
44
import yaml

tests/unit_tests/test_runner.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,39 @@
77
from casanovo.denovo.model_runner import ModelRunner
88

99

10-
def test_initialize_model(tmp_path):
11-
"""Test that"""
10+
def test_initialize_model(tmp_path, mgf_small):
11+
"""Test initializing a new or existing model."""
1212
config = Config()
13-
config.train_from_scratch = False
13+
# No model filename given, so train from scratch.
1414
ModelRunner(config=config).initialize_model(train=True)
1515

16+
# No model filename given during inference = error.
1617
with pytest.raises(ValueError):
1718
ModelRunner(config=config).initialize_model(train=False)
1819

19-
with pytest.raises(FileNotFoundError):
20-
runner = ModelRunner(config=config, model_filename="blah")
21-
runner.initialize_model(train=True)
22-
20+
# Non-existing model filename given during inference = error.
2321
with pytest.raises(FileNotFoundError):
2422
runner = ModelRunner(config=config, model_filename="blah")
2523
runner.initialize_model(train=False)
2624

27-
# This should work now:
28-
config.train_from_scratch = True
29-
runner = ModelRunner(config=config, model_filename="blah")
25+
# Train a quick model.
26+
config.max_epochs = 1
27+
config.n_layers = 1
28+
ckpt = tmp_path / "existing.ckpt"
29+
with ModelRunner(config=config) as runner:
30+
runner.train([mgf_small], [mgf_small])
31+
runner.trainer.save_checkpoint(ckpt)
32+
33+
# Resume training from previous model.
34+
runner = ModelRunner(config=config, model_filename=str(ckpt))
3035
runner.initialize_model(train=True)
3136

32-
# But this should still fail:
33-
with pytest.raises(FileNotFoundError):
34-
runner = ModelRunner(config=config, model_filename="blah")
35-
runner.initialize_model(train=False)
37+
# Inference with previous model.
38+
runner = ModelRunner(config=config, model_filename=str(ckpt))
39+
runner.initialize_model(train=False)
3640

3741
# If the model initialization throws an EOFError, then the Spec2Pep model
38-
# has tried to load the weights:
42+
# has tried to load the weights.
3943
weights = tmp_path / "blah"
4044
weights.touch()
4145
with pytest.raises(EOFError):
@@ -44,7 +48,7 @@ def test_initialize_model(tmp_path):
4448

4549

4650
def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
47-
"""Test saving aloading weights"""
51+
"""Test saving and loading weights"""
4852
config = Config(tiny_config)
4953
config.max_epochs = 1
5054
config.n_layers = 1

0 commit comments

Comments
 (0)