 ###

 ###
-# The following parameters can be modified when running inference or
-# when fine-tuning an existing Casanovo model.
+# The following parameters can be modified when running inference or when
+# fine-tuning an existing Casanovo model.
 ###

-# Max absolute difference allowed with respect to observed precursor m/z
+# Max absolute difference allowed with respect to observed precursor m/z.
 # Predictions outside the tolerance range are assigned a negative peptide score.
 precursor_mass_tol: 50 # ppm
-# Isotopes to consider when comparing predicted and observed precursor m/z's
+# Isotopes to consider when comparing predicted and observed precursor m/z's.
 isotope_error_range: [0, 1]
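These two options work together: a prediction counts as matching the observed precursor only if, for some isotope shift in `isotope_error_range`, the m/z difference falls within `precursor_mass_tol`. A minimal sketch of that check (hypothetical helper, not Casanovo's actual code):

```python
# Illustrative only: names and the exact comparison are assumptions,
# not Casanovo's implementation.
PROTON = 1.00727646  # proton mass in Da
NEUTRON = 1.003355   # approximate spacing between isotope peaks in Da

def precursor_match(pred_mass, obs_mz, charge, tol_ppm=50.0, isotope_range=(0, 1)):
    """True if the predicted mass matches for any allowed isotope error."""
    for isotope in range(isotope_range[0], isotope_range[1] + 1):
        # Convert the isotope-shifted neutral mass to m/z at this charge.
        pred_mz = (pred_mass + isotope * NEUTRON) / charge + PROTON
        if abs(pred_mz - obs_mz) / obs_mz * 1e6 <= tol_ppm:
            return True
    return False
```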
-# The minimum length of predicted peptides
+# The minimum length of predicted peptides.
 min_peptide_len: 6
-# Number of spectra in one inference batch
+# Number of spectra in one inference batch.
 predict_batch_size: 1024
-# Number of beams used in beam search
+# Number of beams used in beam search.
 n_beams: 1
-# Number of PSMs for each spectrum
+# Number of PSMs for each spectrum.
 top_match: 1
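Note that the `top_match` PSMs reported per spectrum are drawn from the `n_beams` candidates produced by beam search, so `top_match` should not exceed `n_beams`. The selection step amounts to keeping the highest-scoring candidates; a toy sketch:

```python
import heapq

def select_psms(candidates, top_match=1):
    """Keep the top_match highest-scoring (peptide, score) beam candidates."""
    return heapq.nlargest(top_match, candidates, key=lambda psm: psm[1])

# With n_beams: 3 and top_match: 1, only the best candidate is reported.
select_psms([("PEPTIDER", -0.2), ("PEPTLDER", -0.9), ("PEPITDER", -1.4)])
```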
 # The hardware accelerator to use. Must be one of:
-# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto"
+# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto".
 accelerator: "auto"
-# The devices to use. Can be set to a positive number int,
-# or the value -1 to indicate all available devices should be used,
-# If left empty, the appropriate number will be automatically
-# selected for automatic selected on the chosen accelerator.
+# The devices to use. Can be set to a positive integer, or the value -1 to
+# indicate all available devices should be used. If left empty, the appropriate
+# number will be automatically selected based on the chosen accelerator.
 devices:
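Casanovo is built on PyTorch Lightning, and these two options correspond to the `Trainer` arguments of the same names; roughly (a sketch, assuming the Lightning 2.x import path):

```python
import lightning.pytorch as pl

trainer = pl.Trainer(
    accelerator="auto",  # accelerator: "auto"
    devices="auto",      # devices: empty -> pick automatically; -1 -> all; N -> use N
)
```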

 ###
 # The following parameters should only be modified if you are training a new
 # Casanovo model from scratch.
 ###

-# Random seed to ensure reproducible results
+# Random seed to ensure reproducible results.
 random_seed: 454

 # OUTPUT OPTIONS
-# Logging frequency in training steps
+# Logging frequency in training steps.
 n_log: 1
-# Tensorboard directory to use for keeping track of training metrics
+# Tensorboard directory to use for keeping track of training metrics.
 tb_summarywriter:
-# Save the top k model checkpoints during training. -1 saves all, and
-# leaving this field empty saves none.
+# Save the top k model checkpoints during training. -1 saves all, and leaving
+# this field empty saves none.
 save_top_k: 5
-# Path to saved checkpoints
+# Path to saved checkpoints.
 model_save_folder_path: ""
-# Model validation and checkpointing frequency in training steps
+# Model validation and checkpointing frequency in training steps.
 val_check_interval: 50_000
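In Lightning terms, these output options roughly correspond to a `ModelCheckpoint` callback plus two `Trainer` arguments; a sketch of the wiring (the directory and monitored metric name here are placeholders, not necessarily what Casanovo uses internally):

```python
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(
    dirpath="checkpoints",  # model_save_folder_path (placeholder)
    save_top_k=5,           # -1 keeps every checkpoint; Casanovo saves none if empty
    monitor="valid_loss",   # placeholder metric name
)
trainer = Trainer(
    callbacks=[checkpoint],
    val_check_interval=50_000,  # validate (and checkpoint) every 50k training steps
    log_every_n_steps=1,        # n_log
)
```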

 # SPECTRUM PROCESSING OPTIONS
-# Number of the most intense peaks to retain, any remaining peaks are discarded
+# Number of the most intense peaks to retain, any remaining peaks are discarded.
 n_peaks: 150
-# Min peak m/z allowed, peaks with smaller m/z are discarded
+# Min peak m/z allowed, peaks with smaller m/z are discarded.
 min_mz: 50.0
-# Max peak m/z allowed, peaks with larger m/z are discarded
+# Max peak m/z allowed, peaks with larger m/z are discarded.
 max_mz: 2500.0
-# Min peak intensity allowed, less intense peaks are discarded
+# Min peak intensity allowed, less intense peaks are discarded.
 min_intensity: 0.01
-# Max absolute m/z difference allowed when removing the precursor peak
+# Max absolute m/z difference allowed when removing the precursor peak.
 remove_precursor_tol: 2.0 # Da
-# Max precursor charge allowed, spectra with larger charge are skipped
+# Max precursor charge allowed, spectra with larger charge are skipped.
 max_charge: 10
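Taken together, the options above define a standard peak-filtering pipeline: restrict the m/z range, drop the precursor peak, apply an intensity floor, and keep the most intense peaks. A NumPy sketch, assuming `min_intensity` is relative to the base peak (0.01 = 1% of the most intense peak); Casanovo performs this via its spectrum-processing dependencies:

```python
import numpy as np

def preprocess(mz, intensity, precursor_mz, n_peaks=150, min_mz=50.0,
               max_mz=2500.0, min_intensity=0.01, remove_precursor_tol=2.0):
    """Filter a spectrum's peaks as the options above describe (sketch)."""
    mz, intensity = np.asarray(mz), np.asarray(intensity)
    keep = (mz >= min_mz) & (mz <= max_mz)                    # m/z window
    keep &= np.abs(mz - precursor_mz) > remove_precursor_tol  # drop precursor peak
    keep &= intensity >= min_intensity * intensity.max()      # relative intensity floor
    mz, intensity = mz[keep], intensity[keep]
    top = np.sort(np.argsort(intensity)[::-1][:n_peaks])      # n most intense, m/z order
    return mz[top], intensity[top]
```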

 # MODEL ARCHITECTURE OPTIONS
-# Dimensionality of latent representations, i.e. peak embeddings
+# Dimensionality of latent representations, i.e. peak embeddings.
 dim_model: 512
-# Number of attention heads
+# Number of attention heads.
 n_head: 8
-# Dimensionality of fully connected layers
+# Dimensionality of fully connected layers.
 dim_feedforward: 1024
-# Number of transformer layers in spectrum encoder and peptide decoder
+# Number of transformer layers in spectrum encoder and peptide decoder.
 n_layers: 9
-# Dropout rate for model weights
+# Dropout rate for model weights.
 dropout: 0.0
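These hyperparameters describe a standard transformer stack. For intuition, a rough PyTorch equivalent of the encoder side (Casanovo's actual encoder and decoder come from the depthcharge package):

```python
import torch.nn as nn

encoder_layer = nn.TransformerEncoderLayer(
    d_model=512,           # dim_model
    nhead=8,               # n_head
    dim_feedforward=1024,  # dim_feedforward
    dropout=0.0,           # dropout
    batch_first=True,
)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=9)  # n_layers
```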
-# Number of dimensions to use for encoding peak intensity
-# Projected up to ``dim_model`` by default and summed with the peak m/z encoding
+# Number of dimensions to use for encoding peak intensity.
+# Projected up to `dim_model` by default and summed with the peak m/z encoding.
 dim_intensity:
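To unpack the `dim_intensity` comment: with the field left empty, each peak's scalar intensity is projected up to a full `dim_model`-sized vector and summed with the m/z encoding. A minimal sketch of that default path (illustrative, not depthcharge's exact code):

```python
import torch
import torch.nn as nn

class PeakEmbedding(nn.Module):
    """Project scalar intensities up to dim_model and add the m/z encoding."""

    def __init__(self, dim_model=512):
        super().__init__()
        self.intensity_proj = nn.Linear(1, dim_model)

    def forward(self, mz_encoding, intensity):
        # mz_encoding: (batch, n_peaks, dim_model), e.g. sinusoidal features
        # intensity:   (batch, n_peaks, 1)
        return mz_encoding + self.intensity_proj(intensity)
```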
-# Max decoded peptide length
+# Max decoded peptide length.
 max_length: 100
-# Number of warmup iterations for learning rate scheduler
+# The number of iterations for the linear warm-up of the learning rate.
 warmup_iters: 100_000
-# Max number of iterations for learning rate scheduler
-max_iters: 600_000
-# Learning rate for weight updates during training
+# The number of iterations for the cosine half period of the learning rate.
+cosine_schedule_period_iters: 600_000
+# Learning rate for weight updates during training.
 learning_rate: 5e-4
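The rename from `max_iters` to `cosine_schedule_period_iters` is the substantive change here: the old name suggested a training cap, while the parameter actually sets the half period of the cosine learning rate schedule. A minimal sketch of the resulting schedule shape, assuming a linear warm-up multiplied by a cosine decay (see Casanovo's scheduler for the exact formula):

```python
import math

def lr_factor(step, warmup_iters=100_000, cosine_schedule_period_iters=600_000):
    """Multiplier applied to learning_rate at a given training step (sketch)."""
    # Cosine that decays from 1 to 0 over one half period...
    cosine = 0.5 * (1.0 + math.cos(math.pi * step / cosine_schedule_period_iters))
    # ...scaled by a linear warm-up over the first warmup_iters steps.
    return cosine * min(1.0, step / max(1, warmup_iters))
```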
-# Regularization term for weight updates
+# Regularization term for weight updates.
 weight_decay: 1e-5
-# Amount of label smoothing when computing the training loss
+# Amount of label smoothing when computing the training loss.
 train_label_smoothing: 0.01
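PyTorch exposes this kind of label smoothing directly on the cross-entropy loss; a sketch, assuming index 0 is the padding token (training uses the configured smoothing, evaluation typically none):

```python
import torch.nn as nn

train_loss = nn.CrossEntropyLoss(ignore_index=0, label_smoothing=0.01)
val_loss = nn.CrossEntropyLoss(ignore_index=0)  # no smoothing for evaluation
```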

 # TRAINING/INFERENCE OPTIONS
-# Number of spectra in one training batch
+# Number of spectra in one training batch.
 train_batch_size: 32
-# Max number of training epochs
+# Max number of training epochs.
 max_epochs: 30
-# Number of validation steps to run before training begins
+# Number of validation steps to run before training begins.
 num_sanity_val_steps: 0
-# Calculate peptide and amino acid precision during training. this
-# is expensive, so we recommend against it.
+# Calculate peptide and amino acid precision during training.
+# This is expensive, so we recommend against it.
 calculate_precision: False
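For intuition, peptide precision is simply the fraction of predicted peptides that match the ground truth; computing it on every validation pass is what makes this costly at training time. A toy version (Casanovo's real metric also accounts for modifications and mass-equivalent residues):

```python
def peptide_precision(predictions, targets):
    """Fraction of predicted peptides that exactly match their target (toy)."""
    correct = sum(pred == true for pred, true in zip(predictions, targets))
    return correct / len(predictions) if predictions else 0.0
```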

 # AMINO ACID AND MODIFICATION VOCABULARY