|
1 | 1 | """The command line entry point for casanovo"""
|
| 2 | +from email.policy import default |
2 | 3 | import click, logging
|
3 |
| -from casanovo.denovo import train, test_evaluate, test_denovo |
4 |
| - |
| 4 | +import yaml |
| 5 | +import os |
| 6 | +from casanovo.denovo import train, evaluate, denovo |
5 | 7 |
|
# Options that must be supplied for each mode, in addition to --mode and
# --model_path (which click already enforces or defaults).
_MODE_REQUIRED_OPTIONS = {
    "train": ("train_data_path", "val_data_path"),
    "eval": ("test_data_path",),
    "denovo": ("test_data_path", "output_path"),
}


def _load_config(config_path):
    """Parse the YAML config, using the config.yaml beside this module when no path is given."""
    if config_path is None:
        config_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "config.yaml"
        )
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    # safe_load returns None for an empty file (or a scalar/list for odd
    # content); fail here with a clear message instead of a TypeError later
    # when the command-line overrides are written into it.
    if not isinstance(config, dict):
        raise click.ClickException(
            "Config file {} does not contain a YAML mapping.".format(config_path)
        )
    return config


@click.command()
# Required options
# NOTE: --mode has a default, so click can never report it as missing; the
# former `required=True` was a no-op and has been dropped.
@click.option(
    "--mode",
    default="eval",
    help=(
        'Choose on a high level what the program will do. '
        '"train" will train a model from scratch or continue training a pre-trained model. '
        '"eval" will evaluate de novo sequencing performance of a pre-trained model (peptide annotations are needed for spectra). '
        '"denovo" will run de novo sequencing without evaluation (specificy directory path for output csv file with de novo sequences).'
    ),
    type=click.Choice(["train", "eval", "denovo"]),
)
# NOTE(review): required + exists=True means an existing checkpoint must always
# be given, which seems at odds with "train a model from scratch" in the
# --mode help text -- confirm the intended behavior.
@click.option(
    "--model_path",
    required=True,
    help="Specify path to pre-trained model weights (.ckpt file) for testing or to continue to train.",
    type=click.Path(exists=True, dir_okay=False, file_okay=True),
)
# Base options
@click.option(
    "--train_data_path",
    help="Specify path to .mgf files to be used as training data",
    type=click.Path(exists=True, dir_okay=True, file_okay=False),
)
@click.option(
    "--val_data_path",
    help="Specify path to .mgf files to be used as validation data",
    type=click.Path(exists=True, dir_okay=True, file_okay=False),
)
@click.option(
    "--test_data_path",
    help="Specify path to .mgf files to be used as test data",
    type=click.Path(exists=True, dir_okay=True, file_okay=False),
)
@click.option(
    "--config_path",
    help="Specify path to custom config file which includes data and model related options. If not included, the default config.yaml will be used.",
    type=click.Path(exists=True, dir_okay=False, file_okay=True),
)
@click.option(
    "--output_path",
    help="Specify path to output de novo sequences. Output format is .csv",
    type=click.Path(exists=True, dir_okay=True, file_okay=False),
)
# De Novo sequencing options
@click.option(
    "--preprocess_spec",
    default=None,
    help="True if spectra data should be preprocessed, False if using preprocessed data.",
    type=click.BOOL,
)
@click.option(
    "--num_workers",
    default=None,
    help="Number of workers to use for spectra reading.",
    type=click.INT,
)
@click.option(
    "--gpus",
    default=(),
    help="Specify gpus for usage. For multiple gpus, use format: --gpus=0 --gpus=1 --gpus=2... etc. etc.",
    type=click.INT,
    multiple=True,
)
def main(
    # Required + base options
    mode,
    model_path,
    train_data_path,
    val_data_path,
    test_data_path,
    config_path,
    output_path,
    # De novo sequencing options
    preprocess_spec,
    num_workers,
    gpus,
):
    """
    The command line function for casanovo. De Novo Mass Spectrometry Peptide Sequencing with a Transformer Model.

    \b
    Training option requirements:
    mode, model_path, train_data_path, val_data_path

    \b
    Evaluation option requirements:
    mode, model_path, test_data_path

    \b
    De Novo option requirements:
    mode, model_path, test_data_path, output_path

    config_path is optional for every mode; the default config.yaml is used when it is omitted.
    \f

    Everything below the form feed is hidden from ``--help``.

    Args:
        mode: One of "train", "eval" or "denovo"; selects which routine runs.
        model_path: Path to an existing checkpoint file.
        train_data_path: Training data directory (needed for "train").
        val_data_path: Validation data directory (needed for "train").
        test_data_path: Test data directory (needed for "eval" and "denovo").
        config_path: Optional YAML config file; None selects the config.yaml
            located next to this module.
        output_path: Output directory (needed for "denovo").
        preprocess_spec: Optional override for config["preprocess_spec"].
        num_workers: Optional override for config["num_workers"].
        gpus: Tuple of GPU ids; when non-empty overrides config["gpus"].

    Raises:
        click.UsageError: An option needed by the selected mode is missing.
        click.ClickException: The config file is not a YAML mapping.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s: %(message)s",
    )

    # Enforce the per-mode requirements up front so a missing path is reported
    # as a usage error rather than being passed on as None.
    supplied = {
        "train_data_path": train_data_path,
        "val_data_path": val_data_path,
        "test_data_path": test_data_path,
        "output_path": output_path,
    }
    missing = [
        "--" + name
        for name in _MODE_REQUIRED_OPTIONS[mode]
        if supplied[name] is None
    ]
    if missing:
        raise click.UsageError(
            "Mode '{}' requires the following option(s): {}".format(
                mode, ", ".join(missing)
            )
        )

    config = _load_config(config_path)

    # Command-line values take precedence over the config file, but only when
    # they were actually supplied.
    if preprocess_spec is not None:
        config["preprocess_spec"] = preprocess_spec
    if num_workers is not None:
        config["num_workers"] = num_workers
    if gpus:
        # click yields a tuple for multiple=True; store a list so the value has
        # the same type a YAML sequence would have.
        config["gpus"] = list(gpus)

    if mode == "train":
        logging.info('Training Casanovo...')
        train(train_data_path, val_data_path, model_path, config)
    elif mode == "eval":
        logging.info('Evaluating Casanovo...')
        evaluate(test_data_path, model_path, config)
    elif mode == "denovo":
        logging.info('De novo sequencing with Casanovo...')
        denovo(test_data_path, model_path, config, output_path)
|
45 | 86 |
|
|
0 commit comments