|
7 | 7 | This is a module intended to be used as a part of a pipeline.
|
8 | 8 |
|
9 | 9 | This can be used individually by calling the command:
|
10 |
| - python /path/to/fundis_haplotype_phaser.py -i /path/to/input_dir -p 80 |
11 |
| - python /path/to/fundis_haplotype_phaser.py --input_dir /path/to/input_dir --percent_system_use 80 |
| 10 | + python /path/to/fundis_haplotype_phaser.py -i /path/to/input_dir -p 50 |
| 11 | + python /path/to/fundis_haplotype_phaser.py --input /path/to/input_dir --percent_system_use 50 |
12 | 12 | """
|
13 | 13 |
|
14 | 14 | import os
|
@@ -181,38 +181,46 @@ def analyze_ngspeciesid_folder(ngspeciesid_dir):
|
181 | 181 | return True
|
182 | 182 |
|
183 | 183 | # Main entry point of the script
|
184 |
| -def main(args): |
185 |
| - # Define the main directory based on the argument or use a default value |
186 |
| - input_dir = args.input_dir if args.input_dir else '/mnt/e/Fundis/NGSpeciesID-20230719T211158Z-001/NGSpeciesID' |
187 |
| - percent_system_use = float(args.percent_system_use)/100 if args.percent_system_use else 0.8 |
188 |
| - |
189 |
| - # Get the number of CPUs and calculate the number of threads |
190 |
| - num_cpus = multiprocessing.cpu_count() |
191 |
| - cpu_threads = int(math.floor(num_cpus * percent_system_use)) |
192 |
| - |
193 |
| - # Get a list of all folders in the main directory, excluding folders named '__Summary__' or '_Summary' |
194 |
| - folder_list = [f.path for f in os.scandir(input_dir) if f.is_dir() and '__Summary__' not in f.path and '_Summary' not in f.path] |
195 |
| - |
| 184 | +def haplotype_phaser(args): |
196 | 185 | try:
|
197 |
| - # Initialize a multiprocessing pool and process all folders |
198 |
| - with multiprocessing.Pool(cpu_threads) as pool: |
199 |
| - pool.map(analyze_ngspeciesid_folder, folder_list) |
200 |
| - except queue.Empty: |
201 |
| - print("Queue is empty. All Pipeline tasks have been processed.") |
202 |
| - finally: |
203 |
| - print('PASS: Haplotypes phased for all RiC >=9 conesensus fasta for each sample in NGSPeciesID input_dir\n') |
| 186 | + # Resolve environment_dir (assigned here as function locals, not module globals) |
| 187 | + environment_dir = "" |
| 188 | + environment_cmd_prefix = "" |
| 189 | + environment_dir = check_os() |
| 190 | + main_working_dir = os.getcwd() |
| 191 | + |
| 192 | + # Define the main directory based on the argument or use a default value |
| 193 | + input_dir = args.input if args.input else '/mnt/e/Fundis/NGSpeciesID-20230719T211158Z-001/NGSpeciesID' |
| 194 | + if '.fastq' in input_dir: |
| 195 | + input_dir = input_dir.replace('.fastq','_minibar_NGSID') |
| 196 | + percent_system_use = float(args.percent_system_use)/100 if args.percent_system_use else 0.5 |
| 197 | + |
| 198 | + # Get the number of CPUs and calculate the number of threads |
| 199 | + num_cpus = multiprocessing.cpu_count() |
| 200 | + cpu_threads = int(math.floor(num_cpus * percent_system_use)) |
| 201 | + |
| 202 | + # Get a list of all folders in the main directory, excluding any whose path contains '__Summary__' or '_Summary' |
| 203 | + folder_list = [f.path for f in os.scandir(input_dir) if f.is_dir() and '__Summary__' not in f.path and '_Summary' not in f.path] |
| 204 | + |
| 205 | + try: |
| 206 | + # Initialize a multiprocessing pool and process all folders |
| 207 | + with multiprocessing.Pool(cpu_threads) as pool: |
| 208 | + pool.map(analyze_ngspeciesid_folder, folder_list) |
| 209 | + except queue.Empty: |
| 210 | + print("Queue is empty. All Pipeline tasks have been processed.") |
| 211 | + finally: |
| 212 | + print('PASS: Haplotypes phased for each conesensus fasta and sample in NGSPeciesID input_dir\n') |
| 213 | + return True |
| 214 | + |
| 215 | + except Exception as e: |
| 216 | + print(f"ERROR: There was a problem in haplotype_phaser: {str(e)}") |
| 217 | + return False |
204 | 218 |
|
205 | 219 | # If this script is the main entry point, parse the arguments and call haplotype_phaser
|
206 |
| -if __name__ == "__main__": |
207 |
| - # Global environment_dir |
208 |
| - environment_dir = "" |
209 |
| - environment_cmd_prefix = "" |
210 |
| - environment_dir = check_os() |
211 |
| - main_working_dir = os.getcwd() |
212 |
| - |
| 220 | +if __name__ == "__main__": |
213 | 221 | # Parse user arguments
|
214 | 222 | parser = argparse.ArgumentParser(description="Process NGSpeciesID main folder.")
|
215 |
| - parser.add_argument('-i','--input_dir', type=str, help='Path to the NGSpeciesID main directory') |
| 223 | + parser.add_argument('-i','--input', type=str, help='Path to the NGSpeciesID main directory') |
216 | 224 | parser.add_argument('-p','--percent_system_use', type=str, help='Percent system use written as integer.')
|
217 | 225 | args = parser.parse_args()
|
218 |
| - main(args) |
| 226 | + haplotype_phaser(args) |
0 commit comments