Skip to content

Commit e5f42cd

Browse files
authored
Added Error Checking & Python Import Calls to fundis_summarize.py
Added error checking and updated the function to accept Python import calls as well. Signed-off-by: Ian M. B. <99409346+iPsychonaut@users.noreply.github.com>
1 parent e484883 commit e5f42cd

File tree

1 file changed

+61
-25
lines changed

1 file changed

+61
-25
lines changed

fundis_summarize.py

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
This is a module intended to be used as a part of a pipeline.
88
99
This can be used individually by calling the command:
10-
python /path/to/fundis_summarize.py -i /path/to/input_dir -p 80
11-
python /path/to/fundis_summarize.py --input_dir /path/to/input_dir --percent_system_use 80
10+
python /path/to/fundis_summarize.py -i /path/to/input_dir -p 50
11+
python /path/to/fundis_summarize.py --input /path/to/input_dir --percent_system_use 50
1212
"""
1313
from Bio import SeqIO
1414
import pandas as pd
@@ -17,38 +17,68 @@
1717
import shutil
1818
from tqdm import tqdm
1919
import argparse
20+
import platform
21+
22+
# Function to determine which Operating System the code is being executed in
23+
def check_os():
    """Detect the host operating system and set the environment globals.

    Assigns the module-level ``environment_dir`` and
    ``environment_cmd_prefix`` variables, prints the detected platform and
    the chosen directory, and returns the directory.

    Returns:
        str: ``"E:"`` on Windows, ``"/mnt/e"`` on POSIX/Linux hosts.

    Raises:
        Exception: If the host is neither Windows nor a POSIX/Linux system.
    """
    global environment_dir
    global environment_cmd_prefix

    # os.name is coarse ('posix', 'nt', 'java'); platform.system() is finer
    # ('Linux', 'Windows', 'Java', ...). Either one may identify the host.
    os_name = os.name
    platform_system = platform.system()

    if os_name == 'nt' or platform_system == 'Windows':
        # Hard-coded drive letter; change it to whatever drive is
        # appropriate for your Windows system.
        environment_dir = "E:"
        # Linux commands are run through Windows Subsystem for Linux (WSL).
        environment_cmd_prefix = "wsl "

    elif os_name == 'posix' or platform_system == 'Linux':
        # NOTE: os.name is 'posix' on every POSIX host, not only Linux, so
        # this branch is also taken on other Unix-like systems.
        # Hard-coded mount point; change it to the corresponding path on
        # your system.
        environment_dir = "/mnt/e"
        # Commands run natively here, so no prefix is needed. Assigning the
        # empty string keeps the global defined on every successful path.
        environment_cmd_prefix = ""

    else:
        # Neither Windows nor POSIX/Linux: refuse to guess a directory.
        raise Exception("ERROR: OS NOT TESTED WITH THIS CODE.")

    # Report the detected operating system and the chosen directory.
    print(f'Operating System: {platform_system}')
    print(f'Environment Directory: {environment_dir}')
    return environment_dir
2052

2153
# Function to take in a folder containing processed NGSequenceID folders and generate a summary folder for MycoMap
2254
def mycomap_summarize_ngsid_dir(ngsid_dir):
23-
print('\nGenerating MycoMap summary folder for {ngsid_dir}...')
55+
if '.fastq' in ngsid_dir:
56+
ngsid_dir = ngsid_dir.replace('.fastq','_minibar_NGSID')
57+
print(f'\nGenerating MycoMap summary folder for {ngsid_dir}...')
2458
# Create the summary and FASTQ directories if they don't exist
2559
summary_dir = ngsid_dir.replace("_minibar_NGSID","_Summary")
2660

2761
# Create pandas DataFrame for sequence information
2862
stats_df = pd.DataFrame(columns=['Filename', 'Length', 'Reads in Consensus', 'Multiple'])
2963

3064
# Get all directories in the ngsid_dir
31-
sample_dirs = [d for d in os.listdir(ngsid_dir) if os.path.isdir(os.path.join(ngsid_dir, d))]
65+
sample_dirs = [d for d in os.listdir(ngsid_dir) if os.path.isdir(os.path.join(ngsid_dir, d))]
3266

67+
# Generate storage folders
3368
fastq_dir = f"{summary_dir}/FASTQ Files"
3469
os.makedirs(summary_dir, exist_ok=True)
3570
os.makedirs(fastq_dir, exist_ok=True)
3671

37-
# MAKE LOOK THROUGH SAMPLE DIR IN SAMPLE DIRS
72+
# Look through current_sample_dir in sample_dirs
3873
for current_sample_dir in tqdm(sample_dirs):
39-
#current_sample_dir = "E:/Fundis/TEST/sample_HS_ONT03_03_35-HAY-F-001900-iNat155234876-Agaricomycetes_NGSequenceID"
4074
current_sample_dir = f'{ngsid_dir}/{current_sample_dir}'
4175
base_name = current_sample_dir.split("sample_")[-1]
4276

43-
# get all directories in the path that match the pattern
77+
# Establish main variables and directories for processing
4478
consensus_dirs = [entry.path for entry in os.scandir(current_sample_dir) if entry.is_dir() and entry.name.startswith('consensus_reference_') and any(os.scandir(entry.path))]
45-
4679
consensus_dirs = [entry.replace("\\","/") for entry in consensus_dirs]
47-
4880
consensus_fastq_list = []
49-
5081
medaka_count = 1
51-
5282
reads_in_consensus = 0
5383

5484
for consensus_dir in consensus_dirs:
@@ -156,23 +186,29 @@ def mycomap_summarize_ngsid_dir(ngsid_dir):
156186
# Write the updated records to the combined fasta file
157187
SeqIO.write(records, combined_fasta_file, 'fasta')
158188

159-
def main(args):
160-
# Set the path to the folder containing the fastq files
161-
percent_system_use = float(args.percent_system_use)/100 if args.percent_system_use else 0.8
162-
input_dir = args.input_dir if args.input_dir else os.path.dirname(os.path.realpath(__file__))
163-
mycomap_summarize_ngsid_dir(input_dir)
164-
print('PASS: Successfully summarized NGSequenceID Folder for MycoMap upload')
189+
def summarize(args):
    """Summarize an NGSequenceID folder for MycoMap upload.

    Usable both from the command-line entry point and from a direct Python
    call.

    Args:
        args: Object exposing ``input`` (path to the source folder) and
            ``percent_system_use`` (percentage written as an integer)
            attributes, e.g. an ``argparse.Namespace``. A missing or empty
            ``input`` falls back to the directory containing this file; a
            missing or empty ``percent_system_use`` is accepted.

    Returns:
        bool: True when the summary was generated, False if any step raised
        (the error is printed rather than propagated).
    """
    try:
        # check_os() assigns the environment globals itself, so no local
        # placeholders are needed here.
        check_os()

        # percent_system_use is not consumed by the summary step; it is only
        # parsed so that a malformed value is still reported as an error.
        percent = getattr(args, 'percent_system_use', None)
        if percent:
            float(percent)

        # Fall back to this script's own directory when no input is given.
        input_dir = getattr(args, 'input', None) or os.path.dirname(os.path.realpath(__file__))
        mycomap_summarize_ngsid_dir(input_dir)
        print('PASS: Successfully summarized NGSequenceID Folder for MycoMap upload')
        return True

    except Exception as e:
        # Deliberate top-level boundary: report the failure and signal it
        # through the return value instead of crashing the caller.
        print(f"ERROR: There was a problem in summarize: {str(e)}")
        return False
165207

166208
if __name__ == "__main__":
167-
# Global environment_dir
168-
environment_dir = ""
169-
environment_cmd_prefix = ""
170-
environment_dir = check_os()
171-
main_working_dir = os.getcwd()
172-
173209
# Parse user arguments
174210
parser = argparse.ArgumentParser(description="Process NGSpeciesID source folder.")
175-
parser.add_argument('-i','--input_dir', type=str, help='Path to the NGSpeciesID source folder')
211+
parser.add_argument('-i','--input', type=str, help='Path to the NGSpeciesID source folder')
176212
parser.add_argument('-p','--percent_system_use', type=str, help='Percent system use written as integer.')
177213
args = parser.parse_args()
178-
main(args)
214+
summarize(args)

0 commit comments

Comments
 (0)