Skip to content

Commit d5e1366

Browse files
authored
Update FunDiS_Minibar.py
Updated to accept .fastq or .fastq.gz, includes trimming of reads with chopper, includes nuanced minibar command inputs, DOES NOT REMOVE EXCESS FILES (per HS) Signed-off-by: Ian M. B. <99409346+iPsychonaut@users.noreply.github.com>
1 parent 1e74452 commit d5e1366

File tree

1 file changed

+48
-86
lines changed

1 file changed

+48
-86
lines changed

FunDiS_Minibar.py

Lines changed: 48 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -12,66 +12,14 @@
1212
"""
1313

1414
# Base Python Imports
15-
import subprocess, os, gzip
15+
import os, gzip
1616

17-
# Required Python Imports
18-
from termcolor import cprint
19-
from datetime import datetime
17+
# Custom Python Imports
18+
from FunDiS_Tools import log_print, generate_log_file, initialize_logging_environment, run_subprocess_cmd
2019

21-
# Set Global File
22-
DEFAULT_LOG_FILE = None
23-
24-
# Function to color coded print to console and save to log_file information
25-
def log_print(input_message, log_file=None):
26-
"""
27-
Logs a message to a file and prints it to the console with appropriate coloring.
28-
29-
This function takes a message and logs it to the specified file. Additionally, the message is printed to the
30-
console, potentially with specific coloring depending on the context.
31-
32-
Parameters:
33-
input_message (str): Message to be logged and printed.
34-
log_file (str): Path to the log file.
35-
36-
Notes:
37-
- The function uses a global default log file if none is specified.
38-
- Timestamps each log entry for easy tracking.
39-
- Utilizes color coding in the console to distinguish between different types of messages (e.g., errors, warnings).
40-
- Supports color coding for specific message types: NOTE, CMD, ERROR, WARN, and PASS.
41-
- Falls back to default (white) color if the message type is unrecognized.
42-
"""
43-
# Access the global variable
44-
global DEFAULT_LOG_FILE
45-
46-
# Use the default log file if none specified
47-
if log_file is None:
48-
log_file = DEFAULT_LOG_FILE
49-
50-
# Establish current date-time
51-
now = datetime.now()
52-
message = f'[{now:%Y-%m-%d %H:%M:%S}]\t{input_message}'
53-
54-
# Determine the print color based on the input_message content
55-
message_type_dict = {'NOTE': ['blue'],
56-
'CMD': ['cyan'],
57-
'ERROR': ['red'],
58-
'WARN': ['yellow'],
59-
'PASS': ['green'],}
60-
print_color = ['white'] # Default color
61-
for key, value in message_type_dict.items():
62-
if key.lower() in input_message.lower():
63-
print_color = value
64-
break
65-
66-
# Writing the message to the log file
67-
with open(log_file, 'a') as file:
68-
print(message, file=file)
69-
70-
# Handling different message types for colored printing
71-
try:
72-
cprint(message, print_color[0])
73-
except (KeyError, IndexError):
74-
cprint(message, print_color[1] if len(print_color) > 1 else 'white')
20+
# Global resource settings (CPU_THREADS is passed to chopper via --threads)
21+
CPU_THREADS = 1
22+
PERCENT_RESOURCES = 0.75
7523

7624
# Function to unzip .fastq.gz file with gzip.
7725
def unzip_fastq(ngsid_fastq_gz_path, ngsid_fastq_path):
@@ -90,7 +38,7 @@ def unzip_fastq(ngsid_fastq_gz_path, ngsid_fastq_path):
9038
f_out.write(f_in.read())
9139

9240
# Wrapper function that calls minibar_prep and logs any exception (intended for use in a separate thread).
93-
def run_minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_output_dir):
41+
def run_minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_output_dir, chopper_command_dict, minibar_command_dict):
9442
"""
9543
Wrapper function to run minibar preparation in a separate thread.
9644
@@ -102,18 +50,20 @@ def run_minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsi
10250
minibar_index_path (str): Path to the minibar index file.
10351
ngsid_fastq_gz_path (str): Path to the input FASTQ file (.fastq or .fastq.gz).
10452
ngsid_output_dir (str): Directory path for storing output files.
53+
chopper_command_dict (dict): Dictionary containing chopper settings.
54+
minibar_command_dict (dict): Dictionary containing MiniBar settings.
10555
10656
Notes:
10757
- Logs an error message if an exception is encountered during the minibar preparation.
10858
"""
10959
global DEFAULT_LOG_FILE
11060
try:
111-
minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_output_dir)
61+
minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_output_dir, chopper_command_dict, minibar_command_dict)
11262
except Exception as e:
11363
log_print( f"ERROR:\t{str(e)}")
11464

11565
# Function to perform minibar preparation
116-
def minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_output_dir):
66+
def minibar_prep(minibar_path, minibar_index_path, input_file_path, ngsid_output_dir, chopper_command_dict, minibar_command_dict):
11767
"""
11868
Performs minibar preparation for NGSpeciesID processing.
11969
@@ -123,8 +73,10 @@ def minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_ou
12373
Parameters:
12474
minibar_path (str): Path to the minibar script.
12575
minibar_index_path (str): Path to the minibar index file.
126-
ngsid_fastq_gz_path (str): Path to the compressed FASTQ file.
76+
input_file_path (str): Path to the input FASTQ file, either uncompressed (.fastq) or gzip-compressed (.fastq.gz).
12777
ngsid_output_dir (str): Directory path for storing output files.
78+
chopper_command_dict (dict): Dictionary containing chopper settings.
79+
minibar_command_dict (dict): Dictionary containing MiniBar settings.
12880
12981
Global Variables:
13082
output_area (str): A global variable used for logging output messages.
@@ -135,39 +87,49 @@ def minibar_prep(minibar_path, minibar_index_path, ngsid_fastq_gz_path, ngsid_ou
13587
- Handles and logs file removal errors.
13688
"""
13789
global DEFAULT_LOG_FILE
138-
log_print( f"Prepping {ngsid_fastq_gz_path} for NGSpeciesID with minibar...")
139-
ngsid_fastq_path = ngsid_fastq_gz_path.replace(".gz", "")
140-
141-
if os.path.exists(ngsid_fastq_path):
142-
log_print( "PASS:\tSkipping extraction, files already exist")
90+
main_dir = os.getcwd()
91+
log_print(f"Prepping {input_file_path} for NGSpeciesID with minibar...")
92+
93+
# Determine if the file is compressed (.gz) and set the correct chopper command
94+
if input_file_path.endswith(".gz"):
95+
log_print(f"File is compressed: {input_file_path}")
96+
chopper_input_cmd = f"gunzip -c {input_file_path}"
97+
uncompressed_fastq_path = input_file_path.replace(".gz","") # Remove .gz extension
14398
else:
144-
unzip_fastq(ngsid_fastq_gz_path, ngsid_fastq_path)
99+
log_print(f"File is uncompressed: {input_file_path}")
100+
chopper_input_cmd = f"cat {input_file_path}"
101+
uncompressed_fastq_path = input_file_path
145102

146-
main_dir = os.getcwd()
103+
# Ensure the output directory exists
147104
if not os.path.exists(ngsid_output_dir):
148105
os.makedirs(ngsid_output_dir)
149106
os.chdir(ngsid_output_dir)
150-
151-
minibar_cmd = [minibar_path, "-F", minibar_index_path, ngsid_fastq_path] # TODO: Be able to adjust the number of differences allowed for in primer sequence
152-
log_print( f"CMD:\t{' '.join(minibar_cmd)}")
153-
process = subprocess.run(minibar_cmd, capture_output=True, text=True)
154-
log_print( process.stdout)
155-
log_print( process.stderr)
107+
108+
# Construct the chopper command
109+
chopper_cmd_str = f"{chopper_input_cmd} | chopper -q {chopper_command_dict['-q']} -l {chopper_command_dict['--minlength']} --maxlength {chopper_command_dict['--maxlength']} --threads {CPU_THREADS}"
156110

157-
if process.returncode != 0:
158-
log_print( f"ERROR:\t{process.stderr}")
159-
else:
160-
log_print( f"PASS:\tSuccessfully processed {ngsid_fastq_path} with minibar and cleaned up files")
111+
# Construct minibar command
112+
minibar_cmd_str = f"{minibar_path} -F {minibar_index_path} -i - --outfolder {ngsid_output_dir}"
113+
for key, value in minibar_command_dict.items():
114+
if value != "":
115+
minibar_cmd_str += f" {key} {value}"
116+
elif value == True: # NOTE(review): unreachable as written - True != "" so the branch above already appends "key True"; the flag check needs to come first for bare flags to work
117+
minibar_cmd_str += f" {key}"
118+
119+
# Combine the chopper and minibar commands
120+
combined_cmd_str = f"{chopper_cmd_str} | {minibar_cmd_str}"
121+
122+
run_subprocess_cmd(combined_cmd_str, shell_check=True)
161123

162-
for file in [f"{ngsid_output_dir}/sample_Multiple_Matches.fastq", "sample_unk.fastq"]:
163-
try:
164-
os.remove(file)
165-
log_print( f"PASS:\tRemoved file: {file}")
166-
except OSError as e:
167-
log_print( f"ERROR:\tDuring file removal {file}: {e}")
124+
# TODO: Do not remove these files until further notice
125+
# for file in [f"{ngsid_output_dir}/sample_Multiple_Matches.fastq", f"{ngsid_output_dir}/sample_unk.fastq", uncompressed_fastq_path]:
126+
# try:
127+
# os.remove(file)
128+
# log_print( f"PASS:\tRemoved file: {file}")
129+
# except OSError as e:
130+
# log_print( f"ERROR:\tDuring file removal {file}: {e}")
168131

169132
os.chdir(main_dir)
170133

171134
if __name__ == "__main__":
172-
global DEFAULT_LOG_FILE
173135
log_print("UNLOGGED DEBUG:\tUNDEVELOPED FunDiS_Minibar.py DEBUG AREA")

0 commit comments

Comments
 (0)