@@ -97,12 +97,11 @@ def write_multi_read_wig(wig_file: TextIO,
97
97
wig_file .write ("{}\n " .format (value ))
98
98
99
99
100
- def main (args ):
101
- unique_count_filename = Path (args .unique_count_file )
102
- kmer_length = args .kmer_length
103
- single_read_bed_filename = args .single_read_bed_file
104
- multi_read_wig_filename = args .multi_read_wig_file
105
- verbose = args .verbose
100
+ def write_mappability_files (unique_count_filenames : list [Path ],
101
+ kmer_length : int ,
102
+ single_read_bed_filename : str , # Might be stdout
103
+ multi_read_wig_filename : str , # Might be stdout
104
+ verbose : bool ):
106
105
107
106
# Error if both single-read and multi-read output files are standard output
108
107
if (single_read_bed_filename == STDOUT_FILENAME and
@@ -114,70 +113,91 @@ def main(args):
114
113
not multi_read_wig_filename ):
115
114
raise ValueError ("Must specify at least one output file" )
116
115
117
- # Get the chromosome name from the unique length filename
118
- # NB: Assume the chromosome name is the the entire string preceding the
119
- # ".unique*" part of the unique_count_filename (may contain periods)
120
- file_basename = unique_count_filename .name
121
- chr_name = \
122
- file_basename [:file_basename .find (CHROMOSOME_FILENAME_DELIMITER )]
123
-
124
- # Get the data type from the unique length filename suffix
125
- data_type_string = unique_count_filename .suffix
126
-
127
- if data_type_string == ".uint8" :
128
- data_type = np .uint8
129
- elif data_type_string == ".uint16" :
130
- data_type = np .uint16
131
- elif data_type_string == ".uint32" :
132
- data_type = np .uint32
133
- else :
134
- raise ValueError (f"Unknown extension on unique length file: "
135
- f"\" { data_type_string } \" " )
136
-
137
- # NB: The single-read mappability is defined for the entire sequence where
138
- # a uniquely mappable k-mer would cover. So if a k-mer is uniquely mappable
139
- # starting at position i, then the single read mappability would be 1 for
140
- # all positions i to i + kmer_length - 1
141
- # It follows that the multi-read mappability covers the same positions as
142
- # the single-read, so any non-zero value would be considered single-read
143
- # mappable
144
- verbose_print (verbose ,
145
- f"Calculating mappability regions from minimum unique k-mer "
146
- f"lengths in file: { unique_count_filename } " )
147
-
148
- multi_read_mappability = create_multiread_mappability_from_unique_file (
149
- unique_count_filename ,
150
- kmer_length ,
151
- data_type ) # type: ignore
152
-
153
- verbose_print (verbose , "Chromosome size:" )
154
- verbose_print (verbose ,
155
- "{}\t {}" .format (chr_name , multi_read_mappability .shape [0 ]))
156
-
157
- if single_read_bed_filename :
158
- verbose_print (verbose , "Writing out single-read mappability regions" )
159
-
160
- if single_read_bed_filename == STDOUT_FILENAME :
161
- write_single_read_bed (sys .stdout ,
162
- kmer_length ,
163
- multi_read_mappability ,
164
- chr_name )
116
+ # For every unique length file specified
117
+ for unique_count_filename in unique_count_filenames :
118
+ # Get the chromosome name from the unique length filename
119
+ # NB: Assume the chromosome name is the the entire string preceding the
120
+ # ".unique*" part of the unique_count_filename (may contain periods)
121
+ file_basename = unique_count_filename .name
122
+ chr_name = \
123
+ file_basename [:file_basename .find (CHROMOSOME_FILENAME_DELIMITER )]
124
+
125
+ # Get the data type from the unique length filename suffix
126
+ data_type_string = unique_count_filename .suffix
127
+
128
+ if data_type_string == ".uint8" :
129
+ data_type = np .uint8
130
+ elif data_type_string == ".uint16" :
131
+ data_type = np .uint16
132
+ elif data_type_string == ".uint32" :
133
+ data_type = np .uint32
165
134
else :
166
- with open (single_read_bed_filename , "w" ) as single_read_bed_file :
167
- write_single_read_bed (single_read_bed_file ,
135
+ raise ValueError (f"Unknown extension on unique length file: "
136
+ f"\" { data_type_string } \" " )
137
+
138
+ # NB: The single-read mappability is defined for the entire sequence
139
+ # where a uniquely mappable k-mer would cover. So if a k-mer is
140
+ # uniquely mappable starting at position i, then the single read
141
+ # mappability would be 1 for all positions i to i + kmer_length - 1
142
+ # It follows that the multi-read mappability covers the same positions
143
+ # as the single-read, so any non-zero value would be considered
144
+ # single-read mappable
145
+ verbose_print (verbose , f"Calculating mappability regions from minimum "
146
+ f"unique k-mer lengths in file: "
147
+ f"{ unique_count_filename } " )
148
+
149
+ multi_read_mappability = create_multiread_mappability_from_unique_file (
150
+ unique_count_filename ,
151
+ kmer_length ,
152
+ data_type ) # type: ignore
153
+
154
+ verbose_print (verbose , "Chromosome size:" )
155
+ verbose_print (verbose ,
156
+ "{}\t {}" .format (chr_name ,
157
+ multi_read_mappability .shape [0 ]))
158
+
159
+ if single_read_bed_filename :
160
+ verbose_print (verbose , f"Appending single-read mappability "
161
+ f"regions to { single_read_bed_filename } " )
162
+
163
+ if single_read_bed_filename == STDOUT_FILENAME :
164
+ write_single_read_bed (sys .stdout ,
168
165
kmer_length ,
169
166
multi_read_mappability ,
170
167
chr_name )
171
-
172
- if multi_read_wig_filename :
173
- verbose_print (verbose , "Writing out multi-read mappability regions" )
174
-
175
- if multi_read_wig_filename == STDOUT_FILENAME :
176
- write_multi_read_wig (sys .stdout ,
177
- multi_read_mappability ,
178
- chr_name )
179
- else :
180
- with open (multi_read_wig_filename , "w" ) as multi_read_wig_file :
181
- write_multi_read_wig (multi_read_wig_file ,
168
+ else :
169
+ with open (single_read_bed_filename , "a" ) as \
170
+ single_read_bed_file :
171
+ write_single_read_bed (single_read_bed_file ,
172
+ kmer_length ,
173
+ multi_read_mappability ,
174
+ chr_name )
175
+
176
+ if multi_read_wig_filename :
177
+ verbose_print (verbose , f"Appending multi-read mappability regions"
178
+ f" to { multi_read_wig_filename } " )
179
+
180
+ if multi_read_wig_filename == STDOUT_FILENAME :
181
+ write_multi_read_wig (sys .stdout ,
182
182
multi_read_mappability ,
183
183
chr_name )
184
+ else :
185
+ with open (multi_read_wig_filename , "a" ) as multi_read_wig_file :
186
+ write_multi_read_wig (multi_read_wig_file ,
187
+ multi_read_mappability ,
188
+ chr_name )
189
+
190
+
191
+ def main (args ):
192
+ unique_count_filenames = [Path (filename ) for filename in
193
+ args .unique_count_files ]
194
+ kmer_length = args .kmer_length
195
+ single_read_bed_filename = args .single_read_bed_file
196
+ multi_read_wig_filename = args .multi_read_wig_file
197
+ verbose = args .verbose
198
+
199
+ write_mappability_files (unique_count_filenames ,
200
+ kmer_length ,
201
+ single_read_bed_filename ,
202
+ multi_read_wig_filename ,
203
+ verbose )
0 commit comments