@@ -257,10 +257,10 @@ def binary_search(index_filename: Path,
257
257
# Track which kmer positions need to be counted on the index
258
258
# Create a list of indices where each index refers to the corresponding
259
259
# position in the given sequence segment
260
- counted_positions = np .nonzero (np . copy ( ~ finished_search ) )[0 ]
260
+ kmer_indices = np .nonzero (~ finished_search )[0 ]
261
261
262
262
# Create a list of kmers to count on the index
263
- for i in counted_positions :
263
+ for i in kmer_indices :
264
264
current_kmer_length = current_length_query [i ]
265
265
kmer = sequence_segment .data [i :i + current_kmer_length ]
266
266
working_kmers .append (kmer )
@@ -272,65 +272,65 @@ def binary_search(index_filename: Path,
272
272
273
273
# Assert that the number of indices to count and the number of counts
274
274
# are equal
275
- assert counted_positions .size == count_list .size , \
275
+ assert kmer_indices .size == count_list .size , \
276
276
"Number of counted positions ({}) and number of counts ({}) " \
277
- "do not match" .format (len (counted_positions ), len (count_list ))
277
+ "do not match" .format (len (kmer_indices ), len (count_list ))
278
278
279
279
# Where we have counts of 1
280
- unique_lengths [counted_positions ] = np .where (
280
+ unique_lengths [kmer_indices ] = np .where (
281
281
(count_list == 1 ) &
282
282
# And if there is no current unique length recorded
283
- ((unique_lengths [counted_positions ] == 0 ) |
283
+ ((unique_lengths [kmer_indices ] == 0 ) |
284
284
# Or there is a smaller length found than the current min length
285
- (current_length_query [counted_positions ] <
286
- unique_lengths [counted_positions ])),
285
+ (current_length_query [kmer_indices ] <
286
+ unique_lengths [kmer_indices ])),
287
287
# Record the minimum kmer length found if it is less than the current
288
- current_length_query [counted_positions ],
288
+ current_length_query [kmer_indices ],
289
289
# Otherwise keep the current unique length
290
- unique_lengths [counted_positions ])
290
+ unique_lengths [kmer_indices ])
291
291
292
292
# If we have a k-mer count of 1 and the current length queried is the
293
293
# same as the lower bound (i.e. can't get smaller for a unique count)
294
294
# This position has finished searching
295
- finished_search [counted_positions ] = np .where (
295
+ finished_search [kmer_indices ] = np .where (
296
296
count_list == 1 ,
297
- current_length_query [counted_positions ] ==
298
- lower_length_bound [counted_positions ],
299
- finished_search [counted_positions ])
297
+ current_length_query [kmer_indices ] ==
298
+ lower_length_bound [kmer_indices ],
299
+ finished_search [kmer_indices ])
300
300
301
301
# If we have a k-mer count > 1 and the current length queried is the
302
302
# same as the upper bound (i.e. can't find a unique length or larger)
303
303
# This position has finished searching
304
- finished_search [counted_positions ] = np .where (
304
+ finished_search [kmer_indices ] = np .where (
305
305
count_list > 1 ,
306
- current_length_query [counted_positions ] ==
307
- upper_length_bound [counted_positions ],
308
- finished_search [counted_positions ])
306
+ current_length_query [kmer_indices ] ==
307
+ upper_length_bound [kmer_indices ],
308
+ finished_search [kmer_indices ])
309
309
310
310
# Update the query length and bounds for the next iteration
311
311
312
312
# Lower the upper bounds of our search range on positions where
313
313
# we need to decrease our k-mer length (i.e. counts == 1)
314
314
# Set the new upper (inclusive) bound to the current query length - 1
315
- upper_length_bound [counted_positions ] = np .where (
315
+ upper_length_bound [kmer_indices ] = np .where (
316
316
count_list == 1 ,
317
- current_length_query [counted_positions ] - 1 ,
318
- upper_length_bound [counted_positions ])
317
+ current_length_query [kmer_indices ] - 1 ,
318
+ upper_length_bound [kmer_indices ])
319
319
320
320
# Raise the lower bounds of our search range on positions where
321
321
# we need to increase our k-mer length (i.e. counts > 1)
322
322
# Set the new lower (inclusive) bound to the current query length + 1
323
- lower_length_bound [counted_positions ] = np .where (
323
+ lower_length_bound [kmer_indices ] = np .where (
324
324
count_list > 1 ,
325
- current_length_query [counted_positions ] + 1 ,
326
- lower_length_bound [counted_positions ])
325
+ current_length_query [kmer_indices ] + 1 ,
326
+ lower_length_bound [kmer_indices ])
327
327
328
328
# Calculate the new query length as the midpoint between the updated
329
329
# upper and lower bounds
330
330
# NB: Avoid overflow by halving each bound before summing
331
- current_length_query [counted_positions ] = np .floor (
332
- (upper_length_bound [counted_positions ] / 2 ) +
333
- (lower_length_bound [counted_positions ] / 2 )).astype (data_type )
331
+ current_length_query [kmer_indices ] = np .floor (
332
+ (upper_length_bound [kmer_indices ] / 2 ) +
333
+ (lower_length_bound [kmer_indices ] / 2 )).astype (data_type )
334
334
335
335
iteration_count += 1
336
336
0 commit comments