@@ -342,30 +342,45 @@ def update_upper_search_bound(upper_length_bound_array: npt.NDArray[np.uint],
342
342
# For every ambigious starting position in reverse order
343
343
for i in ambiguous_starting_positions [::- 1 ]:
344
344
# From (max kmer length - 1) positions before this position:
345
- length_change_position = i - (max_lengths_to_ambiguous_position .size )
346
- minimum_length_position = i - min_kmer_length
347
- # Set the maximum length up to 1 next to the ambiguous base position
348
- upper_length_bound_array [length_change_position :i ] = \
349
- max_lengths_to_ambiguous_position
345
+
346
+ # Skip if the ambiguous position is at position 0
347
+ if i == 0 :
348
+ continue
349
+
350
+ # Calculate the starting position where the upper length bounds will
351
+ # change
352
+ # NB: Account for the case where our ambigious position is less than
353
+ # the max kmer length - 1
354
+ upper_length_change_position = \
355
+ max (0 , i - (max_lengths_to_ambiguous_position .size ))
356
+
357
+ # This value is different from the max kmer length (-1) if the
358
+ # ambiguous position is less than the max kmer length - 1 (near the
359
+ # start of the arrays)
360
+ num_upper_length_changes = i - upper_length_change_position
361
+ upper_length_bound_array [upper_length_change_position :i ] = \
362
+ max_lengths_to_ambiguous_position [- num_upper_length_changes :]
350
363
351
364
# Calculate the new query length as the midpoint between the updated
352
365
# upper and the current lower bounds
353
366
new_initial_search_array = np .floor (
354
- (upper_length_bound_array [length_change_position :i ] / 2 ) +
355
- (lower_length_bound_array [length_change_position :i ] / 2 )). astype (
356
- data_type )
367
+ (upper_length_bound_array [upper_length_change_position :i ] / 2 ) +
368
+ (lower_length_bound_array [upper_length_change_position :i ] /
369
+ 2 )). astype ( data_type )
357
370
358
371
# If we have an initial search length
359
372
if initial_search_length :
360
373
# Use the initial search length if it is less than the new midpoint
361
374
new_initial_search_array = np .fmin (new_initial_search_array ,
362
375
initial_search_length )
363
376
364
- current_length_query_array [length_change_position :i ] = \
377
+ current_length_query_array [upper_length_change_position :i ] = \
365
378
new_initial_search_array
366
379
367
380
# Mark positions with values of (min length - 1) to 1 as finished
368
- finished_search_array [minimum_length_position + 1 :i ] = True
381
+ finished_search_array [upper_length_change_position :i ] = \
382
+ (upper_length_bound_array [upper_length_change_position :i ] <
383
+ min_kmer_length )
369
384
370
385
371
386
def linear_search (index_filename : Path ,
0 commit comments