Skip to content

Commit 976f99e

Browse files
committed
Remove unnecessary array copy in binary search
1 parent 943883b commit 976f99e

File tree

1 file changed

+27
-27
lines changed

1 file changed

+27
-27
lines changed

newmap/unique_counts.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,10 @@ def binary_search(index_filename: Path,
257257
# Track which kmer positions need to be counted on the index
258258
# Create a list of indices where each index refers to the corresponding
259259
# position in the given sequence segment
260-
counted_positions = np.nonzero(np.copy(~finished_search))[0]
260+
kmer_indices = np.nonzero(~finished_search)[0]
261261

262262
# Create a list of kmers to count on the index
263-
for i in counted_positions:
263+
for i in kmer_indices:
264264
current_kmer_length = current_length_query[i]
265265
kmer = sequence_segment.data[i:i+current_kmer_length]
266266
working_kmers.append(kmer)
@@ -272,65 +272,65 @@ def binary_search(index_filename: Path,
272272

273273
# Assert that the number of indices to count and the number of counts
274274
# are equal
275-
assert counted_positions.size == count_list.size, \
275+
assert kmer_indices.size == count_list.size, \
276276
"Number of counted positions ({}) and number of counts ({}) " \
277-
"do not match".format(len(counted_positions), len(count_list))
277+
"do not match".format(len(kmer_indices), len(count_list))
278278

279279
# Where we have counts of 1
280-
unique_lengths[counted_positions] = np.where(
280+
unique_lengths[kmer_indices] = np.where(
281281
(count_list == 1) &
282282
# And if there is no current unique length recorded
283-
((unique_lengths[counted_positions] == 0) |
283+
((unique_lengths[kmer_indices] == 0) |
284284
# Or there is a smaller length found than the current min length
285-
(current_length_query[counted_positions] <
286-
unique_lengths[counted_positions])),
285+
(current_length_query[kmer_indices] <
286+
unique_lengths[kmer_indices])),
287287
# Record the minimum kmer length found if it is less than the current
288-
current_length_query[counted_positions],
288+
current_length_query[kmer_indices],
289289
# Otherwise keep the current unique length
290-
unique_lengths[counted_positions])
290+
unique_lengths[kmer_indices])
291291

292292
# If we have a k-mer count of 1 and the current length queried is the
293293
# same as the lower bound (i.e. can't get smaller for a unique count)
294294
# This position has finished searching
295-
finished_search[counted_positions] = np.where(
295+
finished_search[kmer_indices] = np.where(
296296
count_list == 1,
297-
current_length_query[counted_positions] ==
298-
lower_length_bound[counted_positions],
299-
finished_search[counted_positions])
297+
current_length_query[kmer_indices] ==
298+
lower_length_bound[kmer_indices],
299+
finished_search[kmer_indices])
300300

301301
# If we have a k-mer count > 1 and the current length queried is the
302302
# same as the upper bound (i.e. can't find a unique length or larger)
303303
# This position has finished searching
304-
finished_search[counted_positions] = np.where(
304+
finished_search[kmer_indices] = np.where(
305305
count_list > 1,
306-
current_length_query[counted_positions] ==
307-
upper_length_bound[counted_positions],
308-
finished_search[counted_positions])
306+
current_length_query[kmer_indices] ==
307+
upper_length_bound[kmer_indices],
308+
finished_search[kmer_indices])
309309

310310
# Update the query length and bounds for the next iteration
311311

312312
# Lower the upper bounds of our search range on positions where
313313
# we need to decrease our k-mer length (i.e. counts == 1)
314314
# Set the new upper (inclusive) bound to the current query length - 1
315-
upper_length_bound[counted_positions] = np.where(
315+
upper_length_bound[kmer_indices] = np.where(
316316
count_list == 1,
317-
current_length_query[counted_positions] - 1,
318-
upper_length_bound[counted_positions])
317+
current_length_query[kmer_indices] - 1,
318+
upper_length_bound[kmer_indices])
319319

320320
# Raise the lower bounds of our search range on positions where
321321
# we need to increase our k-mer length (i.e. counts > 1)
322322
# Set the new lower (inclusive) bound to the current query length + 1
323-
lower_length_bound[counted_positions] = np.where(
323+
lower_length_bound[kmer_indices] = np.where(
324324
count_list > 1,
325-
current_length_query[counted_positions] + 1,
326-
lower_length_bound[counted_positions])
325+
current_length_query[kmer_indices] + 1,
326+
lower_length_bound[kmer_indices])
327327

328328
# Calculate the new query length as the midpoint between the updated
329329
# upper and lower bounds
330330
# NB: Avoid overflow by halving each bound before summing them
331-
current_length_query[counted_positions] = np.floor(
332-
(upper_length_bound[counted_positions] / 2) +
333-
(lower_length_bound[counted_positions] / 2)).astype(data_type)
331+
current_length_query[kmer_indices] = np.floor(
332+
(upper_length_bound[kmer_indices] / 2) +
333+
(lower_length_bound[kmer_indices] / 2)).astype(data_type)
334334

335335
iteration_count += 1
336336

0 commit comments

Comments
 (0)