Skip to content

Commit fa73ce4

Browse files
committed
Additional slight speedup of simplitig computation in global.
1 parent e8464a7 commit fa73ce4

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

src/simplitigs.h

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ std::vector<simplitig_t> simplitigs_from_fasta(std::string &path, int k) {
7676
}
7777

7878
template <bool complements, typename kmer_t, typename kh_S_t, typename kh_wrapper_t>
79-
inline kmer_t simplitig_right_rev_extension(kmer_t &forward, kmer_t &backward, kh_S_t *kMers, kh_wrapper_t &wrapper, int k) {
79+
inline uint8_t simplitig_right_rev_extension(kmer_t &forward, kmer_t &backward, kh_S_t *kMers, kh_wrapper_t &wrapper, int k) {
8080
bool forward_direction = true;
8181
if constexpr (!complements) {
8282
forward_direction = (backward == kmer_t(-1));
@@ -121,32 +121,44 @@ simplitig_t next_simplitig(kh_S_t *kMers, kh_wrapper_t wrapper, kmer_t begin, in
121121
if constexpr (!complements) {
122122
first_complement = last_complement = kmer_t(-1);
123123
}
124-
std::list<char> simplitig {};
125-
for (int i = 0; i < k; ++i) {
126-
simplitig.emplace_back(NucleotideAtIndex(last, k, i));
124+
// Assumes the largest simplitig is on the order of sqrt(n), where n = kh_size(kMers), so reserving up front saves time otherwise spent resizing the vectors.
125+
size_t size_estimate = sqrt(kh_size(kMers)) * 2;
126+
simplitig_t simplitig_front;
127+
simplitig_t simplitig_back(2 * k);
128+
simplitig_front.reserve(size_estimate);
129+
simplitig_back.reserve(2 * k + size_estimate);
130+
for (int i = 0; i < 2 * k; ++i) {
131+
simplitig_back[2 * k - i - 1] = last & (kmer_t(1) << i);
127132
}
128133
eraseKMer(kMers, wrapper, last, k, complements);
129134
while (true) {
130-
kmer_t ext = simplitig_right_rev_extension<complements>(last, last_complement, kMers, wrapper, k);
131-
if (ext == kmer_t(-1)) {
135+
uint8_t ext = simplitig_right_rev_extension<complements>(last, last_complement, kMers, wrapper, k);
136+
if (ext == uint8_t(-1)) {
132137
// No right extension found.
133138
break;
134139
} else {
135140
// Extend the simplitig to the right.
136-
simplitig.emplace_back(letters[3 ^ (uint8_t)ext]);
141+
142+
simplitig_back.push_back(!(ext & 2));
143+
simplitig_back.push_back(!(ext & 1));
137144
}
138145
}
139146
while(true) {
140-
kmer_t ext = simplitig_right_rev_extension<complements>(first_complement, first, kMers, wrapper, k);
141-
if (ext == kmer_t(-1)) {
147+
uint8_t ext = simplitig_right_rev_extension<complements>(first_complement, first, kMers, wrapper, k);
148+
if (ext == uint8_t(-1)) {
142149
// No left extension found.
143150
break;
144151
} else {
145152
// Extend the simplitig to the left.
146-
simplitig.emplace_front(letters[(uint8_t)ext]);
153+
simplitig_front.push_back(ext & 1);
154+
simplitig_front.push_back(ext & 2);
147155
}
148156
}
149-
return simplitig_from_string(std::string(simplitig.begin(), simplitig.end()));
157+
158+
std::reverse(simplitig_front.begin(), simplitig_front.end());
159+
simplitig_front.reserve(simplitig_front.size() + simplitig_back.size());
160+
for (bool b : simplitig_back) simplitig_front.push_back(b);
161+
return simplitig_front;
150162
}
151163

152164
template <typename kmer_t, typename kh_S_t, typename kh_wrapper_t>

0 commit comments

Comments
 (0)