|
#include "fzy.h"

#include <algorithm>
#include <array>
#include <bitset>
#include <cmath>
#include <cstddef>
#include <functional>
#include <limits>
#include <span>
#include <string>
#include <string_view>
#include <vector>
| 7 | + |
| 8 | +namespace { |
| 9 | +constexpr size_t MATCH_MAX_LEN = 1024; |
| 10 | + |
| 11 | +constexpr double SCORE_MAX = std::numeric_limits<double>::infinity(); |
| 12 | +constexpr double SCORE_MIN = -std::numeric_limits<double>::infinity(); |
| 13 | + |
| 14 | +constexpr double SCORE_GAP_LEADING = -0.005; |
| 15 | +constexpr double SCORE_GAP_TRAILING = -0.005; |
| 16 | +constexpr double SCORE_GAP_INNER = -0.01; |
| 17 | +constexpr double SCORE_MATCH_CONSECUTIVE = 1.0; |
| 18 | +constexpr double SCORE_MATCH_SLASH = 0.9; |
| 19 | +constexpr double SCORE_MATCH_WORD = 0.8; |
| 20 | +constexpr double SCORE_MATCH_CAPITAL = 0.7; |
| 21 | +constexpr double SCORE_MATCH_DOT = 0.6; |
| 22 | + |
| 23 | +struct ScoredResult { |
| 24 | + double score{}; |
| 25 | + QString str; |
| 26 | + QObject* obj = nullptr; |
| 27 | +}; |
| 28 | + |
// True only for the ASCII capital letters 'A'..'Z'.
bool isUpper(char16_t ch) {
    const bool outsideRange = ch < u'A' || ch > u'Z';
    return !outsideRange;
}
| 32 | + |
// True for ASCII digits and ASCII letters of either case; false for
// everything else (punctuation, whitespace, non-ASCII code units).
bool isOrdinary(char16_t ch) {
    if (ch >= u'0' && ch <= u'9') {
        return true;
    }
    if (ch >= u'a' && ch <= u'z') {
        return true;
    }
    return ch >= u'A' && ch <= u'Z';
}
| 40 | + |
// char16_t counterpart of the C strpbrk().
//
// Scans the NUL-terminated string `src` for the first code unit that also
// appears in the NUL-terminated set `segment`.
//
// Returns a pointer to that code unit. Unlike the C function it never returns
// nullptr: when nothing matches, the result points at the terminating NUL of
// `src`.
//
// Whole 16-bit code units are compared. (Looking at a single byte of each
// code unit would make e.g. U+0141 compare equal to 'A' and would depend on
// the host byte order.)
const char16_t* strpbrk(const char16_t* src, const char16_t* segment) {
    for (; *src != 0; ++src) { // NOLINT
        for (const char16_t* candidate = segment; *candidate != 0; ++candidate) { // NOLINT
            if (*candidate == *src) {
                return src;
            }
        }
    }
    return src; // points at the terminator: no match
}
| 53 | + |
// Finds the first occurrence of `c`, or of its ASCII uppercase form, in the
// NUL-terminated string `s`.
//
// Returns a pointer to the hit, or a pointer to the terminating NUL of `s`
// when there is none (never nullptr).
//
// The case mapping is ASCII-only and done by hand: the C toupper() has
// undefined behaviour for arguments that do not fit in an unsigned char,
// which most char16_t values do not. A lowercase `c` therefore matches both
// cases, while an uppercase (or non-letter) `c` matches only itself.
const char16_t* strcasechr(const char16_t* s, char16_t c) {
    const bool isAsciiLower = u'a' <= c && c <= u'z';
    const char16_t upper = isAsciiLower ? static_cast<char16_t>(c - (u'a' - u'A')) : c;

    for (; *s != 0; ++s) { // NOLINT
        if (*s == c || *s == upper) {
            break;
        }
    }
    return s;
}
| 58 | + |
// Reports whether every character of `needle` occurs in `haystack`, in order
// (i.e. `needle` is a subsequence of `haystack`).
//
// Case rule: an ASCII lowercase needle character matches either case in the
// haystack; any other needle character must match exactly.
//
// The search is bounded by haystack.size(): it never reads past the view and
// does not require (or stop at) a NUL terminator, so views into the middle of
// a larger buffer are handled correctly.
bool hasMatch(std::u16string_view needle, std::u16string_view haystack) {
    std::size_t cursor = 0;
    for (const char16_t needleChar : needle) {
        const bool isAsciiLower = u'a' <= needleChar && needleChar <= u'z';
        const char16_t upperChar =
            isAsciiLower ? static_cast<char16_t>(needleChar - (u'a' - u'A')) : needleChar;

        while (cursor < haystack.size() && haystack[cursor] != needleChar
               && haystack[cursor] != upperChar) {
            ++cursor;
        }
        if (cursor == haystack.size()) {
            return false;
        }
        ++cursor; // the next needle character must be found after this hit
    }
    return true;
}
| 70 | + |
| 71 | +struct MatchStruct { |
| 72 | + std::u16string lowerNeedle; |
| 73 | + std::u16string lowerHaystack; |
| 74 | + |
| 75 | + std::array<double, MATCH_MAX_LEN> matchBonus{}; |
| 76 | +}; |
| 77 | + |
| 78 | + |
| 79 | +double getBonus(char16_t ch, char16_t lastCh){ |
| 80 | + if (!isOrdinary(lastCh)) { |
| 81 | + return 0.0; |
| 82 | + } |
| 83 | + switch (ch) { |
| 84 | + case '/': |
| 85 | + return SCORE_MATCH_SLASH; |
| 86 | + case '-': |
| 87 | + case '_': |
| 88 | + case ' ': return SCORE_MATCH_WORD; |
| 89 | + case '.': return SCORE_MATCH_DOT; |
| 90 | + case 'a': |
| 91 | + case 'b': |
| 92 | + case 'c': |
| 93 | + case 'd': |
| 94 | + case 'e': |
| 95 | + case 'f': |
| 96 | + case 'g': |
| 97 | + case 'h': |
| 98 | + case 'i': |
| 99 | + case 'j': |
| 100 | + case 'k': |
| 101 | + case 'l': |
| 102 | + case 'm': |
| 103 | + case 'n': |
| 104 | + case 'o': |
| 105 | + case 'p': |
| 106 | + case 'q': |
| 107 | + case 'r': |
| 108 | + case 's': |
| 109 | + case 't': |
| 110 | + case 'u': |
| 111 | + case 'v': |
| 112 | + case 'w': |
| 113 | + case 'x': |
| 114 | + case 'y': |
| 115 | + case 'z': |
| 116 | + return isUpper(lastCh) ? SCORE_MATCH_CAPITAL : 0.0; |
| 117 | + default: return 0.0; |
| 118 | + } |
| 119 | +} |
| 120 | + |
| 121 | +void precomputeBonus(std::u16string_view haystack, std::span<double> matchBonus) { |
| 122 | + /* Which positions are beginning of words */ |
| 123 | + char16_t lastCh = '/'; |
| 124 | + for (size_t index = 0; index < haystack.size(); index++) { |
| 125 | + char16_t ch = haystack[index]; |
| 126 | + matchBonus[index] = getBonus(lastCh, ch); |
| 127 | + lastCh = ch; |
| 128 | + } |
| 129 | +} |
| 130 | + |
| 131 | +MatchStruct setupMatchStruct(std::u16string_view needle, std::u16string_view haystack) { |
| 132 | + MatchStruct match{}; |
| 133 | + |
| 134 | + for (const auto nch : needle){ |
| 135 | + match.lowerNeedle.push_back(tolower(nch)); |
| 136 | + } |
| 137 | + for (const auto hch : haystack){ |
| 138 | + match.lowerHaystack.push_back(tolower(hch)); |
| 139 | + } |
| 140 | + |
| 141 | + precomputeBonus(haystack, match.matchBonus); |
| 142 | + |
| 143 | + return match; |
| 144 | +} |
| 145 | + |
| 146 | +void matchRow(const MatchStruct& match, size_t row, std::span<double> currD, std::span<double> currM, std::span<const double> lastD, std::span<const double> lastM) { |
| 147 | + size_t needleLen = match.lowerNeedle.size(); |
| 148 | + size_t haystackLen = match.lowerHaystack.size(); |
| 149 | + |
| 150 | + std::u16string_view lowerNeedle = match.lowerNeedle; |
| 151 | + std::u16string_view lowerHaystack = match.lowerHaystack; |
| 152 | + const auto& matchBonus = match.matchBonus; |
| 153 | + |
| 154 | + double prevScore = SCORE_MIN; |
| 155 | + double gapScore = row == needleLen - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
| 156 | + |
| 157 | + /* These will not be used with this value, but not all compilers see it */ |
| 158 | + double prevM = SCORE_MIN; |
| 159 | + double prevD = SCORE_MIN; |
| 160 | + |
| 161 | + for (size_t index = 0; index < haystackLen; index++) { |
| 162 | + if (lowerNeedle[row] == lowerHaystack[index]) { |
| 163 | + double score = SCORE_MIN; |
| 164 | + if (!row) { |
| 165 | + score = (static_cast<double>(index) * SCORE_GAP_LEADING) + matchBonus[index]; |
| 166 | + } else if (index) { /* row > 0 && index > 0*/ |
| 167 | + score = fmax( |
| 168 | + prevM + matchBonus[index], |
| 169 | + |
| 170 | + /* consecutive match, doesn't stack with match_bonus */ |
| 171 | + prevD + SCORE_MATCH_CONSECUTIVE); |
| 172 | + } |
| 173 | + prevD = lastD[index]; |
| 174 | + prevM = lastM[index]; |
| 175 | + currD[index] = score; |
| 176 | + currM[index] = prevScore = fmax(score, prevScore + gapScore); |
| 177 | + } else { |
| 178 | + prevD = lastD[index]; |
| 179 | + prevM = lastM[index]; |
| 180 | + currD[index] = SCORE_MIN; |
| 181 | + currM[index] = prevScore = prevScore + gapScore; |
| 182 | + } |
| 183 | + } |
| 184 | +} |
| 185 | + |
| 186 | +double match(std::u16string_view needle, std::u16string_view haystack) { |
| 187 | + if (needle.empty()) |
| 188 | + return SCORE_MIN; |
| 189 | + |
| 190 | + if (haystack.size() > MATCH_MAX_LEN || needle.size() > haystack.size()) { |
| 191 | + return SCORE_MIN; |
| 192 | + } else if (haystack.size() == needle.size()){ |
| 193 | + return SCORE_MAX; |
| 194 | + } |
| 195 | + |
| 196 | + MatchStruct match = setupMatchStruct(needle, haystack); |
| 197 | + |
| 198 | + /* |
| 199 | + * D Stores the best score for this position ending with a match. |
| 200 | + * M Stores the best possible score at this position. |
| 201 | + */ |
| 202 | + std::array<double, MATCH_MAX_LEN> d{}; |
| 203 | + std::array<double, MATCH_MAX_LEN> m{}; |
| 204 | + |
| 205 | + for (size_t index = 0; index < needle.size(); index++) { |
| 206 | + matchRow(match, index, d, m, d, m); |
| 207 | + } |
| 208 | + |
| 209 | + return m[haystack.size() - 1]; |
| 210 | +} |
| 211 | + |
| 212 | +} |
| 213 | + |
| 214 | +namespace qs { |
| 215 | + |
| 216 | +QList<QObject*> FzyFinder::filter(const QString& needle, const QList<QObject*>& haystacks, const QString& name) { |
| 217 | + std::vector<ScoredResult> list; |
| 218 | + for (const auto& haystack : haystacks){ |
| 219 | + const auto h = haystack->property(name.toUtf8()).toString(); |
| 220 | + if (hasMatch(needle, h)) { |
| 221 | + list.emplace_back(match(needle, h), h, haystack); |
| 222 | + } |
| 223 | + } |
| 224 | + std::ranges::stable_sort(list, std::ranges::greater(), &ScoredResult::score); |
| 225 | + auto out = QList<QObject*>(static_cast<qsizetype>(list.size())); |
| 226 | + std::ranges::transform(list, out.begin(), [](const ScoredResult& result) -> QObject* { return result.obj; }); |
| 227 | + return out; |
| 228 | +} |
| 229 | + |
| 230 | +} |
0 commit comments