Skip to content

Commit aa61c56

Browse files
committed
Helpers: Use better algorithm for split snippet.
1 parent 7e3efd2 commit aa61c56

File tree

1 file changed

+28
-24
lines changed

1 file changed

+28
-24
lines changed

src/Helpers.php

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,38 +18,20 @@ public function __construct()
1818

1919
/**
2020
* Create the best feature snippet, which should contain as many of the query words as possible.
21-
*
2221
* The snippet can be generated from a set of matches, which are joined by "...".
2322
*/
2423
public static function smartTruncate(string $query, string $haystack, int $len = 60): string
2524
{
2625
$words = implode('|', explode(' ', self::convertQueryToRegexWords($query)));
2726

28-
$snippetGenerator = static function (int $len) use ($words, $haystack): array {
29-
$s = '\s\x00\-\/\:\-\@\[-`{-~';
30-
preg_match_all(
31-
'/(?<=[' . $s . ']).{0,' . $len . '}((' . $words . ').{0,' . $len . '})+(?=[' . $s . '])?/uis',
32-
$haystack,
33-
$matches,
34-
PREG_SET_ORDER,
35-
);
36-
37-
$snippets = [];
38-
foreach ($matches as $match) {
39-
$snippets[] = htmlspecialchars($match[0], 0, 'UTF-8');
40-
}
41-
42-
return $snippets;
43-
};
44-
4527
$return = '';
4628
for ($i = 0; $i <= $len / 30; $i++) {
47-
$attempt = implode(' ... ', $snippetGenerator(30 + $i * 10));
48-
if (
49-
$attempt !== ''
50-
&& ($return === '' || mb_strlen($attempt, 'UTF-8') >= $len) // first iteration or longer
51-
) {
52-
$return = $attempt;
29+
$snippet = implode(' ... ', self::generateSnippetParts($words, $haystack, len: 30 + $i * 10));
30+
if ($return === '') { // first iteration
31+
$return = $snippet;
32+
}
33+
if (mb_strlen($snippet, 'UTF-8') >= $len) { // prefer longer snippet
34+
$return = $snippet;
5335
break;
5436
}
5537
}
@@ -316,4 +298,26 @@ public static function truncate(string $s, int $maxLen, string $append = "\u{202
316298

317299
return $s;
318300
}
301+
302+
303+
/**
* Collect the passages of $haystack that surround the query words and return them HTML-escaped.
*
* Each passage consists of up to $len characters of leading context, followed by one or more
* repetitions of "a query word plus up to $len characters of trailing context"; a further query
* word found inside that trailing window therefore extends the same passage.
*
* @param string $words    regex alternatives that are interpolated into the pattern unchanged
*                          (smartTruncate() passes a '|'-joined word list)
* @param string $haystack text to search
* @param int    $len      maximum number of characters of context kept before the first matched
*                          word and after each matched word
*
304+
* @return array<int, string>
305+
*/
306+
private static function generateSnippetParts(string $words, string $haystack, int $len): array
307+
{
308+
// Character class of delimiters: whitespace, NUL and ASCII punctuation.
// NOTE(review): the hyphens after \x00 and \: are escaped, so they are literal hyphens rather
// than ranges — confirm whether the ranges \x00-\/ and \:-\@ were intended.
$s = '\s\x00\-\/\:\-\@\[-`{-~';
309+
preg_match_all(
310+
// Modifiers: u = UTF-8 (so {0,$len} counts characters), i = case-insensitive, s = dot matches newlines.
// NOTE(review): the lookbehind requires a delimiter before the match, so a passage cannot
// start at offset 0 of $haystack — confirm this is intended.
'/(?<=[' . $s . ']).{0,' . $len . '}((' . $words . ').{0,' . $len . '})+(?=[' . $s . '])?/uis',
311+
$haystack,
312+
$matches,
313+
// One entry per match; index 0 of each entry holds the whole matched passage.
PREG_SET_ORDER,
314+
);
315+
316+
$snippets = [];
317+
foreach ($matches as $match) {
318+
// Flags 0 equals ENT_NOQUOTES: &, < and > are escaped, quotes are left untouched.
$snippets[] = htmlspecialchars($match[0], 0, 'UTF-8');
319+
}
320+
321+
return $snippets;
322+
}
319323
}

0 commit comments

Comments
 (0)