@@ -18,38 +18,20 @@ public function __construct()
18
18
19
19
/**
20
20
* Create the best feature snippet, which should contain the maximum number of query words.
21
- *
22
21
* The snippet can be generated from a set of matches, which are joined by "...".
23
22
*/
24
23
public static function smartTruncate (string $ query , string $ haystack , int $ len = 60 ): string
25
24
{
26
25
$ words = implode ('| ' , explode (' ' , self ::convertQueryToRegexWords ($ query )));
27
26
28
- $ snippetGenerator = static function (int $ len ) use ($ words , $ haystack ): array {
29
- $ s = '\s\x00\-\/\:\-\@\[-`{-~ ' ;
30
- preg_match_all (
31
- '/(?<=[ ' . $ s . ']).{0, ' . $ len . '}(( ' . $ words . ').{0, ' . $ len . '})+(?=[ ' . $ s . '])?/uis ' ,
32
- $ haystack ,
33
- $ matches ,
34
- PREG_SET_ORDER ,
35
- );
36
-
37
- $ snippets = [];
38
- foreach ($ matches as $ match ) {
39
- $ snippets [] = htmlspecialchars ($ match [0 ], 0 , 'UTF-8 ' );
40
- }
41
-
42
- return $ snippets ;
43
- };
44
-
45
27
$ return = '' ;
46
28
for ($ i = 0 ; $ i <= $ len / 30 ; $ i ++) {
47
- $ attempt = implode (' ... ' , $ snippetGenerator ( 30 + $ i * 10 ));
48
- if (
49
- $ attempt !== ''
50
- && ( $ return === '' || mb_strlen ( $ attempt , ' UTF-8 ' ) >= $ len ) // first iteration or longer
51
- ) {
52
- $ return = $ attempt ;
29
+ $ snippet = implode (' ... ' , self :: generateSnippetParts ( $ words , $ haystack , len: 30 + $ i * 10 ));
30
+ if ($ return === '' ) { // first iteration
31
+ $ return = $ snippet ;
32
+ }
33
+ if ( mb_strlen ( $ snippet , ' UTF-8 ' ) >= $ len ) { // prefer longer snippet
34
+ $ return = $ snippet ;
53
35
break ;
54
36
}
55
37
}
@@ -316,4 +298,26 @@ public static function truncate(string $s, int $maxLen, string $append = "\u{202
316
298
317
299
return $ s ;
318
300
}
301
+
302
+
303
+ /**
+ * Collects the fragments of $haystack that match a pattern built from $words.
+ *
+ * Each fragment is the full regex match: a run of context limited by $len, then one
+ * or more repetitions of a $words alternative followed by another run limited by $len.
+ * Every fragment is passed through htmlspecialchars() with flags 0 and the UTF-8 charset.
+ *
+ * @param string $words    Interpolated into the pattern without escaping; presumably a
+ *                         "|"-separated list of regex-ready words (TODO confirm with caller).
+ * @param string $haystack Text that is searched for fragments.
+ * @param int    $len      Upper bound used in both ".{0,N}" context quantifiers.
304
+ * @return array<int, string> Escaped fragments in the order they were matched; empty if nothing matched.
305
+ */
306
+ private static function generateSnippetParts (string $ words , string $ haystack , int $ len ): array
307
+ {
308
+ // Character class used in the lookbehind before each match and in the optional lookahead after it.
+ $ s = '\s\x00\-\/\:\-\@\[-`{-~ ' ;
309
+ preg_match_all (
310
+ '/(?<=[ ' . $ s . ']).{0, ' . $ len . '}(( ' . $ words . ').{0, ' . $ len . '})+(?=[ ' . $ s . '])?/uis ' ,
311
+ $ haystack ,
312
+ $ matches ,
313
+ PREG_SET_ORDER ,
314
+ );
315
+
316
+ $ snippets = [];
317
+ // Only the full match (index 0) of every set is kept; the capture groups are not used.
+ foreach ($ matches as $ match ) {
318
+ // Flags value 0 corresponds to ENT_NOQUOTES: quotes are left unconverted.
+ $ snippets [] = htmlspecialchars ($ match [0 ], 0 , 'UTF-8 ' );
319
+ }
320
+
321
+ return $ snippets ;
322
+ }
319
323
}
0 commit comments