Skip to content

Commit 2e0ab39

Browse files
committed
Reimplement SearchByPrefix to include distance in results
1 parent 261e4a8 commit 2e0ab39

11 files changed

+189
-436
lines changed

src/Levenshtypo.Tests/LevenshtrieSearchByPrefixTests.cs

Lines changed: 42 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -114,14 +114,51 @@ public void StackOverflow_Scenario2()
114114
Test(levenshtrie, "a", 0, entries);
115115
}
116116

117-
private static void Test(Levenshtrie<string> t, string query, int distance, IEnumerable<string> expectedResults)
117+
private static void Test(Levenshtrie<string> t, string query, int distance, IEnumerable<string> expected)
118118
{
119+
var list = expected.ToList();
120+
if (list.Count > 10_000)
121+
{
122+
// Test will be too slow for Shouldly to compare with ignoreOrder.
123+
// N.B. to be clear: Levenshtypo can easily handle such numbers
124+
return;
125+
}
126+
127+
var expectedResults = list.Select(e => new LevenshtrieSearchResult<string>(CalculatePrefixDistance(query, e), e));
128+
119129
t.SearchByPrefix(query, distance)
120-
.OrderBy(x => x)
121-
.ShouldBe(expectedResults.OrderBy(x => x));
130+
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultEqualityComparer<string>());
122131

123132
t.EnumerateSearchByPrefix(query, distance)
124-
.OrderBy(x => x)
125-
.ShouldBe(expectedResults.OrderBy(x => x));
133+
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultEqualityComparer<string>());
134+
}
135+
136+
/// <summary>
/// Computes the distance the tests expect for <paramref name="result"/> when it is matched
/// against <paramref name="query"/> by prefix. The runes of <paramref name="result"/> are fed
/// one at a time through an automaton built from <paramref name="query"/>, and the smallest
/// <c>Distance</c> seen on any final state (the start state included) is kept.
/// </summary>
/// <param name="query">The text the automaton is constructed from.</param>
/// <param name="result">The candidate string whose runes are fed to the automaton.</param>
/// <returns>
/// The smallest distance reported by a final state along the walk, or
/// <see cref="int.MaxValue"/> if no final state was visited.
/// </returns>
private static int CalculatePrefixDistance(string query, string result)
137+
{
138+
// Sentinel: stays at int.MaxValue unless some final state is visited below.
int minDistance = int.MaxValue;
139+
140+
// NOTE(review): the bound is fixed at 5 rather than taken from the test's distance argument,
// and no ignoreCase/metric arguments are passed - confirm both match the tries under test.
var automaton = LevenshtomatonFactory.Instance.Construct(query, maxEditDistance: 5);
141+
142+
var state = automaton.Start();
143+
144+
// The start state is checked too, before any rune of the result has been consumed.
if (state.IsFinal && state.Distance < minDistance)
145+
{
146+
minDistance = state.Distance;
147+
}
148+
149+
// Walk the result rune by rune so that every prefix of it gets a chance to be accepted.
foreach (var r in result.EnumerateRunes())
150+
{
151+
// MoveNext returning false ends the walk; presumably the automaton has no
// transition for this rune, so longer prefixes cannot match either - TODO confirm.
if (!state.MoveNext(r, out state))
152+
{
153+
break;
154+
}
155+
156+
// Keep the best (lowest) distance among all accepted prefixes seen so far.
if (state.IsFinal && state.Distance < minDistance)
157+
{
158+
minDistance = state.Distance;
159+
}
160+
}
161+
162+
return minDistance;
126163
}
127164
}
Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
using System.Diagnostics.CodeAnalysis;
2+
3+
namespace Levenshtypo.Tests;
4+
5+
/// <summary>
/// Equality comparer for <see cref="LevenshtrieSearchResult{T}"/> used by the tests:
/// two results are equal when both their <c>Distance</c> and their <c>Result</c> match.
/// </summary>
/// <typeparam name="T">The type of the <c>Result</c> carried by each search result.</typeparam>
internal class LevenshtrieSearchResultEqualityComparer<T> : IEqualityComparer<LevenshtrieSearchResult<T>>
6+
{
7+
/// <summary>
/// Returns <c>true</c> when <paramref name="x"/> and <paramref name="y"/> have the same
/// <c>Distance</c> and equal <c>Result</c> values.
/// </summary>
public bool Equals(LevenshtrieSearchResult<T> x, LevenshtrieSearchResult<T> y)
8+
=> x.Distance == y.Distance
9+
// Null-safe: when x.Result is null, the results match only if y.Result is null as well.
&& (x.Result?.Equals(y.Result) ?? (y.Result is null));
10+
11+
/// <summary>
/// Builds a hash code from the same two members that <c>Equals</c> compares
/// (<c>Distance</c> and <c>Result</c>).
/// </summary>
public int GetHashCode([DisallowNull] LevenshtrieSearchResult<T> obj)
12+
{
13+
var hashCode = new HashCode();
14+
hashCode.Add(obj.Distance);
15+
hashCode.Add(obj.Result);
16+
return hashCode.ToHashCode();
17+
}
18+
}

src/Levenshtypo.Tests/LevenshtrieSearchTests.cs

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
using System.Diagnostics.CodeAnalysis;
2-
using Shouldly;
1+
using Shouldly;
32

43
namespace Levenshtypo.Tests;
54

@@ -112,6 +111,7 @@ public void MultiMap()
112111
.ShouldBe(["1", "2", "3", "5", "6", "7"], ignoreOrder: true, comparer: StringComparer.OrdinalIgnoreCase);
113112

114113
t.SearchByPrefix("00", maxEditDistance: 0)
114+
.Select(r => r.Result)
115115
.ToArray()
116116
.ShouldBe(["1", "2", "3", "4", "5", "6", "7"], ignoreOrder: true, comparer: StringComparer.OrdinalIgnoreCase);
117117

@@ -127,24 +127,9 @@ private static void Test(Levenshtrie<string> t, string query, int distance, IEnu
127127
var expectedResults = expected.Select(e => new LevenshtrieSearchResult<string>(e.distance, e.word));
128128

129129
t.Search(query, distance)
130-
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultComparer<string>());
130+
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultEqualityComparer<string>());
131131

132132
t.EnumerateSearch(query, distance)
133-
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultComparer<string>());
134-
}
135-
136-
private class LevenshtrieSearchResultComparer<T> : IEqualityComparer<LevenshtrieSearchResult<T>>
137-
{
138-
public bool Equals(LevenshtrieSearchResult<T> x, LevenshtrieSearchResult<T> y)
139-
=> x.Distance == y.Distance
140-
&& (x.Result?.Equals(y.Result) ?? (y.Result is null));
141-
142-
public int GetHashCode([DisallowNull] LevenshtrieSearchResult<T> obj)
143-
{
144-
var hashCode = new HashCode();
145-
hashCode.Add(obj.Distance);
146-
hashCode.Add(obj.Result);
147-
return hashCode.ToHashCode();
148-
}
133+
.ShouldBe(expectedResults, ignoreOrder: true, comparer: new LevenshtrieSearchResultEqualityComparer<string>());
149134
}
150135
}

src/Levenshtypo/ILevenshtrie.cs

Lines changed: 49 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -25,16 +25,6 @@ public interface ILevenshtrie<T>
2525
LevenshtrieSearchResult<T>[] Search<TSearchState>(TSearchState searcher)
2626
where TSearchState : ILevenshtomatonExecutionState<TSearchState>;
2727

28-
/// <summary>
29-
/// Searches for values whose keys begin with a prefix accepted by the search state,
30-
/// but without considering further edits beyond the matched prefix.
31-
/// </summary>
32-
/// <typeparam name="TSearchState">The automaton state used to guide the prefix traversal.</typeparam>
33-
/// <param name="searcher">The automaton execution state.</param>
34-
/// <returns>An array of values with matching prefixes.</returns>
35-
T[] SearchByPrefix<TSearchState>(TSearchState searcher)
36-
where TSearchState : ILevenshtomatonExecutionState<TSearchState>;
37-
3828
/// <summary>
3929
/// Lazily searches for values whose keys are accepted by the specified automaton execution state.
4030
/// Results are returned in arbitrary order and evaluated on-demand.
@@ -48,20 +38,6 @@ T[] SearchByPrefix<TSearchState>(TSearchState searcher)
4838
/// </remarks>
4939
IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearch<TSearchState>(TSearchState searcher)
5040
where TSearchState : ILevenshtomatonExecutionState<TSearchState>;
51-
52-
/// <summary>
53-
/// Lazily searches for values whose keys begin with a prefix accepted by the automaton execution state.
54-
/// Results are returned in arbitrary order and evaluated on-demand.
55-
/// </summary>
56-
/// <typeparam name="TSearchState">The automaton state used to guide the traversal.</typeparam>
57-
/// <param name="searcher">The automaton execution state.</param>
58-
/// <returns>An enumerable of values with matching prefixes.</returns>
59-
/// <remarks>
60-
/// This method avoids allocating a full result array up front, and can be more efficient
61-
/// when only a subset of results are consumed. However, it may be slower when all results are needed.
62-
/// </remarks>
63-
IEnumerable<T> EnumerateSearchByPrefix<TSearchState>(TSearchState searcher)
64-
where TSearchState : ILevenshtomatonExecutionState<TSearchState>;
6541
}
6642

6743
public static class LevenshtrieExtensions
@@ -113,6 +89,17 @@ public static LevenshtrieSearchResult<T>[] Search<T>(this ILevenshtrie<T> @this,
11389
public static LevenshtrieSearchResult<T>[] Search<T>(this ILevenshtrie<T> @this, LevenshtomatonExecutionState searcher)
11490
=> @this.Search(searcher);
11591

92+
/// <summary>
93+
/// Searches for values whose keys begin with a prefix accepted by the search state,
94+
/// but without considering further edits beyond the matched prefix.
95+
/// </summary>
/// <remarks>
/// Implemented by wrapping <paramref name="searcher"/> in
/// <c>PrefixTrackingLevenshtomatonExecutionState</c> and passing the wrapper to the trie's
/// regular <c>Search</c> method.
/// </remarks>
/// <typeparam name="T">The type of the values returned in the search results.</typeparam>
96+
/// <typeparam name="TSearchState">The automaton state used to guide the prefix traversal.</typeparam>
97+
/// <param name="this">The trie to search.</param>
/// <param name="searcher">The automaton execution state.</param>
98+
/// <returns>An array of matched values and their corresponding edit distances.</returns>
99+
public static LevenshtrieSearchResult<T>[] SearchByPrefix<T, TSearchState>(this ILevenshtrie<T> @this, TSearchState searcher)
100+
where TSearchState : ILevenshtomatonExecutionState<TSearchState>
101+
=> @this.Search(PrefixTrackingLevenshtomatonExecutionState<TSearchState>.Start(searcher));
102+
116103
/// <summary>
117104
/// Searches for values whose keys begin with the specified prefix text,
118105
/// allowing up to the given edit distance. Results are returned in arbitrary order.
@@ -123,11 +110,12 @@ public static LevenshtrieSearchResult<T>[] Search<T>(this ILevenshtrie<T> @this,
123110
/// <param name="maxEditDistance">The maximum allowed edit distance.</param>
124111
/// <param name="metric">The edit distance metric to use.</param>
125112
/// <returns>An array of matched values whose keys begin with the given prefix, together with their edit distances.</returns>
126-
public static T[] SearchByPrefix<T>(this ILevenshtrie<T> @this, string text, int maxEditDistance, LevenshtypoMetric metric = LevenshtypoMetric.Levenshtein)
113+
public static LevenshtrieSearchResult<T>[] SearchByPrefix<T>(this ILevenshtrie<T> @this, string text, int maxEditDistance, LevenshtypoMetric metric = LevenshtypoMetric.Levenshtein)
127114
{
128115
var automaton = LevenshtomatonFactory.Instance.Construct(text, maxEditDistance, ignoreCase: @this.IgnoreCase, metric: metric);
129116
return @this.SearchByPrefix(automaton);
130117
}
118+
131119
/// <summary>
132120
/// Searches for values whose keys are accepted by the given automaton, restricted to prefix matches.
133121
/// Results are returned in arbitrary order.
@@ -139,15 +127,15 @@ public static T[] SearchByPrefix<T>(this ILevenshtrie<T> @this, string text, int
139127
/// <exception cref="ArgumentException">
140128
/// Thrown if the automaton's case sensitivity does not match the trie's configuration.
141129
/// </exception>
142-
public static T[] SearchByPrefix<T>(this ILevenshtrie<T> @this, Levenshtomaton automaton)
130+
public static LevenshtrieSearchResult<T>[] SearchByPrefix<T>(this ILevenshtrie<T> @this, Levenshtomaton automaton)
143131
{
144132
if (automaton.IgnoreCase != @this.IgnoreCase)
145133
{
146134
throw new ArgumentException("Case sensitivity of automaton does not match.");
147135
}
148136

149-
var executor = @this as ILevenshtomatonExecutor<SearchByPrefixWrapper<T[]>> ?? new TrieExecutor<T>(@this);
150-
return automaton.Execute(executor).Wrapped;
137+
var executor = new TriePrefixExecutor<T>(@this);
138+
return automaton.Execute<LevenshtrieSearchResult<T>[]>(executor);
151139
}
152140

153141
/// <summary>
@@ -158,8 +146,8 @@ public static T[] SearchByPrefix<T>(this ILevenshtrie<T> @this, Levenshtomaton a
158146
/// <param name="this">The trie to search.</param>
159147
/// <param name="searcher">A boxed automaton execution state.</param>
160148
/// <returns>Matched values whose keys satisfy the prefix condition, together with their edit distances.</returns>
161-
public static T[] SearchByPrefix<T>(this ILevenshtrie<T> @this, LevenshtomatonExecutionState searcher)
162-
=> @this.SearchByPrefix<LevenshtomatonExecutionState>(searcher);
149+
public static LevenshtrieSearchResult<T>[] SearchByPrefix<T>(this ILevenshtrie<T> @this, LevenshtomatonExecutionState searcher)
150+
=> @this.SearchByPrefix<T, LevenshtomatonExecutionState>(searcher);
163151

164152
/// <summary>
165153
/// Lazily searches for approximate matches using a Levenshtein automaton, evaluated on-demand.
@@ -223,7 +211,7 @@ public static IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearch<T>(this IL
223211
/// Lazily evaluates prefix matches. Uses less memory than eager prefix search,
224212
/// and is efficient when only a subset of results is needed.
225213
/// </remarks>
226-
public static IEnumerable<T> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, string text, int maxEditDistance, LevenshtypoMetric metric = LevenshtypoMetric.Levenshtein)
214+
public static IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, string text, int maxEditDistance, LevenshtypoMetric metric = LevenshtypoMetric.Levenshtein)
227215
{
228216
var automaton = LevenshtomatonFactory.Instance.Construct(text, maxEditDistance, ignoreCase: @this.IgnoreCase, metric: metric);
229217
return @this.EnumerateSearchByPrefix(automaton);
@@ -239,15 +227,15 @@ public static IEnumerable<T> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @th
239227
/// <exception cref="ArgumentException">
240228
/// Thrown if the automaton's case sensitivity does not match the trie's configuration.
241229
/// </exception>
242-
public static IEnumerable<T> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, Levenshtomaton automaton)
230+
public static IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, Levenshtomaton automaton)
243231
{
244232
if (automaton.IgnoreCase != @this.IgnoreCase)
245233
{
246234
throw new ArgumentException("Case sensitivity of automaton does not match.");
247235
}
248236

249-
var executor = @this as ILevenshtomatonExecutor<SearchByPrefixWrapper<IEnumerable<T>>> ?? new TrieExecutor<T>(@this);
250-
return automaton.Execute(executor).Wrapped;
237+
var executor = new TriePrefixExecutor<T>(@this);
238+
return automaton.Execute<IEnumerable<LevenshtrieSearchResult<T>>>(executor);
251239
}
252240

253241
/// <summary>
@@ -257,25 +245,42 @@ public static IEnumerable<T> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @th
257245
/// <param name="this">The trie to search.</param>
258246
/// <param name="searcher">A boxed automaton execution state.</param>
259247
/// <returns>An enumerable of matched values with matching prefixes, together with their edit distances.</returns>
260-
public static IEnumerable<T> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, LevenshtomatonExecutionState searcher)
261-
=> @this.EnumerateSearchByPrefix<LevenshtomatonExecutionState>(searcher);
248+
public static IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearchByPrefix<T>(this ILevenshtrie<T> @this, LevenshtomatonExecutionState searcher)
249+
=> @this.EnumerateSearchByPrefix<T, LevenshtomatonExecutionState>(searcher);
250+
251+
/// <summary>
252+
/// Lazily searches for values whose keys begin with a prefix accepted by the automaton execution state.
253+
/// Results are returned in arbitrary order and evaluated on-demand.
254+
/// </summary>
255+
/// <typeparam name="T">The type of the values returned in the search results.</typeparam>
/// <typeparam name="TSearchState">The automaton state used to guide the traversal.</typeparam>
256+
/// <param name="this">The trie to search.</param>
/// <param name="searcher">The automaton execution state.</param>
257+
/// <returns>A lazily evaluated sequence of matched values and their corresponding edit distances.</returns>
258+
/// <remarks>
259+
/// This method avoids allocating a full result array up front, and can be more efficient
260+
/// when only a subset of results are consumed. However, it may be slower when all results are needed.
261+
/// Implemented by wrapping <paramref name="searcher"/> in
/// <c>PrefixTrackingLevenshtomatonExecutionState</c> and passing the wrapper to the trie's
/// <c>EnumerateSearch</c> method.
/// </remarks>
262+
public static IEnumerable<LevenshtrieSearchResult<T>> EnumerateSearchByPrefix<T, TSearchState>(this ILevenshtrie<T> @this, TSearchState searcher)
263+
where TSearchState : ILevenshtomatonExecutionState<TSearchState>
264+
=> @this.EnumerateSearch(PrefixTrackingLevenshtomatonExecutionState<TSearchState>.Start(searcher));
262265

263266
private class TrieExecutor<T>(ILevenshtrie<T> trie) :
264267
ILevenshtomatonExecutor<LevenshtrieSearchResult<T>[]>,
265-
ILevenshtomatonExecutor<IEnumerable<LevenshtrieSearchResult<T>>>,
266-
ILevenshtomatonExecutor<SearchByPrefixWrapper<T[]>>,
267-
ILevenshtomatonExecutor<SearchByPrefixWrapper<IEnumerable<T>>>
268+
ILevenshtomatonExecutor<IEnumerable<LevenshtrieSearchResult<T>>>
268269
{
269270
public LevenshtrieSearchResult<T>[] ExecuteAutomaton<TState>(TState executionState) where TState : struct, ILevenshtomatonExecutionState<TState>
270271
=> trie.Search(executionState);
271272

272273
IEnumerable<LevenshtrieSearchResult<T>> ILevenshtomatonExecutor<IEnumerable<LevenshtrieSearchResult<T>>>.ExecuteAutomaton<TState>(TState executionState)
273274
=> trie.EnumerateSearch(executionState);
275+
}
276+
private class TriePrefixExecutor<T>(ILevenshtrie<T> trie) :
277+
ILevenshtomatonExecutor<LevenshtrieSearchResult<T>[]>,
278+
ILevenshtomatonExecutor<IEnumerable<LevenshtrieSearchResult<T>>>
279+
{
280+
public LevenshtrieSearchResult<T>[] ExecuteAutomaton<TState>(TState executionState) where TState : struct, ILevenshtomatonExecutionState<TState>
281+
=> trie.SearchByPrefix(executionState);
274282

275-
SearchByPrefixWrapper<T[]> ILevenshtomatonExecutor<SearchByPrefixWrapper<T[]>>.ExecuteAutomaton<TState>(TState executionState)
276-
=> new(trie.SearchByPrefix(executionState));
277-
278-
SearchByPrefixWrapper<IEnumerable<T>> ILevenshtomatonExecutor<SearchByPrefixWrapper<IEnumerable<T>>>.ExecuteAutomaton<TState>(TState executionState)
279-
=> new(trie.EnumerateSearchByPrefix(executionState));
283+
IEnumerable<LevenshtrieSearchResult<T>> ILevenshtomatonExecutor<IEnumerable<LevenshtrieSearchResult<T>>>.ExecuteAutomaton<TState>(TState executionState)
284+
=> trie.EnumerateSearchByPrefix(executionState);
280285
}
281286
}

0 commit comments

Comments
 (0)