Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="BenchmarkDotNet" Version="0.13.12" />
<PackageVersion Include="FastBertTokenizer" Version="0.4.67" />
<PackageVersion Include="Microsoft.Bcl.Numerics" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Configuration.Abstractions" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Configuration.Binder" Version="8.0.1" />
<PackageVersion Include="Microsoft.AspNetCore.Components.Web" Version="6.0.0" />
<PackageVersion Include="Microsoft.AspNetCore.Components.WebAssembly" Version="8.0.0" />
<PackageVersion Include="Microsoft.AspNetCore.Components.WebAssembly.Server" Version="8.0.0" />
<PackageVersion Include="Azure.AI.OpenAI" Version="1.0.0-beta.11" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime" Version="1.17.0" />
<PackageVersion Include="Microsoft.SemanticKernel.Abstractions" Version="1.6.2" />
<PackageVersion Include="Microsoft.SemanticKernel.Core" Version="1.6.2" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.Onnx" Version="1.18.0-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.18.0-alpha" />
<PackageVersion Include="System.Numerics.Tensors" Version="8.0.0" />
<PackageVersion Include="System.Runtime.Caching" Version="8.0.0" />
<PackageVersion Include="System.Text.Json" Version="8.0.4" />
Expand Down
15 changes: 4 additions & 11 deletions SmartComponents.sln
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SmartComponents.LocalEmbedd
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "benchmark", "benchmark", "{04F66920-45C0-4410-89ED-F2B5E6223958}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SmartComponents.LocalEmbeddings.Benchmark", "benchmark\SmartComponents.LocalEmbeddings.Benchmark\SmartComponents.LocalEmbeddings.Benchmark.csproj", "{CB0537FA-53A2-4470-A5CA-423C7D09EFC5}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SmartComponents.LocalEmbeddings.Benchmark", "benchmark\SmartComponents.LocalEmbeddings.Benchmark\SmartComponents.LocalEmbeddings.Benchmark.csproj", "{CB0537FA-53A2-4470-A5CA-423C7D09EFC5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestBlazorServerNet6App", "test\testassets\TestBlazorServerNet6App\TestBlazorServerNet6App.csproj", "{F8C57083-620C-4D8F-8366-60E06593F720}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestBlazorServerNet6App", "test\testassets\TestBlazorServerNet6App\TestBlazorServerNet6App.csproj", "{F8C57083-620C-4D8F-8366-60E06593F720}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SmartComponents.E2ETest.BlazorNet6", "test\SmartComponents.E2ETest.BlazorNet6\SmartComponents.E2ETest.BlazorNet6.csproj", "{7A919A92-A121-420B-9E18-47A60DCDAA69}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SmartComponents.E2ETest.BlazorNet6", "test\SmartComponents.E2ETest.BlazorNet6\SmartComponents.E2ETest.BlazorNet6.csproj", "{7A919A92-A121-420B-9E18-47A60DCDAA69}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SmartComponents.LocalEmbeddings.SemanticKernel", "src\SmartComponents.LocalEmbeddings.SemanticKernel\SmartComponents.LocalEmbeddings.SemanticKernel.csproj", "{A31FD69E-2744-4800-AA7E-D734E8737715}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SmartComponents.LocalEmbeddings.SemanticKernel.Test", "test\SmartComponents.LocalEmbeddings.SemanticKernel.Test\SmartComponents.LocalEmbeddings.SemanticKernel.Test.csproj", "{23031658-179A-4425-82F2-29290DE4F3B2}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SmartComponents.LocalEmbeddings.SemanticKernel.Test", "test\SmartComponents.LocalEmbeddings.SemanticKernel.Test\SmartComponents.LocalEmbeddings.SemanticKernel.Test.csproj", "{23031658-179A-4425-82F2-29290DE4F3B2}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -131,10 +129,6 @@ Global
{7A919A92-A121-420B-9E18-47A60DCDAA69}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7A919A92-A121-420B-9E18-47A60DCDAA69}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7A919A92-A121-420B-9E18-47A60DCDAA69}.Release|Any CPU.Build.0 = Release|Any CPU
{A31FD69E-2744-4800-AA7E-D734E8737715}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A31FD69E-2744-4800-AA7E-D734E8737715}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A31FD69E-2744-4800-AA7E-D734E8737715}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A31FD69E-2744-4800-AA7E-D734E8737715}.Release|Any CPU.Build.0 = Release|Any CPU
{23031658-179A-4425-82F2-29290DE4F3B2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{23031658-179A-4425-82F2-29290DE4F3B2}.Debug|Any CPU.Build.0 = Debug|Any CPU
{23031658-179A-4425-82F2-29290DE4F3B2}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -163,7 +157,6 @@ Global
{CB0537FA-53A2-4470-A5CA-423C7D09EFC5} = {04F66920-45C0-4410-89ED-F2B5E6223958}
{F8C57083-620C-4D8F-8366-60E06593F720} = {7A830C0D-7E18-4674-A729-726085D9C0D1}
{7A919A92-A121-420B-9E18-47A60DCDAA69} = {03710CDB-ACD6-4712-95C8-B780EEEFAA29}
{A31FD69E-2744-4800-AA7E-D734E8737715} = {B1370349-29FA-49A1-A229-A31F7516A1FF}
{23031658-179A-4425-82F2-29290DE4F3B2} = {03710CDB-ACD6-4712-95C8-B780EEEFAA29}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
Expand Down
30 changes: 18 additions & 12 deletions docs/local-embeddings.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ While you can use an external AI service to compute embeddings, in many cases yo

With `SmartComponents.LocalEmbeddings`, you can compute embeddings in under a millisecond, and perform semantic search over hundreds of thousands of candidates in single-digit milliseconds. However, there are limits. To understand the performance characteristics and when you might benefit from moving to an external vector database, see *Performance* below.

## Relationship to Semantic Kernel

Originally, `SmartComponents.LocalEmbeddings` was a standalone library, but it has more recently been changed to be a wrapper around Semantic Kernel's own ability to compute embeddings locally using ONNX Runtime.

As such, `SmartComponents.LocalEmbeddings` is now equivalent to using Semantic Kernel's `BertOnnxTextEmbeddingGenerationService`, with the following additional features:

* **Acquiring the embeddings model automatically at build time**. If you use SK directly, you need to take care of downloading a suitable `.onnx` file for the embeddings model and making it available at runtime. `LocalEmbeddings` handles this for you - see below for details of how to customize it.
* **Helper methods for finding the closest match from a set of candidates**. If you use SK directly, you can use `TensorPrimitives.CosineSimilarity` and similar methods to compute similarity between two embeddings, or `SemanticTextMemory.SearchAsync` to find the closest match from a precomputed set of embeddings. In comparison, `LocalEmbeddings` provides `LocalEmbedder.FindClosest` (described below) as an alternative way to search through a set of candidates. Both approaches will perform the same, but are convenient in different circumstances. If you're using SK, it's best to stick with the SK APIs, but if you're not using SK, the `LocalEmbedder.FindClosest` helper may be easier to use.
* **Alternative representations for embeddings**. With Semantic Kernel, the convention is to represent embeddings as `Span<float>` or `ReadOnlyMemory<float>`, which are equivalent in space/accuracy to `EmbeddingF32`. Beyond this, `SmartComponents.LocalEmbeddings` offers other representations `EmbeddingI8` and `EmbeddingI1` (described below) which give you different space/accuracy tradeoffs. For example, `EmbeddingI1` takes up only 1/32 of the memory of `EmbeddingF32` or `Span<float>` and can be used in nearest-neighbour searches considerably faster, at the cost of reduced accuracy. This is described in detail below.

**Recommendation**: `SmartComponents.LocalEmbeddings` is now a set of samples of ways you can build further capabilities and conveniences on top of Semantic Kernel's `BertOnnxTextEmbeddingGenerationService`. If you find these useful, you can use them in your own applications. But if SK's APIs are sufficient for your use cases, you should simply use them directly without using `SmartComponents.LocalEmbeddings`.

## Getting started

Add the `SmartComponents.LocalEmbeddings` project from this repo to your solution and reference it from your app.
Expand Down Expand Up @@ -262,20 +274,14 @@ The overall goal for `SmartComponents.LocalEmbeddings` is to make semantic searc

## Usage with Semantic Kernel

If you want to use this ONNX-based local embeddings generator with [Semantic Kernel](https://learn.microsoft.com/en-us/semantic-kernel/overview/), then you can use the the `SmartComponents.LocalEmbeddings.SemanticKernel` library.
As mentioned in the introduction to this document, `SmartComponents.LocalEmbeddings` is simply a wrapper around Semantic Kernel's `BertOnnxTextEmbeddingGenerationService`, showing ways to add further conveniences and capabilities.

Add the `SmartComponents.LocalEmbeddings.SemanticKernel` project to your solution and reference it from your app. Then use `AddLocalTextEmbeddingGeneration` to add a local embeddings generator to your `Kernel`:
The `LocalEmbedder` type implements SK's `ITextEmbeddingGenerationService` interface, so it can be used directly with any Semantic Kernel API that needs to generate embeddings. For example, when constructing a `SemanticTextMemory`, you can pass an instance of `LocalEmbedder` as the `embeddingGenerator` constructor argument:

```cs
var builder = Kernel.CreateBuilder();
builder.AddLocalTextEmbeddingGeneration();
```

You can then generate embeddings in the usual way for Semantic Kernel:

```cs
var kernel = builder.Build();
var embeddingGenerator = kernel.Services.GetRequiredService<ITextEmbeddingGenerationService>();
var storage = new VolatileMemoryStore(); // Requires a reference to Microsoft.SemanticKernel.Plugins.Memory
using var embedder = new LocalEmbedder();
var semanticTextMemory = new SemanticTextMemory(storage, embedder);

var embedding = await embeddingGenerator.GenerateEmbeddingAsync("Some text here");
// ... and now use semanticTextMemory to store and search for items
```

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading
Loading