Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
using Build5Nines.SharpVector.Embeddings;
using OpenAI.Embeddings;
using System.Collections.Generic;
using System.Linq;

namespace Build5Nines.SharpVector.OpenAI.Embeddings;

public class OpenAIEmbeddingsGenerator : IEmbeddingsGenerator
public class OpenAIEmbeddingsGenerator : IEmbeddingsGenerator //IBatchEmbeddingsGenerator
{
protected EmbeddingClient EmbeddingClient { get; private set; }

Expand All @@ -18,4 +20,29 @@ public async Task<float[]> GenerateEmbeddingsAsync(string text)
var vector = embedding.ToFloats();
return vector.ToArray();
}

/// <summary>
/// Generates embeddings for a batch of input texts with a single multi-input request
/// to the OpenAI embeddings client.
/// </summary>
/// <param name="texts">Texts to embed. The sequence is enumerated exactly once.</param>
/// <returns>
/// One float vector per input text, aligned to the input order.
/// An empty list is returned, without calling the service, when <paramref name="texts"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="texts"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The service returned a different number of embeddings than the number of inputs.
/// </exception>
public async Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts)
{
    if (texts is null) throw new ArgumentNullException(nameof(texts));

    // Materialize once so the input count is known and the source is not re-enumerated.
    var inputs = texts.ToList();
    if (inputs.Count == 0)
    {
        return Array.Empty<float[]>();
    }

    // Call the batch embeddings API once for all inputs.
    // Library code: do not capture the caller's synchronization context.
    var batchResult = await EmbeddingClient.GenerateEmbeddingsAsync(inputs).ConfigureAwait(false);
    var embeddings = batchResult.Value;

    // Callers index the result by input position, so a short or long response
    // must fail loudly here instead of silently misaligning texts and vectors.
    if (embeddings.Count != inputs.Count)
    {
        throw new InvalidOperationException(
            $"Expected {inputs.Count} embeddings but the service returned {embeddings.Count}.");
    }

    // Order by the index reported for each embedding so the output is aligned to the
    // input order. OrderBy is a stable sort, so an already-ordered response is unchanged.
    return embeddings
        .OrderBy(e => e.Index)
        .Select(e => e.ToFloats().ToArray())
        .ToList();
}
}
2 changes: 1 addition & 1 deletion src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<PackageId>Build5Nines.SharpVector</PackageId>
<PackageProjectUrl>https://sharpvector.build5nines.com</PackageProjectUrl>
<RepositoryUrl>https://github.com/Build5Nines/SharpVector</RepositoryUrl>
<Version>2.1.2</Version>
<Version>2.1.3</Version>
<Description>Lightweight In-memory Vector Database to embed in any .NET Applications</Description>
<Copyright>Copyright (c) 2025 Build5Nines LLC</Copyright>
<PackageReadmeFile>README.md</PackageReadmeFile>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace Build5Nines.SharpVector.Embeddings;

/// <summary>
/// Optional capability for embeddings generators that can embed multiple texts in one call.
/// It extends <see cref="IEmbeddingsGenerator"/>, so an implementation still provides the
/// generator's regular members in addition to the batch method declared here.
/// Implementations can leverage provider APIs that accept multi-input requests for better performance.
/// </summary>
public interface IBatchEmbeddingsGenerator : IEmbeddingsGenerator
{
/// <summary>
/// Generates embeddings for multiple input texts in a single call when supported.
/// </summary>
/// <param name="texts">Collection of texts to embed. Order should be preserved in the output.</param>
/// <returns>
/// A task producing a read-only list of embedding vectors, one <c>float[]</c> per input text,
/// corresponding to the input order.
/// </returns>
Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts);
}
7 changes: 7 additions & 0 deletions src/Build5Nines.SharpVector/IVectorDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ public interface IVectorDatabase<TId, TMetadata, TDocument>
/// <returns></returns>
Task<TId> AddTextAsync(TDocument text, TMetadata? metadata = default(TMetadata));

/// <summary>
/// Adds multiple texts, each with optional metadata, to the database and returns their IDs.
/// </summary>
/// <param name="items">The (text, metadata) pairs to add in batch. The metadata of a pair may be null.</param>
/// <returns>A task producing a read-only list of the IDs of the added texts.</returns>
Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(TDocument text, TMetadata? metadata)> items);

/// <summary>
/// Get all the Ids for each text in the database.
/// </summary>
Expand Down
58 changes: 58 additions & 0 deletions src/Build5Nines.SharpVector/MemoryVectorDatabaseBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,22 @@ public IEnumerable<TId> GetIds()
return id;
}

/// <summary>
/// Adds every (text, metadata) pair to the database, one item at a time,
/// by delegating to <c>AddTextAsync</c> and collecting the IDs it returns.
/// </summary>
/// <param name="items">The texts and their optional metadata to add.</param>
/// <returns>The IDs of the added texts, in the order the items were enumerated.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
public async Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(TVocabularyKey text, TMetadata? metadata)> items)
{
    if (items is null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    var addedIds = new List<TId>();

    // Items are added sequentially; each add is awaited before the next one starts.
    foreach (var (text, metadata) in items)
    {
        addedIds.Add(await AddTextAsync(text, metadata));
    }

    return addedIds;
}


/// <summary>
/// Retrieves a text and metadata by its ID
/// </summary>
Expand Down Expand Up @@ -469,6 +485,48 @@ public IEnumerable<TId> GetIds()
return id;
}

/// <summary>
/// Adds multiple texts with optional metadata to the database.
/// If the embeddings generator implements <see cref="IBatchEmbeddingsGenerator"/>, all vectors are
/// generated with a single multi-input call; otherwise one embedding call is made per text.
/// No item is stored until every vector has been generated and the vector count has been validated.
/// </summary>
/// <param name="items">Collection of (text, metadata) tuples to add. Enumerated exactly once.</param>
/// <returns>List of generated IDs in the same order as inputs; empty when <paramref name="items"/> is empty.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The batch embeddings generator returned null or a number of vectors different from the number of inputs.
/// </exception>
public async Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(string text, TMetadata? metadata)> items)
{
    if (items is null) throw new ArgumentNullException(nameof(items));

    var list = items.ToList();
    if (list.Count == 0) return Array.Empty<TId>();

    // Try batch embeddings if supported
    float[][] vectors;
    if (EmbeddingsGenerator is IBatchEmbeddingsGenerator batchGen)
    {
        var batch = await batchGen.GenerateEmbeddingsAsync(list.Select(i => i.text)).ConfigureAwait(false);

        // Validate before anything is stored: a mismatched batch would otherwise fail
        // part-way through the store loop below and leave a partial insert behind.
        if (batch is null || batch.Count != list.Count)
        {
            throw new InvalidOperationException(
                $"The batch embeddings generator returned {(batch is null ? "no result" : batch.Count + " vectors")} for {list.Count} inputs.");
        }

        // Copy each vector so stored items do not share arrays with the generator's result.
        vectors = batch.Select(v => v.ToArray()).ToArray();
    }
    else
    {
        // Fallback to per-item embedding
        vectors = new float[list.Count][];
        for (int i = 0; i < list.Count; i++)
        {
            vectors[i] = await EmbeddingsGenerator.GenerateEmbeddingsAsync(list[i].text).ConfigureAwait(false);
        }
    }

    // Store items and produce IDs
    var ids = new List<TId>(list.Count);
    for (int i = 0; i < list.Count; i++)
    {
        TId id = _idGenerator.NewId();
        ids.Add(id);
        await VectorStore.SetAsync(id, new VectorTextItem<TMetadata>(list[i].text, list[i].metadata, vectors[i])).ConfigureAwait(false);
    }

    return ids;
}

/// <summary>
/// Retrieves a text and metadata by its ID
/// </summary>
Expand Down
1 change: 1 addition & 0 deletions src/OpenAIConsoleTest/OpenAIConsoleTest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
</ItemGroup>

<ItemGroup>
<!-- <ProjectReference Include="..\Build5Nines.SharpVector\Build5Nines.SharpVector.csproj" /> -->
<ProjectReference Include="..\Build5Nines.SharpVector.OpenAI\Build5Nines.SharpVector.OpenAI.csproj" />
</ItemGroup>

Expand Down
74 changes: 74 additions & 0 deletions src/SharpVectorTest/BatchAddTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
namespace SharpVectorTest;

using System.Linq;
using System.Threading.Tasks;
using Build5Nines.SharpVector;
using Build5Nines.SharpVector.Embeddings;
using Build5Nines.SharpVector.Id;
using Build5Nines.SharpVector.VectorCompare;
using Build5Nines.SharpVector.VectorStore;

/// <summary>
/// Tests for batch insertion through <c>AddTextsAsync</c> when the embeddings generator
/// supports batch generation.
/// </summary>
[TestClass]
public class BatchAddTests
{
    /// <summary>
    /// Adds three items in one call and verifies that the stored vectors came from the
    /// batch generator method rather than from the per-text method.
    /// </summary>
    [TestMethod]
    public async Task AddTextsAsync_UsesBatchEmbeddings_WhenAvailable()
    {
        var db = new BatchMockMemoryVectorDatabase();

        var inputs = new (string text, string? metadata)[]
        {
            ("one", "m1"),
            ("two", "m2"),
            ("three", "m3")
        };

        var ids = await db.AddTextsAsync(inputs);

        Assert.AreEqual(3, ids.Count);

        var results = db.Search("one");
        Assert.AreEqual(3, results.Texts.Count());

        foreach (var item in db)
        {
            // Both mock paths return vectors of length 5 ...
            Assert.AreEqual(5, item.Vector.Length);

            // ... but only the batch path uses 0.9 as the first component (the per-text
            // path uses 0.1), so this is the assertion that proves batching was used.
            Assert.AreEqual(0.9f, item.Vector[0], 1e-6f);
        }
    }
}

/// <summary>
/// Test-only vector database: closes <c>MemoryVectorDatabaseBase</c> over the type arguments
/// listed below and wires it to a <see cref="MockBatchEmbeddingsGenerator"/>.
/// </summary>
public class BatchMockMemoryVectorDatabase
: MemoryVectorDatabaseBase<
int,
string,
MemoryDictionaryVectorStore<int, string>,
IntIdGenerator,
CosineSimilarityVectorComparer
>
{
/// <summary>
/// Creates the database with a new mock batch embeddings generator and a new, empty
/// <c>MemoryDictionaryVectorStore</c> passed to the base constructor.
/// </summary>
public BatchMockMemoryVectorDatabase()
: base(
new MockBatchEmbeddingsGenerator(),
new MemoryDictionaryVectorStore<int, string>()
)
{ }
}

/// <summary>
/// Deterministic embeddings generator for tests. The per-text and batch methods return
/// fixed five-element vectors that differ only in their first component (0.1 vs 0.9),
/// so a test can tell which method produced a stored vector.
/// </summary>
public class MockBatchEmbeddingsGenerator : IEmbeddingsGenerator, IBatchEmbeddingsGenerator
{
    /// <summary>
    /// Returns the fixed per-text vector, whose first component is 0.1.
    /// </summary>
    /// <param name="text">Ignored; the same vector values are returned for every text.</param>
    /// <returns>A completed task holding a new five-element vector.</returns>
    public Task<float[]> GenerateEmbeddingsAsync(string text)
    {
        // No asynchronous work is done, so return a completed task instead of
        // marking the method async and suppressing warning CS1998.
        return Task.FromResult(new float[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f });
    }

    /// <summary>
    /// Returns one fixed batch vector per input text, whose first component is 0.9.
    /// </summary>
    /// <param name="texts">Texts to embed; only their count affects the result.</param>
    /// <returns>A completed task holding a list with one new five-element vector per input.</returns>
    public Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts)
    {
        // Return a different first value to ensure we can recognize the batched path if needed
        IReadOnlyList<float[]> vectors = texts
            .Select(_ => new float[] { 0.9f, 0.2f, 0.3f, 0.4f, 0.5f })
            .ToList();

        return Task.FromResult(vectors);
    }
}
24 changes: 24 additions & 0 deletions src/SharpVectorTest/VectorDatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,30 @@ public void BasicMemoryVectorDatabase_05()
Assert.AreEqual("metadata2", results.Texts.First().Metadata);
}

/// <summary>
/// Loads three texts through the batch <c>AddTextsAsync</c> API and verifies that searching
/// for an emoji returns the matching text with its similarity, ID and metadata.
/// </summary>
[TestMethod]
public async System.Threading.Tasks.Task BasicMemoryVectorDatabase_05_Batch()
{
    var vdb = new BasicMemoryVectorDatabase();

    // Load Vector Database with some sample text
    var inputs = new (string text, string? metadata)[]
    {
        ("The 👑 King", "metadata1"),
        ("It's 🔥 Fire.", "metadata2"),
        ("No emoji", "metadata3")
    };

    // Await rather than block with .Wait(), so a failure surfaces as the original
    // exception instead of being wrapped in an AggregateException.
    await vdb.AddTextsAsync(inputs);

    var results = vdb.Search("🔥", pageCount: 1);

    Assert.AreEqual(1, results.Texts.Count());

    var top = results.Texts.First();
    Assert.AreEqual(0.5773503184318542, top.Similarity);
    Assert.AreEqual("It's 🔥 Fire.", top.Text);
    Assert.AreEqual(2, top.Id);
    Assert.AreEqual("metadata2", top.Metadata);
}


[TestMethod]
public void BasicMemoryVectorDatabase_06()
{
Expand Down
Loading