diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index eaa9d61..9f38f77 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -1,17 +1,9 @@ -name: Build and Release +name: Build Release on: push: branches: - main - - dev - paths-ignore: - - 'docs/**' - - mkdocs.yml - pull_request: - branches: - - main - - dev paths-ignore: - 'docs/**' - mkdocs.yml @@ -39,6 +31,16 @@ jobs: - name: Build run: dotnet build --configuration Release --no-restore + # The configuration must match the Build step above: with --no-build, dotnet test would otherwise look for Debug binaries that were never built. + - name: Run tests with code coverage + run: dotnet test --no-build --configuration Release --verbosity normal --results-directory "./TestResults/Coverage/" --collect:"XPlat Code Coverage" + + - name: Upload test results artifact + uses: actions/upload-artifact@v4 + with: + name: test-results + path: '**/TestResults/**' + - name: Performance Test run: dotnet run --project SharpVectorPerformance --configuration Release diff --git a/.github/workflows/dotnet-tests.yml b/.github/workflows/dotnet-tests.yml index e56f1e4..8a7498e 100644 --- a/.github/workflows/dotnet-tests.yml +++ b/.github/workflows/dotnet-tests.yml @@ -1,13 +1,6 @@ name: .NET Tests on: - push: - branches: - - main - - dev - paths-ignore: - - 'docs/**' - - 'mkdocs.yml' pull_request: branches: - main @@ -18,7 +11,7 @@ on: workflow_dispatch: jobs: - build: + tests: runs-on: ubuntu-latest defaults: run: @@ -40,10 +33,143 @@ jobs: run: dotnet build --no-restore - name: Run tests with code coverage - run: dotnet test --no-build --verbosity normal --results-directory "./TestResults/Coverage/" --collect:"XPlat Code Coverage" + run: dotnet test --no-build --verbosity normal --results-directory "./TestResults/Coverage/" --logger "trx;LogFileName=test_results.trx" --collect:"XPlat Code Coverage" + + # - name: Publish test results + # uses: dorny/test-reporter@v1 + # with: + # name: tests + # path: src/TestResults/Coverage/test_results.trx + # reporter: dotnet-trx + # fail-on-error: true - name: Upload test results artifact 
uses: actions/upload-artifact@v4 with: name: test-results path: '**/TestResults/**' + + + + + - name: Install xmlstarlet + run: sudo apt-get update && sudo apt-get install -y xmlstarlet + + - name: Summarize test results from .trx + run: | + TRX_FILE="TestResults/Coverage/test_results.trx" + + # Register the namespace (ns) and query using that + PASSED=$(xmlstarlet sel -N ns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010" -t -v "count(//ns:UnitTestResult[@outcome='Passed'])" "$TRX_FILE") + FAILED=$(xmlstarlet sel -N ns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010" -t -v "count(//ns:UnitTestResult[@outcome='Failed'])" "$TRX_FILE") + SKIPPED=$(xmlstarlet sel -N ns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010" -t -v "count(//ns:UnitTestResult[@outcome='NotExecuted'])" "$TRX_FILE") + TOTAL=$(xmlstarlet sel -N ns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010" -t -v "count(//ns:UnitTestResult)" "$TRX_FILE") + + echo "## 🧪 Test Results Summary" >> $GITHUB_STEP_SUMMARY + + # Write message based on test outcome + if [ "$FAILED" -eq 0 ]; then + echo "✅ All tests passed!" >> $GITHUB_STEP_SUMMARY + else + echo "❌ $FAILED test(s) failed!" 
>> $GITHUB_STEP_SUMMARY + fi + + BAR_LENGTH=10 + + make_bar() { + COUNT=$1 + TOTAL=$2 + CHAR=$3 + FILLED=$(( (COUNT * BAR_LENGTH + TOTAL / 2) / TOTAL )) + EMPTY=$(( BAR_LENGTH - FILLED )) + + BAR="" + for ((i=0; i> $GITHUB_STEP_SUMMARY + + + + + - name: Install ReportGenerator + run: dotnet tool install -g dotnet-reportgenerator-globaltool + + - name: Generate Markdown report + run: | + reportgenerator \ + -reports:./TestResults/Coverage/**/coverage.cobertura.xml \ + -targetdir:./coveragereport \ + -reporttypes:MarkdownSummaryGithub + + - name: Append coverage to summary + run: | + echo "## Code Coverage Summary" >> $GITHUB_STEP_SUMMARY + cat ./coveragereport/SummaryGithub.md >> $GITHUB_STEP_SUMMARY + + - name: Upload code coverage report + uses: actions/upload-artifact@v4 + with: + name: CodeCoverage + path: ./src/TestResults/Coverage/**/coverage.cobertura.xml + + + + performance: + runs-on: ubuntu-latest + defaults: + run: + working-directory: src + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup .NET Core + uses: actions/setup-dotnet@v2 + with: + dotnet-version: '8.0.x' # Adjust the version as needed + + - name: Restore dependencies + run: dotnet restore + + - name: Build + run: dotnet build --no-restore + + - name: Performance Test + run: dotnet run --project SharpVectorPerformance --configuration Release + + - name: Performance Results + run: | + echo "## Performance Results" > $GITHUB_STEP_SUMMARY + cat ./BenchmarkDotNet.Artifacts/results/SharpVectorPerformance.MemoryVectorDatabasePerformance-report-github.md >> $GITHUB_STEP_SUMMARY + + - name: Upload Performance artifact + uses: actions/upload-artifact@v4 + with: + name: performance-results + path: './src/BenchmarkDotNet.Artifacts/*' diff --git a/.github/workflows/ghpages-mkdocs.yml b/.github/workflows/ghpages-mkdocs.yml index e26d799..c1c9bcc 100644 --- a/.github/workflows/ghpages-mkdocs.yml +++ b/.github/workflows/ghpages-mkdocs.yml @@ -9,6 +9,8 @@ on: - 
.github/workflows/ghpages-mkdocs.yml - docs/** - mkdocs.yml + # No paths-ignore here: GitHub Actions rejects a workflow that sets both paths and paths-ignore on the same event. + # Other files under .github/** match none of the paths above, so they already do not trigger this workflow. workflow_dispatch: # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages diff --git a/CHANGELOG.md b/CHANGELOG.md index 868ef80..812286d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## vNext + +Add: + +- Added `IVectorTextResultItem.Similarity` and marked `IVectorTextResultItem.VectorComparison` obsolete. `VectorComparison` will be removed in the future. +- Added more comment metadata to code + ## v2.1.1 Add: diff --git a/README.md b/README.md index 665d328..1ada860 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ `Build5Nines.SharpVector` is an in-memory vector database library designed for .NET applications. It allows you to store, search, and manage text data using vector representations. The library is customizable and extensible, enabling support for different vector comparison methods, preprocessing techniques, and vectorization strategies. 
-[![.NET Core Tests](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml) -[![Build and Release](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml) +[![Release Build](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml) ![Libraries.io dependency status for GitHub repo](https://img.shields.io/librariesio/github/build5nines/sharpvector) [![NuGet](https://img.shields.io/nuget/v/Build5Nines.SharpVector.svg)](https://www.nuget.org/packages/Build5Nines.SharpVector/) diff --git a/docs/docs/index.md b/docs/docs/index.md index 2b32c5c..c90496a 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -6,8 +6,7 @@ description: The lightweight, in-memory, semantic search, text vector database f **Build5Nines.SharpVector** is the lightweight, in-memory, semantic search, text vector database built for .NET applications. It enables fast and flexible vector-based similarity search for text data — ideal for search engines, recommendation systems, semantic analysis, and AI-enhanced features. 
-[![.NET Core Tests](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml) -[![Build and Release](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml) +[![Release Build](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml) ![Libraries.io dependency status for GitHub repo](https://img.shields.io/librariesio/github/build5nines/sharpvector) [![NuGet](https://img.shields.io/nuget/v/Build5Nines.SharpVector.svg)](https://www.nuget.org/packages/Build5Nines.SharpVector/) diff --git a/docs/docs/resources/index.md b/docs/docs/resources/index.md index acd27e2..864f9bd 100644 --- a/docs/docs/resources/index.md +++ b/docs/docs/resources/index.md @@ -13,3 +13,4 @@ Here's a couple helpful tutorial links with additional documentation and example - [Enhanced In-Memory Text Vector Search in .NET with SharpVector and OpenAI Embeddings](https://build5nines.com/enhanced-in-memory-text-vector-search-in-net-with-sharpvector-and-openai-embeddings/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann - [Build a Generative AI + RAG App in C# with Phi-3, ONNX, and SharpVector](https://build5nines.com/build-a-generative-ai-rag-app-in-c-with-phi-3-onnx-and-sharpvector/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann - [Implementing Local RAG using Phi-3 ONNX Runtime and Sidecar Pattern on Linux App Service](https://azure.github.io/AppService/2024/09/03/Phi3-vector.html) by Tulika Chaudharie (Principal Product Manager at Microsoft for Azure App Service) +- [Semantic Search PDF Files Locally using .NET / C# and 
Build5Nines.SharpVector](https://build5nines.com/semantic-search-pdf-files-locally-using-c-and-build5nines-sharpvector/) by Chris Pietschmann diff --git a/samples/azure/document-intelligence/b59-azure-doc-intelligence/Program.cs b/samples/azure/document-intelligence/b59-azure-doc-intelligence/Program.cs new file mode 100644 index 0000000..bec9628 --- /dev/null +++ b/samples/azure/document-intelligence/b59-azure-doc-intelligence/Program.cs @@ -0,0 +1,156 @@ +using Azure; +using Azure.AI.DocumentIntelligence; +using System; +using System.Text; +using System.IO; +using System.Threading.Tasks; +using Build5Nines.SharpVector; + +// This sample demonstrates how to use the Document Intelligence client library to analyze a document using the prebuilt-read model. +string endpoint = "https://.cognitiveservices.azure.com/"; +string apiKey = ""; +string filePath = "document.pdf"; // Can be .pdf, .docx, .jpg, etc. + +// Create timers to measure how long it takes to run the code +var overallTimer = new System.Diagnostics.Stopwatch(); +var stepTimer = new System.Diagnostics.Stopwatch(); +overallTimer.Start(); + + +// Create a DocumentIntelligenceClient +var credential = new AzureKeyCredential(apiKey); +var client = new DocumentIntelligenceClient(new Uri(endpoint), credential); + +var vdb = new BasicMemoryVectorDatabase(); + + + + + +// Read the file into a BinaryData object +Console.WriteLine("Reading file..."); +stepTimer.Start(); + +using var stream = File.OpenRead(filePath); +byte[] buffer = new byte[stream.Length]; +await stream.ReadExactlyAsync(buffer, 0, buffer.Length); // ReadAsync may return before the buffer is full; ReadExactlyAsync reads every requested byte or throws +var binaryData = BinaryData.FromBytes(buffer); + +stepTimer.Stop(); +Console.WriteLine($"File loaded into memory: {stepTimer.ElapsedMilliseconds} ms"); + +Console.WriteLine("Analyzing document with Azure Document Intelligence..."); +stepTimer.Restart(); + +// Analyze the document using the prebuilt-read model +var operation = await client.AnalyzeDocumentAsync( + WaitUntil.Completed, + "prebuilt-read", + 
binaryData); + +var docResult = operation.Value; + +stepTimer.Stop(); +Console.WriteLine($"Document analysis completed: {stepTimer.ElapsedMilliseconds} ms"); + +stepTimer.Restart(); +Console.WriteLine("Loading SharpVector database..."); + +foreach (var page in docResult.Pages) +{ + var sb = new StringBuilder(); + foreach (var line in page.Lines) + { + sb.AppendLine(line.Content); + } + + // Add the text to the vector database + // Let's use the Page Number as the metadata + // Note: In a real-world scenario, you might want to use more meaningful metadata + var textMetadata = page.PageNumber.ToString(); + vdb.AddText(sb.ToString(), textMetadata); +} + +stepTimer.Stop(); +Console.WriteLine($"SharpVector database loaded: {stepTimer.ElapsedMilliseconds} ms"); + + + + + +// Console.WriteLine(""); +// Console.WriteLine("Loading PDF File into vector database..."); +// stepTimer.Restart(); +// // read pdf file with PdfPig locally +// var vdb2 = new BasicMemoryVectorDatabase(); +// using (var pdfDocument = UglyToad.PdfPig.PdfDocument.Open(filePath)) +// { +// foreach (var page in pdfDocument.GetPages()) +// { +// // Add the text to the vector database +// // Let's use the Page Number as the metadata +// // Note: In a real-world scenario, you might want to use more meaningful metadata +// var metadata = page.Number.ToString(); +// vdb.AddText(page.Text, metadata); +// } +// } +// stepTimer.Stop(); +// Console.WriteLine($"Vector database loaded: {stepTimer.ElapsedMilliseconds} ms"); + + + + + + + + +Console.WriteLine(""); +Console.WriteLine("Searching in SharpVector database for \"Azure ML\" with similarity score > 0.5..."); +stepTimer.Restart(); + +var query = "Azure ML"; +var semanticResults = vdb.Search( + query, + threshold: 0.5f // Set a threshold for the similarity score to only match results above this value + ); + +stepTimer.Stop(); +Console.WriteLine($"Search completed: {stepTimer.ElapsedMilliseconds} ms"); + + +Console.WriteLine("Top Matching Results:"); +foreach 
(var result in semanticResults.Texts) +{ + //var text = result.Text; + var metadata = result.Metadata; + var similarity = result.VectorComparison; + Console.WriteLine($" - Page: {metadata} - Similarity: {similarity}"); +} + + +Console.WriteLine(""); +Console.WriteLine("Searching in SharpVector database for \"Why use a Cloud Adoption Framework strategy\", top 3 results..."); +stepTimer.Restart(); + +query = "Why use a Cloud Adoption Framework strategy"; +semanticResults = vdb.Search( + query, + pageCount: 3 // Set the number of top results to return + ); + +stepTimer.Stop(); +Console.WriteLine($"Search completed: {stepTimer.ElapsedMilliseconds} ms"); + + +Console.WriteLine("Top Matching Results:"); +foreach (var result in semanticResults.Texts) +{ + //var text = result.Text; + var metadata = result.Metadata; + var similarity = result.VectorComparison; + Console.WriteLine($" - Page: {metadata} - Similarity: {similarity}"); +} + +overallTimer.Stop(); +Console.WriteLine(""); +Console.WriteLine($"Overall processing time: {overallTimer.ElapsedMilliseconds} ms"); \ No newline at end of file diff --git a/samples/azure/document-intelligence/b59-azure-doc-intelligence/b59-azure-doc-intelligence.csproj b/samples/azure/document-intelligence/b59-azure-doc-intelligence/b59-azure-doc-intelligence.csproj new file mode 100644 index 0000000..d1e63d2 --- /dev/null +++ b/samples/azure/document-intelligence/b59-azure-doc-intelligence/b59-azure-doc-intelligence.csproj @@ -0,0 +1,17 @@ +ο»Ώ + + + Exe + net9.0 + b59_azure_doc_intelligence + enable + enable + + + + + + + + + diff --git a/samples/azure/document-intelligence/b59-azure-doc-intelligence/document.pdf b/samples/azure/document-intelligence/b59-azure-doc-intelligence/document.pdf new file mode 100644 index 0000000..0fd5cd6 Binary files /dev/null and b/samples/azure/document-intelligence/b59-azure-doc-intelligence/document.pdf differ diff --git a/samples/genai-rag-onnx/Program.cs b/samples/genai-rag-onnx/Program.cs index 
5d3471f..eef4e74 100644 --- a/samples/genai-rag-onnx/Program.cs +++ b/samples/genai-rag-onnx/Program.cs @@ -131,7 +131,7 @@ static async Task Main(string[] args) ragContext += result.Text + "\n\n"; // Print the metadata, vector comparison, and text of the result to the console Console.WriteLine($"Document: {result.Metadata}"); - Console.WriteLine($"Vector Comparison: {result.VectorComparison}"); + Console.WriteLine($"Vector Comparison: {result.Similarity}"); Console.WriteLine($"Text Chunk Length: {result.Text.Length}"); Console.WriteLine(""); } diff --git a/src/Build5Nines.SharpVector/VectorCompare/CosineSimilarityVectorComparerAsync.cs b/src/Build5Nines.SharpVector/VectorCompare/CosineSimilarityVectorComparerAsync.cs index 2ba87be..c33fd33 100644 --- a/src/Build5Nines.SharpVector/VectorCompare/CosineSimilarityVectorComparerAsync.cs +++ b/src/Build5Nines.SharpVector/VectorCompare/CosineSimilarityVectorComparerAsync.cs @@ -59,7 +59,7 @@ public float Calculate(float[] vectorA, float[] vectorB) public IEnumerable> Sort(IEnumerable> results) { - return results.OrderByDescending(s => s.VectorComparison); + return results.OrderByDescending(s => s.Similarity); } public async Task>> SortAsync(IEnumerable> results) diff --git a/src/Build5Nines.SharpVector/VectorCompare/EuclideanDistanceVectorComparerAsync.cs b/src/Build5Nines.SharpVector/VectorCompare/EuclideanDistanceVectorComparerAsync.cs index b2806b3..5c425cf 100644 --- a/src/Build5Nines.SharpVector/VectorCompare/EuclideanDistanceVectorComparerAsync.cs +++ b/src/Build5Nines.SharpVector/VectorCompare/EuclideanDistanceVectorComparerAsync.cs @@ -43,7 +43,7 @@ public float Calculate(float[] vectorA, float[] vectorB) public IEnumerable> Sort(IEnumerable> results) { - return results.OrderBy(s => s.VectorComparison); + return results.OrderBy(s => s.Similarity); } public async Task>> SortAsync(IEnumerable> results) diff --git a/src/Build5Nines.SharpVector/VectorTextResult.cs b/src/Build5Nines.SharpVector/VectorTextResult.cs 
index fd22010..997f15d 100644 --- a/src/Build5Nines.SharpVector/VectorTextResult.cs +++ b/src/Build5Nines.SharpVector/VectorTextResult.cs @@ -5,8 +5,17 @@ namespace Build5Nines.SharpVector; using System.Linq; using System.Runtime.InteropServices; +/// +/// Represents a result of a vector text search. +/// +/// The type of the identifier. +/// The type of the document. +/// The type of the metadata. public interface IVectorTextResult { + /// + /// The list of Texts found in the search results. + /// IEnumerable> Texts { get; } /// @@ -30,10 +39,20 @@ public interface IVectorTextResult public int TotalPages { get; } } +/// +/// Represents a result of a vector text search. +/// +/// The type of the metadata. public interface IVectorTextResult : IVectorTextResult { } +/// +/// Represents a result of a vector text search. +/// +/// The type of the identifier. +/// The type of the document. +/// The type of the metadata. public class VectorTextResult : IVectorTextResult { @@ -50,6 +69,9 @@ public VectorTextResult(int totalCount, int pageIndex, int totalPages, IEnumerab /// public IEnumerable> Texts { get; private set; } + /// + /// Returns true if the search returned no results. + /// public bool IsEmpty { get => Texts == null || !Texts.Any(); } /// @@ -68,6 +90,10 @@ public VectorTextResult(int totalCount, int pageIndex, int totalPages, IEnumerab public int TotalPages { get; private set; } } +/// +/// Represents a result of a vector text search. +/// +/// The type of the metadata. public class VectorTextResult : VectorTextResult, IVectorTextResult { diff --git a/src/Build5Nines.SharpVector/VectorTextResultItem.cs b/src/Build5Nines.SharpVector/VectorTextResultItem.cs index 286adec..101c3d2 100644 --- a/src/Build5Nines.SharpVector/VectorTextResultItem.cs +++ b/src/Build5Nines.SharpVector/VectorTextResultItem.cs @@ -3,46 +3,106 @@ namespace Build5Nines.SharpVector; +/// +/// Represents a result item from a semantic search on a vector database. 
+/// +/// The type of the document. +/// The type of the metadata. public interface IVectorTextResultItem { + /// + /// The string of text that was vectorized. + /// TDocument Text{ get; } + + /// + /// The metadata associated with the text. + /// TMetadata? Metadata { get; } + /// + /// The vector similarity score between the query and the text. (This is deprecated, use 'Similarity' instead) + /// + [Obsolete("Use 'Similarity' instead")] float VectorComparison { get; } + + /// + /// The vector similarity score between the query and the text. + /// + float Similarity { get; } } +/// +/// Represents a result item from a semantic search on a vector database. +/// +/// The type of the ID. +/// The type of the document. +/// The type of the metadata. public interface IVectorTextResultItem : IVectorTextResultItem { TId Id { get; } } +/// +/// Represents a result item from a semantic search on a vector database. +/// +/// The type of the metadata. public interface IVectorTextResultItem : IVectorTextResultItem, IVectorTextResultItem { } +/// +/// Represents a result item from a semantic search on a vector database. +/// +/// The type of the ID. +/// The type of the document. +/// The type of the metadata. public class VectorTextResultItem : IVectorTextResultItem, IVectorTextResultItem { private IVectorTextItem _item; private TId _id; - public VectorTextResultItem(TId id, IVectorTextItem item, float vectorComparison) + public VectorTextResultItem(TId id, IVectorTextItem item, float similarity) { _id = id; _item = item; - VectorComparison = vectorComparison; + Similarity = similarity; } + /// + /// The string of text that was vectorized. + /// public TDocument Text { get => _item.Text; } + + /// + /// The metadata associated with the text. + /// public TMetadata? Metadata { get => _item.Metadata; } public TId Id { get => _id; } + /// + /// The vector representation / embeddings of the text. 
+ /// public ImmutableArray Vectors { get => ImmutableArray.Create(_item.Vector); } - public float VectorComparison { get; private set; } + /// + /// The vector similarity score between the query and the text. + /// + public float Similarity { get; private set; } + + /// + /// The vector similarity score between the query and the text. (This is deprecated, use 'Similarity' instead) + /// + [Obsolete("Use 'Similarity' instead")] + public float VectorComparison { get => Similarity; } } +/// +/// Represents a result item from a semantic search on a vector database. +/// +/// The type of the metadata. public class VectorTextResultItem : VectorTextResultItem, IVectorTextResultItem { diff --git a/src/SharpVectorTest/Data/TextDataLoaderAsyncTests.cs b/src/SharpVectorTest/Data/TextDataLoaderAsyncTests.cs index c02414a..3d195ba 100644 --- a/src/SharpVectorTest/Data/TextDataLoaderAsyncTests.cs +++ b/src/SharpVectorTest/Data/TextDataLoaderAsyncTests.cs @@ -46,6 +46,6 @@ public async Task TextDataLoaderAsync_Paragraphs_01() Assert.AreEqual(1, results.Texts.Count()); Assert.AreEqual("The Lion King is a 1994 Disney animated film about a young lion cub named Simba who is the heir to the throne of an African savanna. 
", results.Texts.First().Text); Assert.AreEqual("{ chuckSize: \"133\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } } \ No newline at end of file diff --git a/src/SharpVectorTest/Regression/RegressionTests.cs b/src/SharpVectorTest/Regression/RegressionTests.cs index bb84168..10b30b3 100644 --- a/src/SharpVectorTest/Regression/RegressionTests.cs +++ b/src/SharpVectorTest/Regression/RegressionTests.cs @@ -25,7 +25,7 @@ public void VectorDatabaseVersion_2_0_2_001() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] diff --git a/src/SharpVectorTest/VectorDatabaseTests.cs b/src/SharpVectorTest/VectorDatabaseTests.cs index c7117c7..fb9e669 100644 --- a/src/SharpVectorTest/VectorDatabaseTests.cs +++ b/src/SharpVectorTest/VectorDatabaseTests.cs @@ -28,7 +28,7 @@ public void BasicMemoryVectorDatabase_01() Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(1, results.Texts.First().Id); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -44,7 +44,7 @@ public void BasicMemoryVectorDatabase_02() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.IsNull(results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] 
@@ -60,7 +60,7 @@ public void BasicMemoryVectorDatabase_03() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -76,7 +76,7 @@ public void BasicMemoryVectorDatabase_04() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.IsNull(results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -92,7 +92,7 @@ public void BasicMemoryVectorDatabase_05() var results = vdb.Search("πŸ”₯", pageCount: 1); Assert.AreEqual(1, results.Texts.Count()); - Assert.AreEqual(0.5773503184318542, results.Texts.First().VectorComparison); + Assert.AreEqual(0.5773503184318542, results.Texts.First().Similarity); Assert.AreEqual("It's πŸ”₯ Fire.", results.Texts.First().Text); Assert.AreEqual(2, results.Texts.First().Id); Assert.AreEqual("metadata2", results.Texts.First().Metadata); @@ -129,7 +129,7 @@ public void BasicMemoryVectorDatabase_SaveLoadAsync_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); var filename = "basicmemoryvectordatabase_saveload_01.b59vdb"; vdb.SaveToFileAsync(filename).Wait(); @@ -142,7 +142,7 @@ public void BasicMemoryVectorDatabase_SaveLoadAsync_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", 
results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -158,7 +158,7 @@ public void BasicMemoryVectorDatabase_SaveLoad_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); var filename = "basicmemoryvectordatabase_saveload_01.b59vdb"; vdb.SaveToFile(filename); @@ -171,7 +171,7 @@ public void BasicMemoryVectorDatabase_SaveLoad_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -187,7 +187,7 @@ public async Task BasicMemoryVectorDatabase_SaveLoadBinaryStreamAsync_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); var stream = new MemoryStream(); await vdb.SerializeToBinaryStreamAsync(stream); @@ -201,7 +201,7 @@ public async Task BasicMemoryVectorDatabase_SaveLoadBinaryStreamAsync_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("[some metadata here]", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + 
Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -217,7 +217,7 @@ public void SimpleTest_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -249,7 +249,7 @@ public void SimpleTest_02() Assert.AreEqual(5, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -284,7 +284,7 @@ public async Task SimpleTest_Async_01() Assert.AreEqual(5, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -338,7 +338,7 @@ public async Task SimpleTest_Async_02() Assert.AreEqual(5, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -354,7 +354,7 @@ public async Task SimpleTest_Async_03() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } 
[TestMethod] @@ -370,7 +370,7 @@ public async Task SimpleTest_Async_04() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.IsNull(results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -392,7 +392,7 @@ public void SimpleTest_MemoryVectorDatabase_UpdateTextAndMetadata_01() Assert.AreEqual(1, results.Texts.Count()); Assert.AreEqual(newText, results.Texts.First().Text); Assert.AreEqual("6.0", results.Texts.First().Metadata); - Assert.AreEqual(0.11704113334417343, results.Texts.First().VectorComparison); + Assert.AreEqual(0.11704113334417343, results.Texts.First().Similarity); } [TestMethod] @@ -408,7 +408,7 @@ public void SimpleTest_IMemoryVectorDatabase() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -504,7 +504,7 @@ public void Text_Metadata_String_01() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); } [TestMethod] @@ -520,7 +520,7 @@ public void Text_Metadata_String_Update() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, 
results.Texts.First().Similarity); vdb.UpdateTextMetadata(id, "{ value: \"New Value\" }"); @@ -676,7 +676,7 @@ public void EuclideanDistanceVectorComparerAsyncMemoryVectorDatabase_1() Assert.AreEqual(5, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.StartsWith("The Lion King is a 1994 Disney")); Assert.AreEqual(5.0, results.Texts.First().Metadata); - Assert.AreEqual(1.1491886377334595, results.Texts.First().VectorComparison); + Assert.AreEqual(1.1491886377334595, results.Texts.First().Similarity); } [TestMethod] @@ -710,7 +710,7 @@ public async Task SerializeDeserializeStream_001() Assert.AreEqual(5, firstResult.Texts.Count()); Assert.IsTrue(firstResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, firstResult.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, firstResult.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, firstResult.Texts.First().Similarity); var stream = new MemoryStream(); databaseOne.SerializeToBinaryStream(stream); @@ -724,7 +724,7 @@ public async Task SerializeDeserializeStream_001() Assert.AreEqual(5, secondResult.Texts.Count()); Assert.IsTrue(secondResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, secondResult.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().Similarity); // Compare both results Assert.AreEqual(firstResult.Texts.Count(), secondResult.Texts.Count()); @@ -762,7 +762,9 @@ public async Task SaveLoadFile_001() Assert.AreEqual(5, firstResult.Texts.Count()); Assert.IsTrue(firstResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, firstResult.Texts.First().Metadata); +#pragma warning disable CS0618 // Type or member is obsolete Assert.AreEqual(0.3396831452846527, firstResult.Texts.First().VectorComparison); +#pragma warning restore CS0618 // Type or member is obsolete var fileName = 
"vector_database.b59vdb"; await databaseOne.SaveToFileAsync(fileName); @@ -774,7 +776,7 @@ public async Task SaveLoadFile_001() Assert.AreEqual(5, secondResult.Texts.Count()); Assert.IsTrue(secondResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, secondResult.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().Similarity); // Compare both results Assert.AreEqual(firstResult.Texts.Count(), secondResult.Texts.Count()); @@ -819,7 +821,7 @@ public async Task SaveLoadFile_002() Assert.AreEqual(5, firstResult.Texts.Count()); Assert.IsTrue(firstResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, firstResult.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, firstResult.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, firstResult.Texts.First().Similarity); var fileName = "vector_database.b59vdb"; var timer = new Stopwatch(); @@ -845,7 +847,7 @@ public async Task SaveLoadFile_002() Assert.AreEqual(5, secondResult.Texts.Count()); Assert.IsTrue(secondResult.Texts.First().Text.Contains("Lion King")); Assert.AreEqual(5.0, secondResult.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, secondResult.Texts.First().Similarity); // Compare both results Assert.AreEqual(firstResult.Texts.Count(), secondResult.Texts.Count()); @@ -911,7 +913,7 @@ public async Task DatabaseFile_LoadStream_002() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); vdb.UpdateTextMetadata(id, "{ value: \"New Value\" }"); @@ -937,7 
+939,7 @@ public async Task DatabaseFile_LoadStream_003() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); vdb.UpdateTextMetadata(id, "{ value: \"New Value\" }"); @@ -963,7 +965,7 @@ public async Task DatabaseFile_LoadFile_002() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); vdb.UpdateTextMetadata(id, "{ value: \"New Value\" }"); @@ -988,7 +990,7 @@ public async Task DatabaseFile_LoadFile_003() Assert.AreEqual(1, results.Texts.Count()); Assert.IsTrue(results.Texts.First().Text.Contains("Lion King")); Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata); - Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison); + Assert.AreEqual(0.3396831452846527, results.Texts.First().Similarity); vdb.UpdateTextMetadata(id, "{ value: \"New Value\" }");