diff --git a/Directory.Packages.props b/Directory.Packages.props
index 8d4873da65..c61c218659 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -23,5 +23,6 @@
+    <PackageVersion Include="BenchmarkDotNet" Version="0.14.0" />
\ No newline at end of file
diff --git a/GraphRag.slnx b/GraphRag.slnx
index 11433fb994..132d2a9527 100644
--- a/GraphRag.slnx
+++ b/GraphRag.slnx
@@ -14,4 +14,7 @@
+  <Folder Name="/benchmarks/">
+    <Project Path="benchmarks/ManagedCode.GraphRag.Benchmarks/ManagedCode.GraphRag.Benchmarks.csproj" />
+  </Folder>
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Cache/MemoryPipelineCacheBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Cache/MemoryPipelineCacheBenchmarks.cs
new file mode 100644
index 0000000000..728b3764b3
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Cache/MemoryPipelineCacheBenchmarks.cs
@@ -0,0 +1,94 @@
+using BenchmarkDotNet.Attributes;
+using GraphRag.Cache;
+using Microsoft.Extensions.Caching.Memory;
+
+namespace ManagedCode.GraphRag.Benchmarks.Cache;
+
+[MemoryDiagnoser]
+public class MemoryPipelineCacheBenchmarks
+{
+    private IMemoryCache _memoryCache = null!;
+    private MemoryPipelineCache _cache = null!;
+    private string[] _keys = null!;
+    private object[] _values = null!;
+
+    [Params(1_000, 10_000, 100_000)]
+    public int EntryCount { get; set; }
+
+    [GlobalSetup]
+    public async Task Setup()
+    {
+        _memoryCache = new MemoryCache(new MemoryCacheOptions());
+        _cache = new MemoryPipelineCache(_memoryCache);
+
+        _keys = new string[EntryCount];
+        _values = new object[EntryCount];
+
+        for (var i = 0; i < EntryCount; i++)
+        {
+            _keys[i] = $"key-{i:D8}";
+            _values[i] = new { Id = i, Name = $"Value-{i}", Data = new byte[100] };
+        }
+
+        // Populate once up front so the read-only benchmarks (Get/Has) measure
+        // lookup cost only, not the cost of writing the entries first.
+        await PopulateAsync();
+    }
+
+    [GlobalCleanup]
+    public void Cleanup()
+    {
+        _memoryCache.Dispose();
+    }
+
+    // ClearCache empties the cache, so refill it before each measured iteration.
+    [IterationSetup(Target = nameof(ClearCache))]
+    public void RepopulateForClear()
+    {
+        PopulateAsync().GetAwaiter().GetResult();
+    }
+
+    [Benchmark]
+    public Task SetEntries()
+    {
+        return PopulateAsync();
+    }
+
+    [Benchmark]
+    public async Task GetEntries()
+    {
+        for (var i = 0; i < EntryCount; i++)
+        {
+            _ = await _cache.GetAsync(_keys[i]);
+        }
+    }
+
+    [Benchmark]
+    public async Task HasEntries()
+    {
+        for (var i = 0; i < EntryCount; i++)
+        {
+            _ = await _cache.HasAsync(_keys[i]);
+        }
+    }
+
+    [Benchmark]
+    public Task ClearCache()
+    {
+        return _cache.ClearAsync();
+    }
+
+    [Benchmark]
+    public IPipelineCache CreateChildScope()
+    {
+        return _cache.CreateChild("child-scope");
+    }
+
+    private async Task PopulateAsync()
+    {
+        for (var i = 0; i < EntryCount; i++)
+        {
+            await _cache.SetAsync(_keys[i], _values[i]);
+        }
+    }
+}
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/MarkdownTextChunkerBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/MarkdownTextChunkerBenchmarks.cs
new file mode 100644
index 0000000000..6be7c2a7eb
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/MarkdownTextChunkerBenchmarks.cs
@@ -0,0 +1,84 @@
+using BenchmarkDotNet.Attributes;
+using GraphRag.Chunking;
+using GraphRag.Config;
+
+namespace ManagedCode.GraphRag.Benchmarks.Chunking;
+
+[MemoryDiagnoser]
+public class MarkdownTextChunkerBenchmarks
+{
+    private MarkdownTextChunker _chunker = null!;
+    private ChunkSlice[] _smallDocument = null!;
+    private ChunkSlice[] _mediumDocument = null!;
+    private ChunkSlice[] _largeDocument = null!;
+    private ChunkingConfig _config = null!;
+
+    [Params(512, 1024, 2048)]
+    public int ChunkSize { get; set; }
+
+    [Params(0, 64, 128)]
+    public int ChunkOverlap { get; set; }
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        _chunker = new MarkdownTextChunker();
+        _config = new ChunkingConfig
+        {
+            Size = ChunkSize,
+            Overlap = ChunkOverlap,
+            Strategy = ChunkStrategyType.Sentence
+        };
+
+        // Generate test documents of different sizes
+        _smallDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(1_000)) };
+        _mediumDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(100_000)) };
+        _largeDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(1_000_000)) };
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkSmallDocument() // NOTE(review): element type restored from stripped generics — confirm against MarkdownTextChunker.Chunk
+    {
+        return _chunker.Chunk(_smallDocument, _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkMediumDocument()
+    {
+        return _chunker.Chunk(_mediumDocument, _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkLargeDocument()
+    {
+        return _chunker.Chunk(_largeDocument, _config);
+    }
+
+    private static string GenerateMarkdownDocument(int approximateLength)
+    {
+        var paragraphs = new[]
+        {
+            "# Introduction\n\nThis is a sample markdown document for benchmarking purposes. It contains various markdown elements including headers, paragraphs, lists, and code blocks.\n\n",
+            "## Section One\n\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.\n\n",
+            "### Subsection A\n\nDuis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident.\n\n",
+            "- First item in the list\n- Second item with more content\n- Third item explaining something important\n\n",
+            "1. Numbered first item\n2. Numbered second item\n3. Numbered third item with explanation\n\n",
+            "```csharp\npublic class Example\n{\n    public void Method() { }\n}\n```\n\n",
+            "## Section Two\n\nSunt in culpa qui officia deserunt mollit anim id est laborum. Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium.\n\n",
+            "> This is a blockquote that spans multiple lines and contains important information that should be preserved during chunking.\n\n",
+            "### Subsection B\n\nNemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt.\n\n",
+            "| Column 1 | Column 2 | Column 3 |\n|----------|----------|----------|\n| Data 1   | Data 2   | Data 3   |\n| Data 4   | Data 5   | Data 6   |\n\n"
+        };
+
+        var result = new System.Text.StringBuilder(approximateLength + 1000);
+        var index = 0;
+
+        while (result.Length < approximateLength)
+        {
+            result.Append(paragraphs[index % paragraphs.Length]);
+            index++;
+        }
+
+        return result.ToString();
+    }
+}
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/TokenTextChunkerBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/TokenTextChunkerBenchmarks.cs
new file mode 100644
index 0000000000..045fc4557b
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Chunking/TokenTextChunkerBenchmarks.cs
@@ -0,0 +1,82 @@
+using BenchmarkDotNet.Attributes;
+using GraphRag.Chunking;
+using GraphRag.Config;
+
+namespace ManagedCode.GraphRag.Benchmarks.Chunking;
+
+[MemoryDiagnoser]
+public class TokenTextChunkerBenchmarks
+{
+    private TokenTextChunker _chunker = null!;
+    private ChunkSlice[] _smallDocument = null!;
+    private ChunkSlice[] _mediumDocument = null!;
+    private ChunkSlice[] _largeDocument = null!;
+    private ChunkingConfig _config = null!;
+
+    [Params(512, 1024, 2048)]
+    public int ChunkSize { get; set; }
+
+    [Params(0, 64, 128)]
+    public int ChunkOverlap { get; set; }
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        _chunker = new TokenTextChunker();
+        _config = new ChunkingConfig
+        {
+            Size = ChunkSize,
+            Overlap = ChunkOverlap,
+            Strategy = ChunkStrategyType.Tokens
+        };
+
+        // Generate plain text documents of different sizes
+        _smallDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(1_000)) };
+        _mediumDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(100_000)) };
+        _largeDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(1_000_000)) };
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkSmallDocument() // NOTE(review): element type restored from stripped generics — confirm against TokenTextChunker.Chunk
+    {
+        return _chunker.Chunk(_smallDocument, _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkMediumDocument()
+    {
+        return _chunker.Chunk(_mediumDocument, _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyList<TextChunk> ChunkLargeDocument()
+    {
+        return _chunker.Chunk(_largeDocument, _config);
+    }
+
+    private static string GeneratePlainTextDocument(int approximateLength)
+    {
+        var sentences = new[]
+        {
+            "The quick brown fox jumps over the lazy dog. ",
+            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
+            "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
+            "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ",
+            "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore. ",
+            "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia. ",
+            "Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit. ",
+            "Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet. "
+        };
+
+        var result = new System.Text.StringBuilder(approximateLength + 200);
+        var index = 0;
+
+        while (result.Length < approximateLength)
+        {
+            result.Append(sentences[index % sentences.Length]);
+            index++;
+        }
+
+        return result.ToString();
+    }
+}
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/CommunityBuilderBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/CommunityBuilderBenchmarks.cs
new file mode 100644
index 0000000000..be2691b0ca
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/CommunityBuilderBenchmarks.cs
@@ -0,0 +1,104 @@
+using System.Collections.Immutable;
+using BenchmarkDotNet.Attributes;
+using GraphRag.Community;
+using GraphRag.Config;
+using GraphRag.Entities;
+using GraphRag.Relationships;
+
+namespace ManagedCode.GraphRag.Benchmarks.Community;
+
+[MemoryDiagnoser]
+public class CommunityBuilderBenchmarks
+{
+    private EntityRecord[] _entities = null!;
+    private RelationshipRecord[] _relationships = null!;
+    private ClusterGraphConfig _labelPropagationConfig = null!;
+    private ClusterGraphConfig _connectedComponentsConfig = null!;
+
+    [Params(100, 1_000, 5_000)]
+    public int NodeCount { get; set; }
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        _labelPropagationConfig = new ClusterGraphConfig
+        {
+            Algorithm = CommunityDetectionAlgorithm.FastLabelPropagation,
+            MaxIterations = 20,
+            MaxClusterSize = 25,
+            Seed = 42
+        };
+
+        _connectedComponentsConfig = new ClusterGraphConfig
+        {
+            Algorithm = CommunityDetectionAlgorithm.ConnectedComponents,
+            MaxClusterSize = 25,
+            Seed = 42
+        };
+
+        (_entities, _relationships) = GenerateGraph(NodeCount, avgEdgesPerNode: 5);
+    }
+
+    [Benchmark(Baseline = true)]
+    public IReadOnlyList<CommunityRecord> FastLabelPropagation() // NOTE(review): element type restored from stripped generics — confirm CommunityBuilder.Build's return type
+    {
+        return CommunityBuilder.Build(_entities, _relationships, _labelPropagationConfig);
+    }
+
+    [Benchmark]
+    public IReadOnlyList<CommunityRecord> ConnectedComponents()
+    {
+        return CommunityBuilder.Build(_entities, _relationships, _connectedComponentsConfig);
+    }
+
+    private static (EntityRecord[] Entities, RelationshipRecord[] Relationships) GenerateGraph(
+        int nodeCount,
+        int avgEdgesPerNode)
+    {
+        var random = new Random(42);
+        var entities = new EntityRecord[nodeCount];
+
+        for (var i = 0; i < nodeCount; i++)
+        {
+            entities[i] = new EntityRecord(
+                Id: $"entity-{i}",
+                HumanReadableId: i,
+                Title: $"Entity_{i}",
+                Type: "ENTITY",
+                Description: $"Description for entity {i}",
+                TextUnitIds: ImmutableArray.Create($"tu-{i}"),
+                Frequency: 1,
+                Degree: 0,
+                X: 0,
+                Y: 0);
+        }
+
+        var totalEdges = nodeCount * avgEdgesPerNode;
+        var relationships = new List<RelationshipRecord>(totalEdges);
+
+        for (var i = 0; i < totalEdges; i++)
+        {
+            var sourceIdx = random.Next(nodeCount);
+            var targetIdx = random.Next(nodeCount);
+
+            if (sourceIdx == targetIdx)
+            {
+                targetIdx = (targetIdx + 1) % nodeCount;
+            }
+
+            relationships.Add(new RelationshipRecord(
+                Id: $"rel-{i}",
+                HumanReadableId: i,
+                Source: entities[sourceIdx].Title,
+                Target: entities[targetIdx].Title,
+                Type: "RELATED_TO",
+                Description: null,
+                Weight: random.NextDouble(),
+                CombinedDegree: 2,
+                TextUnitIds: ImmutableArray.Create($"tu-{sourceIdx}", $"tu-{targetIdx}"),
+                Bidirectional: false));
+        }
+
+        return (entities, relationships.ToArray());
+    }
+}
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/FastLabelPropagationBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/FastLabelPropagationBenchmarks.cs
new file mode 100644
index 0000000000..416b848186
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Community/FastLabelPropagationBenchmarks.cs
@@ -0,0 +1,121 @@
+using System.Collections.Immutable;
+using BenchmarkDotNet.Attributes;
+using GraphRag.Community;
+using GraphRag.Config;
+using GraphRag.Entities;
+using GraphRag.Relationships;
+
+namespace ManagedCode.GraphRag.Benchmarks.Community;
+
+[MemoryDiagnoser]
+public class FastLabelPropagationBenchmarks
+{
+    private EntityRecord[] _smallGraphEntities = null!;
+    private RelationshipRecord[] _smallGraphRelationships = null!;
+    private EntityRecord[] _mediumGraphEntities = null!;
+    private RelationshipRecord[] _mediumGraphRelationships = null!;
+    private EntityRecord[] _largeGraphEntities = null!;
+    private RelationshipRecord[] _largeGraphRelationships = null!;
+    private ClusterGraphConfig _config = null!;
+
+    [Params(10, 20, 40)]
+    public int MaxIterations { get; set; }
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        _config = new ClusterGraphConfig
+        {
+            Algorithm = CommunityDetectionAlgorithm.FastLabelPropagation,
+            MaxIterations = MaxIterations,
+            Seed = 42
+        };
+
+        // Small graph: 100 nodes, ~300 edges
+        (_smallGraphEntities, _smallGraphRelationships) = GenerateGraph(100, 3);
+
+        // Medium graph: 1,000 nodes, ~5,000 edges
+        (_mediumGraphEntities, _mediumGraphRelationships) = GenerateGraph(1_000, 5);
+
+        // Large graph: 10,000 nodes, ~50,000 edges
+        (_largeGraphEntities, _largeGraphRelationships) = GenerateGraph(10_000, 5);
+    }
+
+    [Benchmark]
+    public IReadOnlyDictionary<string, int> SmallGraph() // NOTE(review): type args restored from stripped generics — confirm AssignLabels' return type
+    {
+        return FastLabelPropagationCommunityDetector.AssignLabels(
+            _smallGraphEntities,
+            _smallGraphRelationships,
+            _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyDictionary<string, int> MediumGraph()
+    {
+        return FastLabelPropagationCommunityDetector.AssignLabels(
+            _mediumGraphEntities,
+            _mediumGraphRelationships,
+            _config);
+    }
+
+    [Benchmark]
+    public IReadOnlyDictionary<string, int> LargeGraph()
+    {
+        return FastLabelPropagationCommunityDetector.AssignLabels(
+            _largeGraphEntities,
+            _largeGraphRelationships,
+            _config);
+    }
+
+    private static (EntityRecord[] Entities, RelationshipRecord[] Relationships) GenerateGraph(
+        int nodeCount,
+        int avgEdgesPerNode)
+    {
+        var random = new Random(42);
+        var entities = new EntityRecord[nodeCount];
+
+        for (var i = 0; i < nodeCount; i++)
+        {
+            entities[i] = new EntityRecord(
+                Id: $"entity-{i}",
+                HumanReadableId: i,
+                Title: $"Entity_{i}",
+                Type: "ENTITY",
+                Description: $"Description for entity {i}",
+                TextUnitIds: ImmutableArray<string>.Empty,
+                Frequency: 1,
+                Degree: 0,
+                X: 0,
+                Y: 0);
+        }
+
+        var totalEdges = nodeCount * avgEdgesPerNode;
+        var relationships = new List<RelationshipRecord>(totalEdges);
+
+        for (var i = 0; i < totalEdges; i++)
+        {
+            var sourceIdx = random.Next(nodeCount);
+            var targetIdx = random.Next(nodeCount);
+
+            if (sourceIdx == targetIdx)
+            {
+                targetIdx = (targetIdx + 1) % nodeCount;
+            }
+
+            relationships.Add(new RelationshipRecord(
+                Id: $"rel-{i}",
+                HumanReadableId: i,
+                Source: entities[sourceIdx].Title,
+                Target: entities[targetIdx].Title,
+                Type: "RELATED_TO",
+                Description: null,
+                Weight: random.NextDouble(),
+                CombinedDegree: 2,
+                TextUnitIds: ImmutableArray<string>.Empty,
+                Bidirectional: false));
+        }
+
+        return (entities, relationships.ToArray());
+    }
+}
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/ManagedCode.GraphRag.Benchmarks.csproj b/benchmarks/ManagedCode.GraphRag.Benchmarks/ManagedCode.GraphRag.Benchmarks.csproj
new file mode 100644
index 0000000000..9500561bc9
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/ManagedCode.GraphRag.Benchmarks.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <IsPackable>false</IsPackable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- NOTE(review): XML tags were stripped in this copy; markup reconstructed from the surviving text residue ("Exe", "false") — confirm against the original file. -->
+    <PackageReference Include="BenchmarkDotNet" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\src\ManagedCode.GraphRag\ManagedCode.GraphRag.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Program.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Program.cs
new file mode 100644
index 0000000000..c9a046727c
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Program.cs
@@ -0,0 +1,5 @@
+using BenchmarkDotNet.Running;
+
+// Entry point: let BenchmarkDotNet pick the benchmark(s) named on the command line.
+var switcher = BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly);
+switcher.Run(args);
diff --git a/benchmarks/ManagedCode.GraphRag.Benchmarks/Utils/HashingBenchmarks.cs b/benchmarks/ManagedCode.GraphRag.Benchmarks/Utils/HashingBenchmarks.cs
new file mode 100644
index 0000000000..853756c0a7
--- /dev/null
+++ b/benchmarks/ManagedCode.GraphRag.Benchmarks/Utils/HashingBenchmarks.cs
@@ -0,0 +1,68 @@
+using BenchmarkDotNet.Attributes;
+using GraphRag.Utils;
+
+namespace ManagedCode.GraphRag.Benchmarks.Utils;
+
+[MemoryDiagnoser]
+public class HashingBenchmarks
+{
+    private KeyValuePair<string, object?>[] _smallProperties = null!;
+    private KeyValuePair<string, object?>[] _mediumProperties = null!;
+    private KeyValuePair<string, object?>[] _largeProperties = null!;
+    private KeyValuePair<string, object?>[] _largeValueProperties = null!;
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        // 1 property with small value
+        _smallProperties = new[]
+        {
+            new KeyValuePair<string, object?>("id", "entity-123")
+        };
+
+        // 5 properties with medium values
+        _mediumProperties = new[]
+        {
+            new KeyValuePair<string, object?>("id", "entity-123"),
+            new KeyValuePair<string, object?>("name", "Sample Entity Name"),
+            new KeyValuePair<string, object?>("type", "ORGANIZATION"),
+            new KeyValuePair<string, object?>("frequency", 42),
+            new KeyValuePair<string, object?>("active", true)
+        };
+
+        // 20 properties with various values
+        _largeProperties = Enumerable.Range(0, 20)
+            .Select(i => new KeyValuePair<string, object?>($"property_{i}", $"value_{i}_with_some_content"))
+            .ToArray();
+
+        // 5 properties with large string values
+        _largeValueProperties = new[]
+        {
+            new KeyValuePair<string, object?>("id", "entity-123"),
+            new KeyValuePair<string, object?>("description", new string('x', 1000)),
+            new KeyValuePair<string, object?>("content", new string('y', 2000)),
+            new KeyValuePair<string, object?>("summary", new string('z', 500)),
+            new KeyValuePair<string, object?>("metadata", new string('w', 1500))
+        };
+    }
+
+    [Benchmark(Baseline = true)]
+    public string HashSmallProperties() => Hashing.GenerateSha512Hash(_smallProperties);
+
+    [Benchmark]
+    public string HashMediumProperties() => Hashing.GenerateSha512Hash(_mediumProperties);
+
+    [Benchmark]
+    public string HashLargeProperties() => Hashing.GenerateSha512Hash(_largeProperties);
+
+    [Benchmark]
+    public string HashLargeValueProperties() => Hashing.GenerateSha512Hash(_largeValueProperties);
+
+    [Benchmark]
+    public string HashWithTuples() => Hashing.GenerateSha512Hash(
+        ("id", "entity-123"),
+        ("name", "Sample Entity Name"),
+        ("type", "ORGANIZATION"),
+        ("frequency", (object?)42),
+        ("active", (object?)true));
+}
diff --git a/src/ManagedCode.GraphRag/Properties/AssemblyInfo.cs b/src/ManagedCode.GraphRag/Properties/AssemblyInfo.cs
index b2230db4b0..42ea7fad41 100644
--- a/src/ManagedCode.GraphRag/Properties/AssemblyInfo.cs
+++ b/src/ManagedCode.GraphRag/Properties/AssemblyInfo.cs
@@ -2,3 +2,4 @@
[assembly: InternalsVisibleTo("ManagedCode.GraphRag.Tests")]
[assembly: InternalsVisibleTo("ManagedCode.GraphRag.Tests.Integration")]
+[assembly: InternalsVisibleTo("ManagedCode.GraphRag.Benchmarks")]
diff --git a/src/ManagedCode.GraphRag/Utils/Hashing.cs b/src/ManagedCode.GraphRag/Utils/Hashing.cs
index 8e0e749efd..02b94949e7 100644
--- a/src/ManagedCode.GraphRag/Utils/Hashing.cs
+++ b/src/ManagedCode.GraphRag/Utils/Hashing.cs
@@ -5,19 +5,53 @@ namespace GraphRag.Utils;
 public static class Hashing
 {
     public static string GenerateSha512Hash(IEnumerable<KeyValuePair<string, object?>> fields)
     {
         ArgumentNullException.ThrowIfNull(fields);
-        var builder = new StringBuilder();
+        // Stream "key:value|" segments straight into the hash instead of
+        // materialising the full payload via StringBuilder + byte[].
+        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA512);
+
+        Span<byte> buffer = stackalloc byte[512];
+
         foreach (var field in fields)
         {
-            builder.Append(field.Key);
-            builder.Append(':');
-            builder.Append(field.Value);
-            builder.Append('|');
+            AppendStringChunked(hasher, field.Key, buffer);
+            hasher.AppendData(":"u8);
+            AppendStringChunked(hasher, field.Value?.ToString(), buffer);
+            hasher.AppendData("|"u8);
         }
-        var bytes = Encoding.UTF8.GetBytes(builder.ToString());
-        var hash = SHA512.HashData(bytes);
-        return Convert.ToHexString(hash).ToLowerInvariant();
+        Span<byte> hash = stackalloc byte[64]; // SHA-512 digest is 64 bytes.
+        hasher.GetHashAndReset(hash);
+        return Convert.ToHexStringLower(hash);
+    }
+
+    // UTF-8-encodes 'value' into 'buffer' chunk by chunk and feeds each chunk to
+    // the hasher. Uses the stateless Encoding.UTF8 span API: a shared Encoder
+    // instance is stateful and therefore not thread-safe, and flushing one per
+    // chunk corrupts surrogate pairs that straddle a chunk boundary.
+    private static void AppendStringChunked(IncrementalHash hasher, string? value, Span<byte> buffer)
+    {
+        if (string.IsNullOrEmpty(value)) return;
+
+        // UTF-8 needs at most 3 bytes per UTF-16 code unit, so chunks of this
+        // size always fit in the buffer.
+        var maxCharsPerChunk = buffer.Length / 3;
+        var remaining = value.AsSpan();
+
+        while (remaining.Length > 0)
+        {
+            var take = Math.Min(remaining.Length, maxCharsPerChunk);
+
+            // Never split a surrogate pair across chunks.
+            if (take < remaining.Length && char.IsHighSurrogate(remaining[take - 1]))
+            {
+                take--;
+            }
+
+            var bytesUsed = Encoding.UTF8.GetBytes(remaining[..take], buffer);
+            hasher.AppendData(buffer[..bytesUsed]);
+            remaining = remaining[take..];
+        }
     }
 
     public static string GenerateSha512Hash(params (string Key, object? Value)[] fields)
diff --git a/tests/ManagedCode.GraphRag.Tests/Utils/HashingTests.cs b/tests/ManagedCode.GraphRag.Tests/Utils/HashingTests.cs
new file mode 100644
index 0000000000..baaab08c68
--- /dev/null
+++ b/tests/ManagedCode.GraphRag.Tests/Utils/HashingTests.cs
@@ -0,0 +1,123 @@
+using GraphRag.Utils;
+
+namespace ManagedCode.GraphRag.Tests.Utils;
+
+public class HashingTests
+{
+    [Fact]
+    public void GenerateSha512Hash_WithSingleProperty_ReturnsConsistentHash()
+    {
+        var fields = new[] { new KeyValuePair<string, object?>("id", "entity-123") };
+
+        var hash1 = Hashing.GenerateSha512Hash(fields);
+        var hash2 = Hashing.GenerateSha512Hash(fields);
+
+        Assert.Equal(hash1, hash2);
+        Assert.Equal(128, hash1.Length); // SHA512 = 64 bytes = 128 hex chars
+        Assert.True(hash1.All(char.IsAsciiHexDigitLower)); // lowercase hex already includes the digits
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_WithMultipleProperties_ReturnsConsistentHash()
+    {
+        var fields = new[]
+        {
+            new KeyValuePair<string, object?>("id", "123"),
+            new KeyValuePair<string, object?>("name", "Test"),
+            new KeyValuePair<string, object?>("count", 42),
+        };
+
+        var hash1 = Hashing.GenerateSha512Hash(fields);
+        var hash2 = Hashing.GenerateSha512Hash(fields);
+
+        Assert.Equal(hash1, hash2);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_WithEmptyValue_HandlesCorrectly()
+    {
+        var fields = new[] { new KeyValuePair<string, object?>("empty", "") };
+
+        var hash = Hashing.GenerateSha512Hash(fields);
+
+        Assert.NotEmpty(hash);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_WithNullValue_HandlesCorrectly()
+    {
+        var fields = new[] { new KeyValuePair<string, object?>("nullable", null) };
+
+        var hash = Hashing.GenerateSha512Hash(fields);
+
+        Assert.NotEmpty(hash);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_WithUnicodeValue_HandlesCorrectly()
+    {
+        var fields = new[] { new KeyValuePair<string, object?>("unicode", "日本語🎉émoji") };
+
+        var hash = Hashing.GenerateSha512Hash(fields);
+
+        Assert.Equal(128, hash.Length);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_WithLargeValue_HandlesCorrectly()
+    {
+        var largeValue = new string('x', 10_000);
+        var fields = new[] { new KeyValuePair<string, object?>("large", largeValue) };
+
+        var hash = Hashing.GenerateSha512Hash(fields);
+
+        Assert.Equal(128, hash.Length);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_DifferentInputs_ProduceDifferentHashes()
+    {
+        var fields1 = new[] { new KeyValuePair<string, object?>("id", "1") };
+        var fields2 = new[] { new KeyValuePair<string, object?>("id", "2") };
+
+        var hash1 = Hashing.GenerateSha512Hash(fields1);
+        var hash2 = Hashing.GenerateSha512Hash(fields2);
+
+        Assert.NotEqual(hash1, hash2);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_PropertyOrderMatters()
+    {
+        var fields1 = new[]
+        {
+            new KeyValuePair<string, object?>("a", "1"),
+            new KeyValuePair<string, object?>("b", "2"),
+        };
+        var fields2 = new[]
+        {
+            new KeyValuePair<string, object?>("b", "2"),
+            new KeyValuePair<string, object?>("a", "1"),
+        };
+
+        var hash1 = Hashing.GenerateSha512Hash(fields1);
+        var hash2 = Hashing.GenerateSha512Hash(fields2);
+
+        Assert.NotEqual(hash1, hash2);
+    }
+
+    [Fact]
+    public void GenerateSha512Hash_TuplesOverload_MatchesKeyValuePairOverload()
+    {
+        var kvpHash = Hashing.GenerateSha512Hash([
+            new KeyValuePair<string, object?>("id", "123"),
+            new KeyValuePair<string, object?>("name", "Test")
+        ]);
+
+        var tupleHash = Hashing.GenerateSha512Hash(
+            ("id", (object?)"123"),
+            ("name", (object?)"Test"));
+
+        Assert.Equal(kvpHash, tupleHash);
+    }
+}