Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
<PackageVersion Include="Testcontainers.PostgreSql" Version="4.9.0" />
<PackageVersion Include="xunit" Version="2.9.3" />
<PackageVersion Include="xunit.runner.visualstudio" Version="3.1.5" />
<PackageVersion Include="BenchmarkDotNet" Version="0.14.0" />
</ItemGroup>
</Project>
3 changes: 3 additions & 0 deletions GraphRag.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@
<Folder Name="/tests/">
<Project Path="tests/ManagedCode.GraphRag.Tests/ManagedCode.GraphRag.Tests.csproj" />
</Folder>
<Folder Name="/benchmarks/">
<Project Path="benchmarks/ManagedCode.GraphRag.Benchmarks/ManagedCode.GraphRag.Benchmarks.csproj" />
</Folder>
</Solution>
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using BenchmarkDotNet.Attributes;
using GraphRag.Cache;
using Microsoft.Extensions.Caching.Memory;

namespace ManagedCode.GraphRag.Benchmarks.Cache;

/// <summary>
/// Measures throughput and allocations of <see cref="MemoryPipelineCache"/> for the
/// core operations (set, get, has, clear) and child-scope creation, across several
/// cache population sizes.
/// </summary>
[MemoryDiagnoser]
public class MemoryPipelineCacheBenchmarks
{
    private IMemoryCache _memoryCache = null!;
    private MemoryPipelineCache _cache = null!;
    private string[] _keys = null!;
    private object[] _values = null!;

    /// <summary>Number of cache entries each benchmark operates on.</summary>
    [Params(1_000, 10_000, 100_000)]
    public int EntryCount { get; set; }

    [GlobalSetup]
    public async Task Setup()
    {
        _memoryCache = new MemoryCache(new MemoryCacheOptions());
        _cache = new MemoryPipelineCache(_memoryCache);

        _keys = new string[EntryCount];
        _values = new object[EntryCount];

        for (var i = 0; i < EntryCount; i++)
        {
            _keys[i] = $"key-{i:D8}";
            _values[i] = new { Id = i, Name = $"Value-{i}", Data = new byte[100] };
        }

        // Pre-populate once, outside the measured region, so the read benchmarks
        // (GetEntries/HasEntries) time lookups only instead of N inserts + N reads.
        // SetEntries overwrites these keys, which exercises the same insert path.
        await PopulateCacheAsync();
    }

    [GlobalCleanup]
    public void Cleanup()
    {
        _memoryCache.Dispose();
    }

    [Benchmark]
    public async Task SetEntries()
    {
        for (var i = 0; i < EntryCount; i++)
        {
            await _cache.SetAsync(_keys[i], _values[i]);
        }
    }

    [Benchmark]
    public async Task GetEntries()
    {
        for (var i = 0; i < EntryCount; i++)
        {
            _ = await _cache.GetAsync(_keys[i]);
        }
    }

    [Benchmark]
    public async Task HasEntries()
    {
        for (var i = 0; i < EntryCount; i++)
        {
            _ = await _cache.HasAsync(_keys[i]);
        }
    }

    // ClearAsync empties the cache, so it must be refilled before every iteration.
    // IterationSetup keeps that refill outside the measured region (IterationSetup
    // must be synchronous, hence the blocking wait; this code never runs on a
    // synchronization context, so it cannot deadlock).
    [IterationSetup(Target = nameof(ClearCache))]
    public void RepopulateForClear()
    {
        PopulateCacheAsync().GetAwaiter().GetResult();
    }

    [Benchmark]
    public async Task ClearCache()
    {
        await _cache.ClearAsync();
    }

    [Benchmark]
    public IPipelineCache CreateChildScope()
    {
        return _cache.CreateChild("child-scope");
    }

    // Inserts every (key, value) pair; shared by GlobalSetup and the ClearCache refill.
    private async Task PopulateCacheAsync()
    {
        for (var i = 0; i < EntryCount; i++)
        {
            await _cache.SetAsync(_keys[i], _values[i]);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using BenchmarkDotNet.Attributes;
using GraphRag.Chunking;
using GraphRag.Config;

namespace ManagedCode.GraphRag.Benchmarks.Chunking;

/// <summary>
/// Benchmarks <see cref="MarkdownTextChunker"/> against synthetic markdown documents
/// of three sizes, sweeping chunk size and overlap.
/// </summary>
[MemoryDiagnoser]
public class MarkdownTextChunkerBenchmarks
{
    private MarkdownTextChunker _chunker = null!;
    private ChunkSlice[] _smallDocument = null!;
    private ChunkSlice[] _mediumDocument = null!;
    private ChunkSlice[] _largeDocument = null!;
    private ChunkingConfig _config = null!;

    /// <summary>Target chunk size passed to the chunker.</summary>
    [Params(512, 1024, 2048)]
    public int ChunkSize { get; set; }

    /// <summary>Overlap between consecutive chunks.</summary>
    [Params(0, 64, 128)]
    public int ChunkOverlap { get; set; }

    [GlobalSetup]
    public void Setup()
    {
        _chunker = new MarkdownTextChunker();
        _config = new ChunkingConfig
        {
            Size = ChunkSize,
            Overlap = ChunkOverlap,
            Strategy = ChunkStrategyType.Sentence
        };

        // Three fixture documents: ~1 KB, ~100 KB, and ~1 MB of repeated markdown.
        _smallDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(1_000)) };
        _mediumDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(100_000)) };
        _largeDocument = new[] { new ChunkSlice("doc1", GenerateMarkdownDocument(1_000_000)) };
    }

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkSmallDocument() => _chunker.Chunk(_smallDocument, _config);

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkMediumDocument() => _chunker.Chunk(_mediumDocument, _config);

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkLargeDocument() => _chunker.Chunk(_largeDocument, _config);

    // Builds a markdown document of at least approximateLength characters by cycling
    // through a fixed set of representative markdown fragments (headers, lists, code,
    // blockquote, table) so the chunker sees varied structure.
    private static string GenerateMarkdownDocument(int approximateLength)
    {
        var paragraphs = new[]
        {
            "# Introduction\n\nThis is a sample markdown document for benchmarking purposes. It contains various markdown elements including headers, paragraphs, lists, and code blocks.\n\n",
            "## Section One\n\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.\n\n",
            "### Subsection A\n\nDuis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident.\n\n",
            "- First item in the list\n- Second item with more content\n- Third item explaining something important\n\n",
            "1. Numbered first item\n2. Numbered second item\n3. Numbered third item with explanation\n\n",
            "```csharp\npublic class Example\n{\n    public void Method() { }\n}\n```\n\n",
            "## Section Two\n\nSunt in culpa qui officia deserunt mollit anim id est laborum. Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium.\n\n",
            "> This is a blockquote that spans multiple lines and contains important information that should be preserved during chunking.\n\n",
            "### Subsection B\n\nNemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt.\n\n",
            "| Column 1 | Column 2 | Column 3 |\n|----------|----------|----------|\n| Data 1   | Data 2   | Data 3   |\n| Data 4   | Data 5   | Data 6   |\n\n"
        };

        var builder = new System.Text.StringBuilder(approximateLength + 1000);

        for (var next = 0; builder.Length < approximateLength; next++)
        {
            builder.Append(paragraphs[next % paragraphs.Length]);
        }

        return builder.ToString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
using BenchmarkDotNet.Attributes;
using GraphRag.Chunking;
using GraphRag.Config;

namespace ManagedCode.GraphRag.Benchmarks.Chunking;

/// <summary>
/// Benchmarks <see cref="TokenTextChunker"/> against synthetic plain-text documents
/// of three sizes, sweeping chunk size and overlap.
/// </summary>
[MemoryDiagnoser]
public class TokenTextChunkerBenchmarks
{
    private TokenTextChunker _chunker = null!;
    private ChunkSlice[] _smallDocument = null!;
    private ChunkSlice[] _mediumDocument = null!;
    private ChunkSlice[] _largeDocument = null!;
    private ChunkingConfig _config = null!;

    /// <summary>Target chunk size passed to the chunker.</summary>
    [Params(512, 1024, 2048)]
    public int ChunkSize { get; set; }

    /// <summary>Overlap between consecutive chunks.</summary>
    [Params(0, 64, 128)]
    public int ChunkOverlap { get; set; }

    [GlobalSetup]
    public void Setup()
    {
        _chunker = new TokenTextChunker();
        _config = new ChunkingConfig
        {
            Size = ChunkSize,
            Overlap = ChunkOverlap,
            Strategy = ChunkStrategyType.Tokens
        };

        // Three fixture documents: ~1 KB, ~100 KB, and ~1 MB of repeated sentences.
        _smallDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(1_000)) };
        _mediumDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(100_000)) };
        _largeDocument = new[] { new ChunkSlice("doc1", GeneratePlainTextDocument(1_000_000)) };
    }

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkSmallDocument() => _chunker.Chunk(_smallDocument, _config);

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkMediumDocument() => _chunker.Chunk(_mediumDocument, _config);

    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkLargeDocument() => _chunker.Chunk(_largeDocument, _config);

    // Builds a plain-text document of at least approximateLength characters by cycling
    // through a fixed pool of sentences.
    private static string GeneratePlainTextDocument(int approximateLength)
    {
        var sentences = new[]
        {
            "The quick brown fox jumps over the lazy dog. ",
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
            "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
            "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ",
            "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore. ",
            "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia. ",
            "Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit. ",
            "Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet. "
        };

        var builder = new System.Text.StringBuilder(approximateLength + 200);

        for (var next = 0; builder.Length < approximateLength; next++)
        {
            builder.Append(sentences[next % sentences.Length]);
        }

        return builder.ToString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System.Collections.Immutable;
using BenchmarkDotNet.Attributes;
using GraphRag.Community;
using GraphRag.Config;
using GraphRag.Entities;
using GraphRag.Relationships;

namespace ManagedCode.GraphRag.Benchmarks.Community;

/// <summary>
/// Compares the community-detection algorithms exposed by <see cref="CommunityBuilder"/>
/// (fast label propagation vs. connected components) on seeded random graphs of
/// increasing size, so results are reproducible across runs.
/// </summary>
[MemoryDiagnoser]
public class CommunityBuilderBenchmarks
{
    private EntityRecord[] _entities = null!;
    private RelationshipRecord[] _relationships = null!;
    private ClusterGraphConfig _labelPropagationConfig = null!;
    private ClusterGraphConfig _connectedComponentsConfig = null!;

    /// <summary>Number of entities (graph nodes) to generate.</summary>
    [Params(100, 1_000, 5_000)]
    public int NodeCount { get; set; }

    [GlobalSetup]
    public void Setup()
    {
        _labelPropagationConfig = new ClusterGraphConfig
        {
            Algorithm = CommunityDetectionAlgorithm.FastLabelPropagation,
            MaxIterations = 20,
            MaxClusterSize = 25,
            Seed = 42
        };

        _connectedComponentsConfig = new ClusterGraphConfig
        {
            Algorithm = CommunityDetectionAlgorithm.ConnectedComponents,
            MaxClusterSize = 25,
            Seed = 42
        };

        (_entities, _relationships) = GenerateGraph(NodeCount, avgEdgesPerNode: 5);
    }

    [Benchmark(Baseline = true)]
    public IReadOnlyList<CommunityRecord> FastLabelPropagation() =>
        CommunityBuilder.Build(_entities, _relationships, _labelPropagationConfig);

    [Benchmark]
    public IReadOnlyList<CommunityRecord> ConnectedComponents() =>
        CommunityBuilder.Build(_entities, _relationships, _connectedComponentsConfig);

    // Builds a deterministic random graph: nodeCount entities plus
    // nodeCount * avgEdgesPerNode edges with random endpoints. The RNG seed is
    // fixed so every run benchmarks the exact same graph. Duplicate edges are
    // allowed; self-loops are redirected to the next node.
    private static (EntityRecord[] Entities, RelationshipRecord[] Relationships) GenerateGraph(
        int nodeCount,
        int avgEdgesPerNode)
    {
        var rng = new Random(42);

        var entities = new EntityRecord[nodeCount];
        for (var node = 0; node < nodeCount; node++)
        {
            entities[node] = new EntityRecord(
                Id: $"entity-{node}",
                HumanReadableId: node,
                Title: $"Entity_{node}",
                Type: "ENTITY",
                Description: $"Description for entity {node}",
                TextUnitIds: ImmutableArray.Create($"tu-{node}"),
                Frequency: 1,
                Degree: 0,
                X: 0,
                Y: 0);
        }

        var edgeCount = nodeCount * avgEdgesPerNode;
        var relationships = new List<RelationshipRecord>(edgeCount);

        for (var edge = 0; edge < edgeCount; edge++)
        {
            var source = rng.Next(nodeCount);
            var target = rng.Next(nodeCount);

            // Avoid self-loops by shifting the target to the following node.
            if (source == target)
            {
                target = (target + 1) % nodeCount;
            }

            relationships.Add(new RelationshipRecord(
                Id: $"rel-{edge}",
                HumanReadableId: edge,
                Source: entities[source].Title,
                Target: entities[target].Title,
                Type: "RELATED_TO",
                Description: null,
                Weight: rng.NextDouble(),
                CombinedDegree: 2,
                TextUnitIds: ImmutableArray.Create($"tu-{source}", $"tu-{target}"),
                Bidirectional: false));
        }

        return (entities, relationships.ToArray());
    }
}
Loading