From b2b22f5ef48f4496beeeec627c862ded25aa0d71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Feb 2026 06:22:29 +0000 Subject: [PATCH 1/6] Initial plan From 30e9fd49e72fe92195d89474199ce052f55003d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Feb 2026 06:27:08 +0000 Subject: [PATCH 2/6] Add HTTP resilience with retry policy for Azure OpenAI rate limiting - Add Microsoft.Extensions.Http.Resilience package - Configure standard resilience handler with exponential backoff - Handle HTTP 429 (rate limit) with automatic retry - Respect Retry-After header from Azure OpenAI - Add circuit breaker and timeout configuration - Document resilience implementation Co-authored-by: BenjaminMichaelis <22186029+BenjaminMichaelis@users.noreply.github.com> --- Directory.Packages.props | 1 + .../EssentialCSharp.Chat.Common.csproj | 1 + .../Extensions/ServiceCollectionExtensions.cs | 46 +++++ docs/AZURE_OPENAI_RESILIENCE.md | 166 ++++++++++++++++++ 4 files changed, 214 insertions(+) create mode 100644 docs/AZURE_OPENAI_RESILIENCE.md diff --git a/Directory.Packages.props b/Directory.Packages.props index 2b4bd77b..5e885b1f 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -36,6 +36,7 @@ + diff --git a/EssentialCSharp.Chat.Shared/EssentialCSharp.Chat.Common.csproj b/EssentialCSharp.Chat.Shared/EssentialCSharp.Chat.Common.csproj index e600d10f..f51ecef9 100644 --- a/EssentialCSharp.Chat.Shared/EssentialCSharp.Chat.Common.csproj +++ b/EssentialCSharp.Chat.Shared/EssentialCSharp.Chat.Common.csproj @@ -6,6 +6,7 @@ + diff --git a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs index 059a6d13..987eeca3 100644 --- a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs +++ 
b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs @@ -4,8 +4,10 @@ using EssentialCSharp.Chat.Common.Services; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Http.Resilience; using Microsoft.SemanticKernel; using Npgsql; +using Polly; namespace EssentialCSharp.Chat.Common.Extensions; @@ -38,6 +40,9 @@ public static IServiceCollection AddAzureOpenAIServices( var endpoint = new Uri(aiOptions.Endpoint); + // Configure HTTP resilience for Azure OpenAI requests + ConfigureAzureOpenAIResilience(services); + // Register Azure OpenAI services with Managed Identity authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. services.AddAzureOpenAIChatClient( @@ -71,6 +76,44 @@ public static IServiceCollection AddAzureOpenAIServices( return services; } + /// + /// Configures HTTP resilience (retry, circuit breaker, timeout) for Azure OpenAI HTTP clients. + /// This handles rate limiting (HTTP 429) and transient errors with exponential backoff. 
+ /// + /// The service collection to configure + private static void ConfigureAzureOpenAIResilience(IServiceCollection services) + { + // Configure resilience for all HTTP clients used by Azure OpenAI services + services.ConfigureHttpClientDefaults(httpClientBuilder => + { + httpClientBuilder.AddStandardResilienceHandler(options => + { + // Configure retry strategy for rate limiting and transient errors + options.Retry.MaxRetryAttempts = 5; + options.Retry.Delay = TimeSpan.FromSeconds(2); + options.Retry.BackoffType = DelayBackoffType.Exponential; + options.Retry.UseJitter = true; + + // The standard resilience handler already handles: + // - HTTP 429 (Too Many Requests / Rate Limit) + // - HTTP 408 (Request Timeout) + // - HTTP 5xx (Server Errors) + // - Respects Retry-After header automatically + + // Circuit breaker: SamplingDuration must be at least 2x AttemptTimeout (30s below) or options validation throws at runtime + options.CircuitBreaker.SamplingDuration = TimeSpan.FromSeconds(60); + options.CircuitBreaker.BreakDuration = TimeSpan.FromSeconds(15); + options.CircuitBreaker.FailureRatio = 0.2; // Break if 20% of requests fail + + // Configure timeout for individual attempts + options.AttemptTimeout.Timeout = TimeSpan.FromSeconds(30); + + // Configure total timeout covering all attempts and the backoff delays between them + options.TotalRequestTimeout.Timeout = TimeSpan.FromMinutes(3); + }); + }); + } + /// /// Adds Azure OpenAI and related AI services to the service collection using configuration /// @@ -183,6 +226,9 @@ public static IServiceCollection AddAzureOpenAIServicesWithApiKey( var endpoint = new Uri(aiOptions.Endpoint); + // Configure HTTP resilience for Azure OpenAI requests + ConfigureAzureOpenAIResilience(services); + // Register Azure OpenAI services with API key authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. 
services.AddAzureOpenAIChatClient( diff --git a/docs/AZURE_OPENAI_RESILIENCE.md b/docs/AZURE_OPENAI_RESILIENCE.md new file mode 100644 index 00000000..5be9a1c1 --- /dev/null +++ b/docs/AZURE_OPENAI_RESILIENCE.md @@ -0,0 +1,166 @@ +# Azure OpenAI Resilience Configuration + +## Overview + +This document describes the resilience and retry mechanisms implemented for Azure OpenAI API calls in the EssentialCSharp.Web application. These mechanisms handle transient failures, rate limiting (HTTP 429), and other recoverable errors. + +## Implementation + +### Retry Policy + +The application uses Microsoft.Extensions.Http.Resilience to provide automatic retry capabilities for all Azure OpenAI HTTP client calls. This includes: + +- **Embedding Generation** (via `IEmbeddingGenerator`) +- **Chat Completions** (via `IChatCompletionService`) +- **Vector Store Operations** + +### Configuration Details + +The resilience handler is configured in `ServiceCollectionExtensions.ConfigureAzureOpenAIResilience()` with the following settings: + +#### Retry Strategy +- **Max Retry Attempts**: 5 +- **Initial Delay**: 2 seconds +- **Backoff Type**: Exponential with jitter +- **Handles**: + - HTTP 429 (Too Many Requests / Rate Limit Exceeded) + - HTTP 408 (Request Timeout) + - HTTP 5xx (Server Errors) + - Network failures and transient errors + +#### Retry-After Header Support +The standard resilience handler automatically respects the `Retry-After` header sent by Azure OpenAI when rate limits are hit. This ensures: +- The application waits the exact duration specified by Azure +- No unnecessary retries that would continue to hit rate limits +- Efficient use of rate limit quotas + +#### Circuit Breaker +- **Sampling Duration**: 30 seconds +- **Break Duration**: 15 seconds +- **Failure Ratio**: 20% (breaks if 20% of requests fail) + +This prevents overwhelming the Azure OpenAI service during prolonged outages or severe rate limiting. 
+ +#### Timeouts +- **Attempt Timeout**: 30 seconds per individual request +- **Total Request Timeout**: 3 minutes for all retry attempts combined + +## How It Works + +### Rate Limit Scenario (HTTP 429) + +When Azure OpenAI returns an HTTP 429 error: + +1. The resilience handler catches the error +2. Checks the `Retry-After` header (e.g., "retry after 4 seconds") +3. Waits for the specified duration (with jitter to prevent thundering herd) +4. Retries the request automatically +5. Repeats up to 5 times with exponential backoff +6. If all retries fail, the exception is propagated to the caller + +### Example Flow + +``` +Request 1: Embedding Generation + → HTTP 429 (Retry-After: 4 seconds) + → Wait 4 seconds + jitter + → Retry +Request 2: Embedding Generation + → HTTP 429 (Retry-After: 8 seconds) + → Wait 8 seconds + jitter + → Retry +Request 3: Embedding Generation + → HTTP 200 ✓ +``` + +## Benefits + +### For Rate Limiting +- Automatic handling of Azure OpenAI quota limits +- Respects server-specified retry delays +- Prevents quota waste from premature retries +- Exponential backoff prevents retry storms + +### For Reliability +- Handles transient network failures +- Recovers from temporary service outages +- Circuit breaker prevents cascading failures +- Configurable timeouts prevent infinite waits + +## Usage + +The resilience configuration is applied automatically when using: + +```csharp +services.AddAzureOpenAIServices(configuration); +``` + +Or with explicit options: + +```csharp +services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential); +``` + +No additional code changes are required in application logic - all retry and error handling is transparent. 
+ +## Monitoring + +The resilience handlers emit telemetry through: +- **Application Insights** (when configured) +- **OpenTelemetry** (standard metrics) +- **Console logging** (for development) + +Key metrics include: +- Retry attempts +- Circuit breaker state changes +- Request durations +- Failure rates + +## Best Practices + +### For Development +- Monitor retry counts in logs +- Test with rate limiting scenarios +- Verify Retry-After header handling + +### For Production +- Set up alerts for high retry rates +- Monitor circuit breaker trips +- Track rate limit quota usage +- Consider increasing Azure OpenAI quota if needed + +## Troubleshooting + +### Excessive Retries +If you see many retries: +- Check Azure OpenAI quota limits +- Review request parallelism (currently set to MaxDegreeOfParallelism = 5) +- Consider requesting quota increase from Azure + +### Circuit Breaker Trips +If the circuit breaker frequently opens: +- Review Azure OpenAI service health +- Check for deployment issues +- Verify network connectivity +- Consider increasing the failure ratio threshold + +### Timeouts +If requests timeout despite retries: +- Check individual attempt timeout (30s) +- Review total timeout (3 minutes) +- Verify Azure OpenAI service performance +- Consider increasing timeout values for batch operations + +## Related Documentation + +- [Microsoft.Extensions.Http.Resilience](https://learn.microsoft.com/en-us/dotnet/core/resilience/http-resilience) +- [Azure OpenAI Quota Management](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/quota) +- [Polly Resilience Library](https://www.pollydocs.org/) +- [HTTP Retry Pattern](https://learn.microsoft.com/en-us/azure/architecture/patterns/retry) + +## Version History + +- **v1.0** (2026-02-10): Initial implementation with standard resilience handler + - Added exponential backoff retry strategy + - Configured circuit breaker and timeouts + - Automatic Retry-After header support From 
7e4979aa23c15b6dca29cd0eb4114579654d5198 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Feb 2026 06:31:25 +0000 Subject: [PATCH 3/6] Refine resilience configuration with opt-out parameter - Add configureResilience parameter to allow selective resilience - Update documentation with usage examples for mixed-use applications - Improve code comments explaining when to use which approach - Keep default behavior (resilience enabled) for Chat app Co-authored-by: BenjaminMichaelis <22186029+BenjaminMichaelis@users.noreply.github.com> --- .../Extensions/ServiceCollectionExtensions.cs | 47 +++++++++++++++---- docs/AZURE_OPENAI_RESILIENCE.md | 16 +++++-- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs index 987eeca3..f39d3a4d 100644 --- a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs +++ b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs @@ -22,12 +22,14 @@ public static class ServiceCollectionExtensions /// The AI configuration options /// The PostgreSQL connection string for the vector store /// The token credential to use for authentication. If null, DefaultAzureCredential will be used. + /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining public static IServiceCollection AddAzureOpenAIServices( this IServiceCollection services, AIOptions aiOptions, string postgresConnectionString, - TokenCredential? credential = null) + TokenCredential? credential = null, + bool configureResilience = true) { // Use DefaultAzureCredential if no credential is provided // This works both locally (using Azure CLI, Visual Studio, etc.) 
and in Azure (using Managed Identity) @@ -40,8 +42,11 @@ public static IServiceCollection AddAzureOpenAIServices( var endpoint = new Uri(aiOptions.Endpoint); - // Configure HTTP resilience for Azure OpenAI requests - ConfigureAzureOpenAIResilience(services); + // Configure HTTP resilience for Azure OpenAI requests if requested + if (configureResilience) + { + ConfigureAzureOpenAIResilience(services); + } // Register Azure OpenAI services with Managed Identity authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -81,9 +86,26 @@ public static IServiceCollection AddAzureOpenAIServices( /// This handles rate limiting (HTTP 429) and transient errors with exponential backoff. /// /// The service collection to configure + /// + /// This method configures resilience for ALL HTTP clients created via IHttpClientFactory. + /// This is appropriate when your application ONLY uses Azure OpenAI HTTP clients. + /// + /// If your application has other HTTP clients (e.g., third-party APIs) that shouldn't + /// have the same retry behavior, set configureResilience=false when calling + /// AddAzureOpenAIServices and configure resilience on a per-client basis instead. 
+ /// + /// For Azure OpenAI services specifically, the resilience configuration: + /// - Retries HTTP 429 (rate limit), 408 (timeout), and 5xx errors + /// - Respects Retry-After headers from Azure OpenAI + /// - Uses exponential backoff with jitter + /// - Implements circuit breaker pattern + /// private static void ConfigureAzureOpenAIResilience(IServiceCollection services) { - // Configure resilience for all HTTP clients used by Azure OpenAI services + // Configure resilience for all HTTP clients created via IHttpClientFactory + // This is appropriate for applications that ONLY use Azure OpenAI services + // For mixed-use applications, consider setting configureResilience=false + // and applying resilience per-client instead. services.ConfigureHttpClientDefaults(httpClientBuilder => { httpClientBuilder.AddStandardResilienceHandler(options => @@ -120,11 +142,13 @@ private static void ConfigureAzureOpenAIResilience(IServiceCollection services) /// The service collection to add services to /// The configuration to read AIOptions from /// Optional token credential to use for authentication. If null, DefaultAzureCredential will be used. + /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining public static IServiceCollection AddAzureOpenAIServices( this IServiceCollection services, IConfiguration configuration, - TokenCredential? credential = null) + TokenCredential? credential = null, + bool configureResilience = true) { // Configure AI options from configuration services.Configure(configuration.GetSection("AIOptions")); @@ -139,7 +163,7 @@ public static IServiceCollection AddAzureOpenAIServices( var postgresConnectionString = configuration.GetConnectionString("PostgresVectorStore") ?? 
throw new InvalidOperationException("Connection string 'PostgresVectorStore' not found."); - return services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential); + return services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential, configureResilience); } /// @@ -206,13 +230,15 @@ private static IServiceCollection AddPostgresVectorStoreWithManagedIdentity( /// The AI configuration options /// The PostgreSQL connection string for the vector store /// The API key for Azure OpenAI authentication + /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining [Obsolete("API key authentication is not recommended for production. Use AddAzureOpenAIServices with Managed Identity instead.")] public static IServiceCollection AddAzureOpenAIServicesWithApiKey( this IServiceCollection services, AIOptions aiOptions, string postgresConnectionString, - string apiKey) + string apiKey, + bool configureResilience = true) { if (string.IsNullOrEmpty(apiKey)) { @@ -226,8 +252,11 @@ public static IServiceCollection AddAzureOpenAIServicesWithApiKey( var endpoint = new Uri(aiOptions.Endpoint); - // Configure HTTP resilience for Azure OpenAI requests - ConfigureAzureOpenAIResilience(services); + // Configure HTTP resilience for Azure OpenAI requests if requested + if (configureResilience) + { + ConfigureAzureOpenAIResilience(services); + } // Register Azure OpenAI services with API key authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. 
diff --git a/docs/AZURE_OPENAI_RESILIENCE.md b/docs/AZURE_OPENAI_RESILIENCE.md index 5be9a1c1..6c4bdb6e 100644 --- a/docs/AZURE_OPENAI_RESILIENCE.md +++ b/docs/AZURE_OPENAI_RESILIENCE.md @@ -89,16 +89,26 @@ Request 3: Embedding Generation ## Usage -The resilience configuration is applied automatically when using: +The resilience configuration is applied automatically when using the Chat application or any application that ONLY uses Azure OpenAI services: ```csharp +// In applications that ONLY use Azure OpenAI (like EssentialCSharp.Chat) services.AddAzureOpenAIServices(configuration); ``` -Or with explicit options: +For applications with multiple HTTP clients (e.g., the Web application that also uses hCaptcha and Mailjet): ```csharp -services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential); +// Option 1: Disable automatic resilience and configure per-client +services.AddAzureOpenAIServices(configuration, configureResilience: false); + +// Then configure resilience for specific clients as needed +services.AddHttpClient("MyAzureOpenAIClient") + .AddStandardResilienceHandler(/* custom options */); + +// Option 2: Let the default resilience apply to all clients +// This is usually fine as the resilience policies are reasonable for most HTTP APIs +services.AddAzureOpenAIServices(configuration); ``` No additional code changes are required in application logic - all retry and error handling is transparent. 
From 194c2bbc394caecac16ad73a916a98461b21c10e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Feb 2026 06:33:16 +0000 Subject: [PATCH 4/6] Add detailed explanation of ConfigureHttpClientDefaults usage - Explain Semantic Kernel limitation requiring ConfigureHttpClientDefaults - Document why named/typed client approach is not possible - Clarify usage scenarios for mixed-use vs dedicated applications - Improve inline code comments with technical justification Co-authored-by: BenjaminMichaelis <22186029+BenjaminMichaelis@users.noreply.github.com> --- .../Extensions/ServiceCollectionExtensions.cs | 9 +++++++++ docs/AZURE_OPENAI_RESILIENCE.md | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs index f39d3a4d..32ea6c75 100644 --- a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs +++ b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs @@ -94,6 +94,12 @@ public static IServiceCollection AddAzureOpenAIServices( /// have the same retry behavior, set configureResilience=false when calling /// AddAzureOpenAIServices and configure resilience on a per-client basis instead. /// + /// IMPORTANT: The Semantic Kernel's AddAzureOpenAI* extension methods (used in this class) + /// do NOT expose options to configure specific named or typed HttpClients. The internal + /// implementation creates HttpClient instances through IHttpClientFactory without + /// providing hooks for per-client configuration. Therefore, ConfigureHttpClientDefaults + /// is the ONLY way to apply resilience to Azure OpenAI clients when using Semantic Kernel. 
+ /// /// For Azure OpenAI services specifically, the resilience configuration: /// - Retries HTTP 429 (rate limit), 408 (timeout), and 5xx errors /// - Respects Retry-After headers from Azure OpenAI @@ -106,6 +112,9 @@ private static void ConfigureAzureOpenAIResilience(IServiceCollection services) // This is appropriate for applications that ONLY use Azure OpenAI services // For mixed-use applications, consider setting configureResilience=false // and applying resilience per-client instead. + // + // Note: The Semantic Kernel's AddAzureOpenAI* methods do not support named/typed + // HttpClient configuration, so ConfigureHttpClientDefaults is required. services.ConfigureHttpClientDefaults(httpClientBuilder => { httpClientBuilder.AddStandardResilienceHandler(options => diff --git a/docs/AZURE_OPENAI_RESILIENCE.md b/docs/AZURE_OPENAI_RESILIENCE.md index 6c4bdb6e..928ef36b 100644 --- a/docs/AZURE_OPENAI_RESILIENCE.md +++ b/docs/AZURE_OPENAI_RESILIENCE.md @@ -14,6 +14,18 @@ The application uses Microsoft.Extensions.Http.Resilience to provide automatic r - **Chat Completions** (via `IChatCompletionService`) - **Vector Store Operations** +### Why ConfigureHttpClientDefaults? + +The Semantic Kernel's `AddAzureOpenAI*` extension methods (e.g., `AddAzureOpenAIChatClient`, `AddAzureOpenAIEmbeddingGenerator`) do not expose options to configure specific named or typed HttpClients. The internal implementation creates HttpClient instances through `IHttpClientFactory` without providing hooks for per-client configuration. + +Therefore, `ConfigureHttpClientDefaults` is the **only way** to apply resilience policies to Azure OpenAI clients when using Semantic Kernel and Microsoft.Extensions.AI. + +For applications that ONLY use Azure OpenAI services (like the Chat console application), this is the ideal approach. For mixed-use applications (like the Web application with hCaptcha and Mailjet clients), you can: + +1. 
Set `configureResilience: false` when calling `AddAzureOpenAIServices` +2. Configure resilience on a per-client basis for other HTTP clients +3. Or accept that all HTTP clients will have the same resilience behavior (which is often acceptable) + ### Configuration Details The resilience handler is configured in `ServiceCollectionExtensions.ConfigureAzureOpenAIResilience()` with the following settings: From 1c31bb44bc8a20effaf47cca2af6b3a835ee2fe1 Mon Sep 17 00:00:00 2001 From: Benjamin Michaelis Date: Mon, 9 Feb 2026 22:39:34 -0800 Subject: [PATCH 5/6] Delete docs/AZURE_OPENAI_RESILIENCE.md --- docs/AZURE_OPENAI_RESILIENCE.md | 188 -------------------------------- 1 file changed, 188 deletions(-) delete mode 100644 docs/AZURE_OPENAI_RESILIENCE.md diff --git a/docs/AZURE_OPENAI_RESILIENCE.md b/docs/AZURE_OPENAI_RESILIENCE.md deleted file mode 100644 index 928ef36b..00000000 --- a/docs/AZURE_OPENAI_RESILIENCE.md +++ /dev/null @@ -1,188 +0,0 @@ -# Azure OpenAI Resilience Configuration - -## Overview - -This document describes the resilience and retry mechanisms implemented for Azure OpenAI API calls in the EssentialCSharp.Web application. These mechanisms handle transient failures, rate limiting (HTTP 429), and other recoverable errors. - -## Implementation - -### Retry Policy - -The application uses Microsoft.Extensions.Http.Resilience to provide automatic retry capabilities for all Azure OpenAI HTTP client calls. This includes: - -- **Embedding Generation** (via `IEmbeddingGenerator`) -- **Chat Completions** (via `IChatCompletionService`) -- **Vector Store Operations** - -### Why ConfigureHttpClientDefaults? - -The Semantic Kernel's `AddAzureOpenAI*` extension methods (e.g., `AddAzureOpenAIChatClient`, `AddAzureOpenAIEmbeddingGenerator`) do not expose options to configure specific named or typed HttpClients. The internal implementation creates HttpClient instances through `IHttpClientFactory` without providing hooks for per-client configuration. 
- -Therefore, `ConfigureHttpClientDefaults` is the **only way** to apply resilience policies to Azure OpenAI clients when using Semantic Kernel and Microsoft.Extensions.AI. - -For applications that ONLY use Azure OpenAI services (like the Chat console application), this is the ideal approach. For mixed-use applications (like the Web application with hCaptcha and Mailjet clients), you can: - -1. Set `configureResilience: false` when calling `AddAzureOpenAIServices` -2. Configure resilience on a per-client basis for other HTTP clients -3. Or accept that all HTTP clients will have the same resilience behavior (which is often acceptable) - -### Configuration Details - -The resilience handler is configured in `ServiceCollectionExtensions.ConfigureAzureOpenAIResilience()` with the following settings: - -#### Retry Strategy -- **Max Retry Attempts**: 5 -- **Initial Delay**: 2 seconds -- **Backoff Type**: Exponential with jitter -- **Handles**: - - HTTP 429 (Too Many Requests / Rate Limit Exceeded) - - HTTP 408 (Request Timeout) - - HTTP 5xx (Server Errors) - - Network failures and transient errors - -#### Retry-After Header Support -The standard resilience handler automatically respects the `Retry-After` header sent by Azure OpenAI when rate limits are hit. This ensures: -- The application waits the exact duration specified by Azure -- No unnecessary retries that would continue to hit rate limits -- Efficient use of rate limit quotas - -#### Circuit Breaker -- **Sampling Duration**: 30 seconds -- **Break Duration**: 15 seconds -- **Failure Ratio**: 20% (breaks if 20% of requests fail) - -This prevents overwhelming the Azure OpenAI service during prolonged outages or severe rate limiting. - -#### Timeouts -- **Attempt Timeout**: 30 seconds per individual request -- **Total Request Timeout**: 3 minutes for all retry attempts combined - -## How It Works - -### Rate Limit Scenario (HTTP 429) - -When Azure OpenAI returns an HTTP 429 error: - -1. 
The resilience handler catches the error -2. Checks the `Retry-After` header (e.g., "retry after 4 seconds") -3. Waits for the specified duration (with jitter to prevent thundering herd) -4. Retries the request automatically -5. Repeats up to 5 times with exponential backoff -6. If all retries fail, the exception is propagated to the caller - -### Example Flow - -``` -Request 1: Embedding Generation - → HTTP 429 (Retry-After: 4 seconds) - → Wait 4 seconds + jitter - → Retry -Request 2: Embedding Generation - → HTTP 429 (Retry-After: 8 seconds) - → Wait 8 seconds + jitter - → Retry -Request 3: Embedding Generation - → HTTP 200 ✓ -``` - -## Benefits - -### For Rate Limiting -- Automatic handling of Azure OpenAI quota limits -- Respects server-specified retry delays -- Prevents quota waste from premature retries -- Exponential backoff prevents retry storms - -### For Reliability -- Handles transient network failures -- Recovers from temporary service outages -- Circuit breaker prevents cascading failures -- Configurable timeouts prevent infinite waits - -## Usage - -The resilience configuration is applied automatically when using the Chat application or any application that ONLY uses Azure OpenAI services: - -```csharp -// In applications that ONLY use Azure OpenAI (like EssentialCSharp.Chat) -services.AddAzureOpenAIServices(configuration); -``` - -For applications with multiple HTTP clients (e.g., the Web application that also uses hCaptcha and Mailjet): - -```csharp -// Option 1: Disable automatic resilience and configure per-client -services.AddAzureOpenAIServices(configuration, configureResilience: false); - -// Then configure resilience for specific clients as needed -services.AddHttpClient("MyAzureOpenAIClient") - .AddStandardResilienceHandler(/* custom options */); - -// Option 2: Let the default resilience apply to all clients -// This is usually fine as the resilience policies are reasonable for most HTTP APIs -services.AddAzureOpenAIServices(configuration); 
-``` - -No additional code changes are required in application logic - all retry and error handling is transparent. - -## Monitoring - -The resilience handlers emit telemetry through: -- **Application Insights** (when configured) -- **OpenTelemetry** (standard metrics) -- **Console logging** (for development) - -Key metrics include: -- Retry attempts -- Circuit breaker state changes -- Request durations -- Failure rates - -## Best Practices - -### For Development -- Monitor retry counts in logs -- Test with rate limiting scenarios -- Verify Retry-After header handling - -### For Production -- Set up alerts for high retry rates -- Monitor circuit breaker trips -- Track rate limit quota usage -- Consider increasing Azure OpenAI quota if needed - -## Troubleshooting - -### Excessive Retries -If you see many retries: -- Check Azure OpenAI quota limits -- Review request parallelism (currently set to MaxDegreeOfParallelism = 5) -- Consider requesting quota increase from Azure - -### Circuit Breaker Trips -If the circuit breaker frequently opens: -- Review Azure OpenAI service health -- Check for deployment issues -- Verify network connectivity -- Consider increasing the failure ratio threshold - -### Timeouts -If requests timeout despite retries: -- Check individual attempt timeout (30s) -- Review total timeout (3 minutes) -- Verify Azure OpenAI service performance -- Consider increasing timeout values for batch operations - -## Related Documentation - -- [Microsoft.Extensions.Http.Resilience](https://learn.microsoft.com/en-us/dotnet/core/resilience/http-resilience) -- [Azure OpenAI Quota Management](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/quota) -- [Polly Resilience Library](https://www.pollydocs.org/) -- [HTTP Retry Pattern](https://learn.microsoft.com/en-us/azure/architecture/patterns/retry) - -## Version History - -- **v1.0** (2026-02-10): Initial implementation with standard resilience handler - - Added exponential backoff retry strategy - - 
Configured circuit breaker and timeouts - - Automatic Retry-After header support From 4d1499d1fb4eef1e28f6920f77a7a325d535acc8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Feb 2026 06:48:31 +0000 Subject: [PATCH 6/6] Simplify resilience configuration and fix build error - Remove configureResilience parameter - resilience is now always enabled - Fix pre-existing build error in LoggerExtensions.cs (LOGGEN036) - Simplify ConfigureAzureOpenAIResilience comments - All Chat tests passing (4/4) Co-authored-by: BenjaminMichaelis <22186029+BenjaminMichaelis@users.noreply.github.com> --- .../Extensions/ServiceCollectionExtensions.cs | 43 ++++++------------- .../Extensions/LoggerExtensions.cs | 4 +- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs index 32ea6c75..24dfdb53 100644 --- a/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs +++ b/EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs @@ -22,14 +22,12 @@ public static class ServiceCollectionExtensions /// The AI configuration options /// The PostgreSQL connection string for the vector store /// The token credential to use for authentication. If null, DefaultAzureCredential will be used. - /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining public static IServiceCollection AddAzureOpenAIServices( this IServiceCollection services, AIOptions aiOptions, string postgresConnectionString, - TokenCredential? credential = null, - bool configureResilience = true) + TokenCredential? credential = null) { // Use DefaultAzureCredential if no credential is provided // This works both locally (using Azure CLI, Visual Studio, etc.) 
and in Azure (using Managed Identity) @@ -42,11 +40,8 @@ public static IServiceCollection AddAzureOpenAIServices( var endpoint = new Uri(aiOptions.Endpoint); - // Configure HTTP resilience for Azure OpenAI requests if requested - if (configureResilience) - { - ConfigureAzureOpenAIResilience(services); - } + // Configure HTTP resilience for Azure OpenAI requests + ConfigureAzureOpenAIResilience(services); // Register Azure OpenAI services with Managed Identity authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -88,11 +83,6 @@ public static IServiceCollection AddAzureOpenAIServices( /// The service collection to configure /// /// This method configures resilience for ALL HTTP clients created via IHttpClientFactory. - /// This is appropriate when your application ONLY uses Azure OpenAI HTTP clients. - /// - /// If your application has other HTTP clients (e.g., third-party APIs) that shouldn't - /// have the same retry behavior, set configureResilience=false when calling - /// AddAzureOpenAIServices and configure resilience on a per-client basis instead. /// /// IMPORTANT: The Semantic Kernel's AddAzureOpenAI* extension methods (used in this class) /// do NOT expose options to configure specific named or typed HttpClients. The internal @@ -105,15 +95,15 @@ public static IServiceCollection AddAzureOpenAIServices( /// - Respects Retry-After headers from Azure OpenAI /// - Uses exponential backoff with jitter /// - Implements circuit breaker pattern + /// + /// This is appropriate for applications that primarily use Azure OpenAI services. + /// Other HTTP clients (e.g. hCaptcha, Mailjet) inherit these policies too; the standard handler + /// also retries non-idempotent requests such as POST, so confirm that is safe for those clients. 
/// private static void ConfigureAzureOpenAIResilience(IServiceCollection services) { // Configure resilience for all HTTP clients created via IHttpClientFactory - // This is appropriate for applications that ONLY use Azure OpenAI services - // For mixed-use applications, consider setting configureResilience=false - // and applying resilience per-client instead. - // - // Note: The Semantic Kernel's AddAzureOpenAI* methods do not support named/typed + // The Semantic Kernel's AddAzureOpenAI* methods do not support named/typed // HttpClient configuration, so ConfigureHttpClientDefaults is required. services.ConfigureHttpClientDefaults(httpClientBuilder => { @@ -151,13 +141,11 @@ private static void ConfigureAzureOpenAIResilience(IServiceCollection services) /// The service collection to add services to /// The configuration to read AIOptions from /// Optional token credential to use for authentication. If null, DefaultAzureCredential will be used. - /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining public static IServiceCollection AddAzureOpenAIServices( this IServiceCollection services, IConfiguration configuration, - TokenCredential? credential = null, - bool configureResilience = true) + TokenCredential? credential = null) { // Configure AI options from configuration services.Configure(configuration.GetSection("AIOptions")); @@ -172,7 +160,7 @@ public static IServiceCollection AddAzureOpenAIServices( var postgresConnectionString = configuration.GetConnectionString("PostgresVectorStore") ?? 
throw new InvalidOperationException("Connection string 'PostgresVectorStore' not found."); - return services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential, configureResilience); + return services.AddAzureOpenAIServices(aiOptions, postgresConnectionString, credential); } /// @@ -239,15 +227,13 @@ private static IServiceCollection AddPostgresVectorStoreWithManagedIdentity( /// The AI configuration options /// The PostgreSQL connection string for the vector store /// The API key for Azure OpenAI authentication - /// Whether to configure HTTP resilience for all HTTP clients. Default is true. Set to false if you want to configure resilience separately. /// The service collection for chaining [Obsolete("API key authentication is not recommended for production. Use AddAzureOpenAIServices with Managed Identity instead.")] public static IServiceCollection AddAzureOpenAIServicesWithApiKey( this IServiceCollection services, AIOptions aiOptions, string postgresConnectionString, - string apiKey, - bool configureResilience = true) + string apiKey) { if (string.IsNullOrEmpty(apiKey)) { @@ -261,11 +247,8 @@ public static IServiceCollection AddAzureOpenAIServicesWithApiKey( var endpoint = new Uri(aiOptions.Endpoint); - // Configure HTTP resilience for Azure OpenAI requests if requested - if (configureResilience) - { - ConfigureAzureOpenAIResilience(services); - } + // Configure HTTP resilience for Azure OpenAI requests + ConfigureAzureOpenAIResilience(services); // Register Azure OpenAI services with API key authentication #pragma warning disable SKEXP0010 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. 
diff --git a/EssentialCSharp.Web/Extensions/LoggerExtensions.cs b/EssentialCSharp.Web/Extensions/LoggerExtensions.cs index aa183e8b..4658aa3e 100644 --- a/EssentialCSharp.Web/Extensions/LoggerExtensions.cs +++ b/EssentialCSharp.Web/Extensions/LoggerExtensions.cs @@ -4,7 +4,7 @@ namespace EssentialCSharp.Web.Extensions; internal static partial class LoggerExtensions { - [LoggerMessage(Level = LogLevel.Debug, EventId = 1, Message = "Successful captcha with response of: '{JsonResult}'")] + [LoggerMessage(Level = LogLevel.Debug, EventId = 1, Message = "Successful captcha with response")] public static partial void HomeControllerSuccessfulCaptchaResponse( - this ILogger logger, JsonResult jsonResult); + this ILogger logger); }