From 0cf13b0760aaebdebab2feae76dcca7757434016 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:35:18 +0000 Subject: [PATCH 1/2] Initial plan From 8c7d028424c2088a21e4bfd80c1fa0c1dc09ddb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:46:10 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20DEFAULT=5FMAX=5FASYNC?= =?UTF-8?q?=20=E7=A1=AC=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98=EF=BC=8C?= =?UTF-8?q?=E6=94=B9=E4=B8=BA=E4=BB=8E=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E8=AF=BB=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com> --- src/rag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rag.py b/src/rag.py index 2d7b2d7..5980207 100644 --- a/src/rag.py +++ b/src/rag.py @@ -28,7 +28,7 @@ # EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls. # Empirically tested: 8 gives best throughput without hitting rate limits. -DEFAULT_MAX_ASYNC = 8 +DEFAULT_MAX_ASYNC = int(os.getenv("MAX_ASYNC", "8")) # --- 多租户架构:移除全局单实例 --- # 使用多租户管理器替代全局单实例