diff --git a/infra/main.bicep b/infra/main.bicep index e9a84bec7..632fe2983 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -91,6 +91,30 @@ param privateEndpointVNetPrefix string = '192.168.0.0/16' param privateEndpointSubnetAddressPrefix string = '192.168.0.0/24' param appServiceBackendSubnetAddressPrefix string = '192.168.1.0/24' +@description('Additional OpenAI deployments to create if you want more to choose from (e.g., gpt-5, gpt-5-mini)') +param additionalLlmDeployments array = [ + { + name: 'gpt-5' + model: { + name: 'gpt-5' + version: '2025-08-07' + } + sku: { + capacity: 120 + } + } + { + name: 'gpt-5-mini' + model: { + name: 'gpt-5-mini' + version: '2025-08-07' + } + sku: { + capacity: 120 + } + } +] + var resourceToken = toLower(uniqueString(subscription().id, name, location)) var tags = { 'azd-env-name': name } @@ -134,6 +158,7 @@ module resources 'resources.bicep' = { privateEndpointVNetPrefix: privateEndpointVNetPrefix privateEndpointSubnetAddressPrefix: privateEndpointSubnetAddressPrefix appServiceBackendSubnetAddressPrefix: appServiceBackendSubnetAddressPrefix + additionalLlmDeployments: additionalLlmDeployments } } diff --git a/infra/resources.bicep b/infra/resources.bicep index 4b796a526..456fc59a7 100644 --- a/infra/resources.bicep +++ b/infra/resources.bicep @@ -43,6 +43,9 @@ param privateEndpointVNetPrefix string = '192.168.0.0/16' param privateEndpointSubnetAddressPrefix string = '192.168.0.0/24' param appServiceBackendSubnetAddressPrefix string = '192.168.1.0/24' +@description('Optional additional LLM deployments to create on the same Azure OpenAI resource') +param additionalLlmDeployments array = [] + var openai_name = toLower('${name}-aillm-${resourceToken}') var openai_dalle_name = toLower('${name}-aidalle-${resourceToken}') @@ -73,7 +76,7 @@ var databaseName = 'chat' var historyContainerName = 'history' var configContainerName = 'config' -var llmDeployments = [ +var baseLlmDeployments = [ { name: chatGptDeploymentName model: { @@ 
-97,6 +100,23 @@ var llmDeployments = [ } ] +var mappedAdditionalDeployments = [ + for addition in additionalLlmDeployments: { + name: addition.name + model: { + format: 'OpenAI' + name: addition.model.name + version: addition.model.version + } + sku: { + name: 'GlobalStandard' + capacity: addition.sku.capacity + } + } +] + +var llmDeployments = concat(baseLlmDeployments, mappedAdditionalDeployments) + module privateEndpoints 'private_endpoints_core.bicep' = if (usePrivateEndpoints) { name: 'private-endpoints' params: { @@ -148,6 +168,10 @@ var appSettingsCommon = [ name: 'SCM_DO_BUILD_DURING_DEPLOYMENT' value: 'true' } + { + name: 'AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME' + value: embeddingDeploymentName + } { name: 'AZURE_OPENAI_API_INSTANCE_NAME' value: openai_name @@ -156,10 +180,6 @@ var appSettingsCommon = [ name: 'AZURE_OPENAI_API_DEPLOYMENT_NAME' value: chatGptDeploymentName } - { - name: 'AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME' - value: embeddingDeploymentName - } { name: 'AZURE_OPENAI_API_VERSION' value: openai_api_version @@ -214,6 +234,13 @@ var appSettingsCommon = [ } ] +var additionalModelSettingsDeployment = [ + for (deployment, i) in mappedAdditionalDeployments: { + name: 'AZURE_OPENAI_API_DEPLOYMENT_NAME_MODEL_${i + 1}' + value: deployment.name + } +] + var appSettingsWithLocalAuth = disableLocalAuth ? [] : [ @@ -258,7 +285,7 @@ resource webApp 'Microsoft.Web/sites@2024-04-01' = { appCommandLine: 'next start' ftpsState: 'Disabled' minTlsVersion: '1.2' - appSettings: concat(appSettingsCommon, appSettingsWithLocalAuth) + appSettings: concat(appSettingsCommon, additionalModelSettingsDeployment, appSettingsWithLocalAuth) } } identity: { type: 'SystemAssigned' } @@ -503,12 +530,10 @@ resource llmdeployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05 model: deployment.model /*raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null*/ } - sku: contains(deployment, 'sku') - ? 
deployment.sku - : { - name: 'Standard' - capacity: deployment.capacity - } + sku: deployment.?sku ?? { + name: 'Standard' + capacity: deployment.capacity + } } ] diff --git a/src/features/chat-page/chat-header/chat-header.tsx b/src/features/chat-page/chat-header/chat-header.tsx index 269bc880e..25f6e4bd6 100644 --- a/src/features/chat-page/chat-header/chat-header.tsx +++ b/src/features/chat-page/chat-header/chat-header.tsx @@ -6,6 +6,14 @@ import { ChatDocumentModel, ChatThreadModel } from "../chat-services/models"; import { DocumentDetail } from "./document-detail"; import { ExtensionDetail } from "./extension-detail"; import { PersonaDetail } from "./persona-detail"; +import { chatStore, useChat } from "@/features/chat-page/chat-store"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/features/ui/select"; interface Props { chatThread: ChatThreadModel; @@ -29,7 +37,9 @@ export const ChatHeader: FC = (props) => { {persona} -
+
+ {/* Model selector */} + = (props) => {
); }; + +const ModelSelect: FC = () => { + const { selectedModel } = useChat(); + const onChange = (v: string) => chatStore.updateSelectedModel(v); + + return ( + + ); +}; diff --git a/src/features/chat-page/chat-services/chat-api/chat-api-extension.ts b/src/features/chat-page/chat-services/chat-api/chat-api-extension.ts index 88f41e649..2ee0c2c95 100644 --- a/src/features/chat-page/chat-services/chat-api/chat-api-extension.ts +++ b/src/features/chat-page/chat-services/chat-api/chat-api-extension.ts @@ -13,10 +13,11 @@ export const ChatApiExtensions = async (props: { history: ChatCompletionMessageParam[]; extensions: RunnableToolFunction[]; signal: AbortSignal; + model?: string; }): Promise => { - const { userMessage, history, signal, chatThread, extensions } = props; + const { userMessage, history, signal, chatThread, extensions, model } = props; - const openAI = OpenAIInstance(); + const openAI = OpenAIInstance(model); const systemMessage = await extensionsSystemMessage(chatThread); return openAI.beta.chat.completions.runTools( { diff --git a/src/features/chat-page/chat-services/chat-api/chat-api-multimodal.tsx b/src/features/chat-page/chat-services/chat-api/chat-api-multimodal.tsx index 45630514e..5e0b2de2d 100644 --- a/src/features/chat-page/chat-services/chat-api/chat-api-multimodal.tsx +++ b/src/features/chat-page/chat-services/chat-api/chat-api-multimodal.tsx @@ -9,10 +9,11 @@ export const ChatApiMultimodal = (props: { userMessage: string; file: string; signal: AbortSignal; + model?: string; }): ChatCompletionStreamingRunner => { - const { chatThread, userMessage, signal, file } = props; + const { chatThread, userMessage, signal, file, model } = props; - const openAI = OpenAIInstance(); + const openAI = OpenAIInstance(model); return openAI.beta.chat.completions.stream( { diff --git a/src/features/chat-page/chat-services/chat-api/chat-api-rag.ts b/src/features/chat-page/chat-services/chat-api/chat-api-rag.ts index 9b4b37e4f..303d4edc1 100644 --- 
a/src/features/chat-page/chat-services/chat-api/chat-api-rag.ts +++ b/src/features/chat-page/chat-services/chat-api/chat-api-rag.ts @@ -17,10 +17,11 @@ export const ChatApiRAG = async (props: { userMessage: string; history: ChatCompletionMessageParam[]; signal: AbortSignal; + model?: string; }): Promise => { - const { chatThread, userMessage, history, signal } = props; + const { chatThread, userMessage, history, signal, model } = props; - const openAI = OpenAIInstance(); + const openAI = OpenAIInstance(model); const documentResponse = await SimilaritySearch( userMessage, diff --git a/src/features/chat-page/chat-services/chat-api/chat-api.ts b/src/features/chat-page/chat-services/chat-api/chat-api.ts index f176587a0..97b5705f8 100644 --- a/src/features/chat-page/chat-services/chat-api/chat-api.ts +++ b/src/features/chat-page/chat-services/chat-api/chat-api.ts @@ -72,6 +72,7 @@ export const ChatAPIEntry = async (props: UserPrompt, signal: AbortSignal) => { userMessage: props.message, history: history, signal: signal, + model: props.model, }); break; case "multimodal": @@ -80,6 +81,7 @@ export const ChatAPIEntry = async (props: UserPrompt, signal: AbortSignal) => { userMessage: props.message, file: props.multimodalImage, signal: signal, + model: props.model, }); break; case "extensions": @@ -89,6 +91,7 @@ export const ChatAPIEntry = async (props: UserPrompt, signal: AbortSignal) => { history: history, extensions: extension, signal: signal, + model: props.model, }); break; } diff --git a/src/features/chat-page/chat-services/models.ts b/src/features/chat-page/chat-services/models.ts index c8e02d208..0b1cf1c7a 100644 --- a/src/features/chat-page/chat-services/models.ts +++ b/src/features/chat-page/chat-services/models.ts @@ -40,6 +40,7 @@ export interface UserPrompt { id: string; // thread id message: string; multimodalImage: string; + model?: string; } export interface ChatDocumentModel { diff --git a/src/features/chat-page/chat-store.tsx 
b/src/features/chat-page/chat-store.tsx index 4c9124be0..8581bf641 100644 --- a/src/features/chat-page/chat-store.tsx +++ b/src/features/chat-page/chat-store.tsx @@ -35,6 +35,7 @@ class ChatState { public autoScroll: boolean = false; public userName: string = ""; public chatThreadId: string = ""; + public selectedModel: string = ""; private chatThread: ChatThreadModel | undefined; @@ -58,6 +59,10 @@ class ChatState { this.loading = value; } + public updateSelectedModel(model: string) { + this.selectedModel = model; + } + public initChatSession({ userName, messages, @@ -284,6 +289,7 @@ class ChatState { const body = JSON.stringify({ id: this.chatThreadId, message: this.input, + model: this.selectedModel, }); formData.append("content", body); diff --git a/src/features/common/services/openai.ts b/src/features/common/services/openai.ts index b9962858d..b44a6c26f 100644 --- a/src/features/common/services/openai.ts +++ b/src/features/common/services/openai.ts @@ -4,28 +4,46 @@ import { AzureOpenAI } from "openai"; const USE_MANAGED_IDENTITIES = process.env.USE_MANAGED_IDENTITIES === "true"; -export const OpenAIInstance = () => { +export const OpenAIInstance = (deploymentOverride?: string) => { const endpointSuffix = process.env.AZURE_OPENAI_API_ENDPOINT_SUFFIX || "openai.azure.com"; - let token = process.env.AZURE_OPENAI_API_KEY; + const token = process.env.AZURE_OPENAI_API_KEY; + + // Resolve model/instance/deployment based on optional override + const instanceName = process.env.AZURE_OPENAI_API_INSTANCE_NAME; + const defaultDeployment = process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME; + const apiVersion = process.env.AZURE_OPENAI_API_VERSION; + + let deploymentName = defaultDeployment; + + if (deploymentOverride) { + for (let i = 1; i <= 100; i++) { + const key = `AZURE_OPENAI_API_DEPLOYMENT_NAME_MODEL_${i}`; + const deploymentCandidate = process.env[key]; + if (!deploymentCandidate) continue; + if (deploymentCandidate === deploymentOverride) { + deploymentName = deploymentCandidate;
 + break; + } + } + } + if (USE_MANAGED_IDENTITIES) { const credential = new DefaultAzureCredential(); const scope = "https://cognitiveservices.azure.com/.default"; const azureADTokenProvider = getBearerTokenProvider(credential, scope); - const deployment = process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME; - const apiVersion = process.env.AZURE_OPENAI_API_VERSION; const client = new AzureOpenAI({ azureADTokenProvider, - deployment, + deployment: deploymentName, apiVersion, - baseURL: `https://${process.env.AZURE_OPENAI_API_INSTANCE_NAME}.${endpointSuffix}/openai/deployments/${process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME}` + baseURL: `https://${instanceName}.${endpointSuffix}/openai/deployments/${deploymentName}` }); return client; } else { const openai = new OpenAI({ apiKey: token, - baseURL: `https://${process.env.AZURE_OPENAI_API_INSTANCE_NAME}.${endpointSuffix}/openai/deployments/${process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME}`, - defaultQuery: { "api-version": process.env.AZURE_OPENAI_API_VERSION }, - defaultHeaders: { "api-key": process.env.AZURE_OPENAI_API_KEY }, + baseURL: `https://${instanceName}.${endpointSuffix}/openai/deployments/${deploymentName}`, + defaultQuery: { "api-version": apiVersion }, + defaultHeaders: { "api-key": token }, }); return openai; }