diff --git a/apps/sim/app/api/copilot/chat/route.ts b/apps/sim/app/api/copilot/chat/route.ts
index 9d31bf5c36..b82a580748 100644
--- a/apps/sim/app/api/copilot/chat/route.ts
+++ b/apps/sim/app/api/copilot/chat/route.ts
@@ -285,6 +285,13 @@ export async function POST(req: NextRequest) {
         apiVersion: 'preview',
         endpoint: env.AZURE_OPENAI_ENDPOINT,
       }
+    } else if (providerEnv === 'azure-anthropic') {
+      providerConfig = {
+        provider: 'azure-anthropic',
+        model: envModel,
+        apiKey: env.AZURE_ANTHROPIC_API_KEY,
+        endpoint: env.AZURE_ANTHROPIC_ENDPOINT,
+      }
     } else if (providerEnv === 'vertex') {
       providerConfig = {
         provider: 'vertex',
diff --git a/apps/sim/blocks/utils.ts b/apps/sim/blocks/utils.ts
index 7de0b518af..fa0e28590e 100644
--- a/apps/sim/blocks/utils.ts
+++ b/apps/sim/blocks/utils.ts
@@ -80,7 +80,7 @@ export function getApiKeyCondition() {
 
 /**
  * Returns the standard provider credential subblocks used by LLM-based blocks.
- * This includes: Vertex AI OAuth, API Key, Azure OpenAI, Vertex AI config, and Bedrock config.
+ * This includes: Vertex AI OAuth, API Key, Azure (OpenAI + Anthropic), Vertex AI config, and Bedrock config.
  *
  * Usage: Spread into your block's subBlocks array after block-specific fields
  */
@@ -111,14 +111,14 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
     },
     {
       id: 'azureEndpoint',
-      title: 'Azure OpenAI Endpoint',
+      title: 'Azure Endpoint',
       type: 'short-input',
       password: true,
-      placeholder: 'https://your-resource.openai.azure.com',
+      placeholder: 'https://your-resource.services.ai.azure.com',
       connectionDroppable: false,
       condition: {
         field: 'model',
-        value: providers['azure-openai'].models,
+        value: [...providers['azure-openai'].models, ...providers['azure-anthropic'].models],
       },
     },
     {
@@ -202,7 +202,7 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
  */
 export const PROVIDER_CREDENTIAL_INPUTS = {
   apiKey: { type: 'string', description: 'Provider API key' },
-  azureEndpoint: { type: 'string', description: 'Azure OpenAI endpoint URL' },
+  azureEndpoint: { type: 'string', description: 'Azure endpoint URL' },
   azureApiVersion: { type: 'string', description: 'Azure API version' },
   vertexProject: { type: 'string', description: 'Google Cloud project ID for Vertex AI' },
   vertexLocation: { type: 'string', description: 'Google Cloud location for Vertex AI' },
diff --git a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
index 3e95b2f856..4b036fe2e7 100644
--- a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
+++ b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
@@ -130,9 +130,11 @@ export class EvaluatorBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = evaluatorConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/executor/handlers/router/router-handler.ts b/apps/sim/executor/handlers/router/router-handler.ts
index 766a4aac66..636016cd48 100644
--- a/apps/sim/executor/handlers/router/router-handler.ts
+++ b/apps/sim/executor/handlers/router/router-handler.ts
@@ -105,9 +105,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
@@ -262,9 +264,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/lib/copilot/config.ts b/apps/sim/lib/copilot/config.ts
index 4b9c89274c..5700e99300 100644
--- a/apps/sim/lib/copilot/config.ts
+++ b/apps/sim/lib/copilot/config.ts
@@ -12,6 +12,7 @@ const VALID_PROVIDER_IDS: readonly ProviderId[] = [
   'openai',
   'azure-openai',
   'anthropic',
+  'azure-anthropic',
   'google',
   'deepseek',
   'xai',
diff --git a/apps/sim/lib/copilot/types.ts b/apps/sim/lib/copilot/types.ts
index 6ed8133082..c7677dc359 100644
--- a/apps/sim/lib/copilot/types.ts
+++ b/apps/sim/lib/copilot/types.ts
@@ -147,6 +147,12 @@ export type CopilotProviderConfig =
       apiVersion?: string
       endpoint?: string
     }
+  | {
+      provider: 'azure-anthropic'
+      model: string
+      apiKey?: string
+      endpoint?: string
+    }
   | {
       provider: 'vertex'
       model: string
@@ -155,7 +161,7 @@ export type CopilotProviderConfig =
       vertexLocation?: string
     }
   | {
-      provider: Exclude<ProviderId, 'azure-openai' | 'vertex'>
+      provider: Exclude<ProviderId, 'azure-openai' | 'azure-anthropic' | 'vertex'>
       model?: string
       apiKey?: string
     }
diff --git a/apps/sim/lib/tokenization/constants.ts b/apps/sim/lib/tokenization/constants.ts
index 010ef47437..a10b1995da 100644
--- a/apps/sim/lib/tokenization/constants.ts
+++ b/apps/sim/lib/tokenization/constants.ts
@@ -21,6 +21,11 @@ export const TOKENIZATION_CONFIG = {
     confidence: 'high',
     supportedMethods: ['heuristic', 'fallback'],
   },
+  'azure-anthropic': {
+    avgCharsPerToken: 4.5,
+    confidence: 'high',
+    supportedMethods: ['heuristic', 'fallback'],
+  },
   google: {
     avgCharsPerToken: 5,
     confidence: 'medium',
diff --git a/apps/sim/lib/tokenization/estimators.ts b/apps/sim/lib/tokenization/estimators.ts
index 53ce719658..01aed1c1e6 100644
--- a/apps/sim/lib/tokenization/estimators.ts
+++ b/apps/sim/lib/tokenization/estimators.ts
@@ -204,6 +204,7 @@ export function estimateTokenCount(text: string, providerId?: string): TokenEsti
       estimatedTokens = estimateOpenAITokens(text)
       break
     case 'anthropic':
+    case 'azure-anthropic':
       estimatedTokens = estimateAnthropicTokens(text)
       break
     case 'google':
diff --git a/apps/sim/providers/azure-openai/index.ts b/apps/sim/providers/azure-openai/index.ts
index ca63904df2..c265869207 100644
--- a/apps/sim/providers/azure-openai/index.ts
+++ b/apps/sim/providers/azure-openai/index.ts
@@ -1,20 +1,8 @@
-import { createLogger } from '@sim/logger'
 import { AzureOpenAI } from 'openai'
-import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/completions'
 import { env } from '@/lib/core/config/env'
+import { createLogger } from '@/lib/logs/console/logger'
 import type { StreamingExecution } from '@/executor/types'
-import { MAX_TOOL_ITERATIONS } from '@/providers'
-import {
-  checkForForcedToolUsage,
-  createReadableStreamFromAzureOpenAIStream,
-  extractApiVersionFromUrl,
-  extractBaseUrl,
-  extractDeploymentFromUrl,
-  isChatCompletionsEndpoint,
-  isResponsesEndpoint,
-} from '@/providers/azure-openai/utils'
 import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
-import { executeResponsesProviderRequest } from '@/providers/openai/core'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -22,66 +10,137 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
-  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
+  trackForcedToolUsage,
 } from '@/providers/utils'
 import { executeTool } from '@/tools'
 
 const logger = createLogger('AzureOpenAIProvider')
 
 /**
- * Executes a request using the chat completions API.
- * Used when the endpoint URL indicates chat completions.
+ * Determines if the API version uses the Responses API (2025+) or Chat Completions API
  */
-async function executeChatCompletionsRequest(
-  request: ProviderRequest,
-  azureEndpoint: string,
-  azureApiVersion: string,
-  deploymentName: string
-): Promise<ProviderResponse | StreamingExecution> {
-  logger.info('Using Azure OpenAI Chat Completions API', {
-    model: request.model,
-    endpoint: azureEndpoint,
-    deploymentName,
-    apiVersion: azureApiVersion,
-    hasSystemPrompt: !!request.systemPrompt,
-    hasMessages: !!request.messages?.length,
-    hasTools: !!request.tools?.length,
-    toolCount: request.tools?.length || 0,
-    hasResponseFormat: !!request.responseFormat,
-    stream: !!request.stream,
-  })
+function useResponsesApi(apiVersion: string): boolean {
+  // 2025-* versions use the Responses API
+  // 2024-* and earlier versions use the Chat Completions API
+  return apiVersion.startsWith('2025-')
+}
+
+/**
+ * Helper function to convert an Azure OpenAI Responses API stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromResponsesApiStream(
+  responsesStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const event of responsesStream) {
+          if (event.usage) {
+            usageData = event.usage
+          }
+
+          if (event.type === 'response.output_text.delta') {
+            const content = event.delta || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.content_part.delta') {
+            const content = event.delta?.text || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.completed' || event.type === 'response.done') {
+            if (event.response?.usage) {
+              usageData = event.response.usage
+            }
+          }
+        }
-  const azureOpenAI = new AzureOpenAI({
-    apiKey: request.apiKey,
-    apiVersion: azureApiVersion,
-    endpoint: azureEndpoint,
+
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
   })
+}
-
-  const allMessages: any[] = []
+
+/**
+ * Helper function to convert an Azure OpenAI Chat Completions stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromChatCompletionsStream(
+  azureOpenAIStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of azureOpenAIStream) {
+          if (chunk.usage) {
+            usageData = chunk.usage
+          }
-  if (request.systemPrompt) {
-    allMessages.push({
-      role: 'system',
-      content: request.systemPrompt,
-    })
-  }
+          const content = chunk.choices[0]?.delta?.content || ''
+          if (content) {
+            fullContent += content
+            controller.enqueue(new TextEncoder().encode(content))
+          }
+        }
+
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
+/**
+ * Executes a request using the Responses API (for 2025+ API versions)
+ */
+async function executeWithResponsesApi(
+  azureOpenAI: AzureOpenAI,
+  request: ProviderRequest,
+  deploymentName: string,
+  providerStartTime: number,
+  providerStartTimeISO: string
+): Promise<ProviderResponse | StreamingExecution> {
+  const inputMessages: any[] = []
 
   if (request.context) {
-    allMessages.push({
+    inputMessages.push({
       role: 'user',
       content: request.context,
     })
   }
 
   if (request.messages) {
-    allMessages.push(...request.messages)
+    inputMessages.push(...request.messages)
   }
 
   const tools = request.tools?.length
     ? request.tools.map((tool) => ({
-        type: 'function',
+        type: 'function' as const,
         function: {
           name: tool.id,
           description: tool.description,
@@ -92,88 +151,75 @@ async function executeChatCompletionsRequest(
   const payload: any = {
     model: deploymentName,
-    messages: allMessages,
+    input: inputMessages.length > 0 ? inputMessages : request.systemPrompt || '',
+  }
+
+  if (request.systemPrompt) {
+    payload.instructions = request.systemPrompt
   }
 
   if (request.temperature !== undefined) payload.temperature = request.temperature
-  if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens
+  if (request.maxTokens !== undefined) payload.max_output_tokens = request.maxTokens
 
-  if (request.reasoningEffort !== undefined) payload.reasoning_effort = request.reasoningEffort
-  if (request.verbosity !== undefined) payload.verbosity = request.verbosity
+  if (request.reasoningEffort !== undefined) {
+    payload.reasoning = { effort: request.reasoningEffort }
+  }
 
   if (request.responseFormat) {
-    payload.response_format = {
-      type: 'json_schema',
-      json_schema: {
-        name: request.responseFormat.name || 'response_schema',
-        schema: request.responseFormat.schema || request.responseFormat,
-        strict: request.responseFormat.strict !== false,
+    payload.text = {
+      format: {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
+        },
       },
     }
-
-    logger.info('Added JSON schema response format to Azure OpenAI request')
+    logger.info('Added JSON schema text format to Responses API request')
   }
 
-  let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
-
   if (tools?.length) {
-    preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'azure-openai')
-    const { tools: filteredTools, toolChoice } = preparedTools
-
-    if (filteredTools?.length && toolChoice) {
-      payload.tools = filteredTools
-      payload.tool_choice = toolChoice
-
-      logger.info('Azure OpenAI request configuration:', {
-        toolCount: filteredTools.length,
-        toolChoice:
-          typeof toolChoice === 'string'
-            ? toolChoice
-            : toolChoice.type === 'function'
-              ? `force:${toolChoice.function.name}`
-              : toolChoice.type === 'tool'
-                ? `force:${toolChoice.name}`
-                : toolChoice.type === 'any'
-                  ? `force:${toolChoice.any?.name || 'unknown'}`
-                  : 'unknown',
-        model: deploymentName,
-      })
+    payload.tools = tools
+
+    const forcedTools = request.tools?.filter((t) => t.usageControl === 'force') || []
+    if (forcedTools.length > 0) {
+      if (forcedTools.length === 1) {
+        payload.tool_choice = {
+          type: 'function',
+          function: { name: forcedTools[0].id },
+        }
+      } else {
+        payload.tool_choice = 'required'
+      }
+    } else {
+      payload.tool_choice = 'auto'
+    }
-    }
-  }
 
-  const providerStartTime = Date.now()
-  const providerStartTimeISO = new Date(providerStartTime).toISOString()
+    logger.info('Responses API request configuration:', {
+      toolCount: tools.length,
+      model: deploymentName,
+    })
+  }
 
   try {
     if (request.stream && (!tools || tools.length === 0)) {
-      logger.info('Using streaming response for Azure OpenAI request')
+      logger.info('Using streaming response for Responses API request')
 
-      const streamingParams: ChatCompletionCreateParamsStreaming = {
+      const streamResponse = await (azureOpenAI as any).responses.create({
         ...payload,
         stream: true,
-        stream_options: { include_usage: true },
+      })
+
+      const tokenUsage = {
+        prompt: 0,
+        completion: 0,
+        total: 0,
       }
 
-      const streamResponse = await azureOpenAI.chat.completions.create(streamingParams)
       const streamingResult = {
-        stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
+        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
           streamingResult.execution.output.content = content
-          streamingResult.execution.output.tokens = {
-            input: usage.prompt_tokens,
-            output: usage.completion_tokens,
-            total: usage.total_tokens,
-          }
-
-          const costResult = calculateCost(
-            request.model,
-            usage.prompt_tokens,
-            usage.completion_tokens
-          )
-          streamingResult.execution.output.cost = {
-            input: costResult.input,
-            output: costResult.output,
-            total: costResult.total,
-          }
 
           const streamEndTime = Date.now()
           const streamEndTimeISO = new Date(streamEndTime).toISOString()
@@ -190,13 +236,23 @@ async function executeChatCompletionsRequest(
                streamEndTime - providerStartTime
            }
          }
+
+          if (usage) {
+            streamingResult.execution.output.tokens = {
+              prompt: usage.input_tokens || usage.prompt_tokens || 0,
+              completion: usage.output_tokens || usage.completion_tokens || 0,
+              total:
+                (usage.input_tokens || usage.prompt_tokens || 0) +
+                (usage.output_tokens || usage.completion_tokens || 0),
+            }
+          }
         }),
         execution: {
           success: true,
           output: {
             content: '',
             model: request.model,
-            tokens: { input: 0, output: 0, total: 0 },
+            tokens: tokenUsage,
             toolCalls: undefined,
             providerTiming: {
               startTime: providerStartTimeISO,
@@ -212,7 +268,6 @@ async function executeChatCompletionsRequest(
             },
           ],
         },
-        cost: { input: 0, output: 0, total: 0 },
       },
       logs: [],
       metadata: {
@@ -223,30 +278,29 @@ async function executeChatCompletionsRequest(
       },
     } as StreamingExecution
 
-    return streamingResult as StreamingExecution
+    return streamingResult
   }
 
   const initialCallTime = Date.now()
 
-  const originalToolChoice = payload.tool_choice
-  const forcedTools = preparedTools?.forcedTools || []
-  let usedForcedTools: string[] = []
-
-  let currentResponse = await azureOpenAI.chat.completions.create(payload)
+  let currentResponse = await (azureOpenAI as any).responses.create(payload)
   const firstResponseTime = Date.now() - initialCallTime
 
-  let content = currentResponse.choices[0]?.message?.content || ''
+  let content = currentResponse.output_text || ''
+
   const tokens = {
-    input: currentResponse.usage?.prompt_tokens || 0,
-    output: currentResponse.usage?.completion_tokens || 0,
-    total: currentResponse.usage?.total_tokens || 0,
+    prompt: currentResponse.usage?.input_tokens || 0,
+    completion: currentResponse.usage?.output_tokens || 0,
+    total:
+      (currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
   }
 
-  const toolCalls = []
-  const toolResults = []
-  const currentMessages = [...allMessages]
+
+  const toolCalls: any[] = []
+  const toolResults: any[] = []
   let iterationCount = 0
+  const MAX_ITERATIONS = 10
+
   let modelTime = firstResponseTime
   let toolsTime = 0
-  let hasUsedForcedTool = false
 
   const timeSegments: TimeSegment[] = [
     {
@@ -258,168 +312,94 @@ async function executeChatCompletionsRequest(
     },
   ]
 
-  const firstCheckResult = checkForForcedToolUsage(
-    currentResponse,
-    originalToolChoice,
-    logger,
-    forcedTools,
-    usedForcedTools
-  )
-  hasUsedForcedTool = firstCheckResult.hasUsedForcedTool
-  usedForcedTools = firstCheckResult.usedForcedTools
-
-  while (iterationCount < MAX_TOOL_ITERATIONS) {
-    if (currentResponse.choices[0]?.message?.content) {
-      content = currentResponse.choices[0].message.content
-    }
+  while (iterationCount < MAX_ITERATIONS) {
+    const toolCallsInResponse =
+      currentResponse.output?.filter((item: any) => item.type === 'function_call') || []
 
-    const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
-    if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
+    if (toolCallsInResponse.length === 0) {
       break
     }
 
     logger.info(
-      `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_TOOL_ITERATIONS})`
+      `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
     )
 
     const toolsStartTime = Date.now()
 
-    const toolExecutionPromises = toolCallsInResponse.map(async (toolCall) => {
-      const toolCallStartTime = Date.now()
-      const toolName = toolCall.function.name
-
+    for (const toolCall of toolCallsInResponse) {
       try {
-        const toolArgs = JSON.parse(toolCall.function.arguments)
-        const tool = request.tools?.find((t) => t.id === toolName)
+        const toolName = toolCall.name
+        const toolArgs =
+          typeof toolCall.arguments === 'string'
+            ? JSON.parse(toolCall.arguments)
+            : toolCall.arguments
 
-        if (!tool) return null
+        const tool = request.tools?.find((t) => t.id === toolName)
+        if (!tool) continue
 
+        const toolCallStartTime = Date.now()
         const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
-        const result = await executeTool(toolName, executionParams)
-        const toolCallEndTime = Date.now()
 
-        return {
-          toolCall,
-          toolName,
-          toolParams,
-          result,
-          startTime: toolCallStartTime,
-          endTime: toolCallEndTime,
-          duration: toolCallEndTime - toolCallStartTime,
-        }
-      } catch (error) {
+        const result = await executeTool(toolName, executionParams, true)
         const toolCallEndTime = Date.now()
-        logger.error('Error processing tool call:', { error, toolName })
-
-        return {
-          toolCall,
-          toolName,
-          toolParams: {},
-          result: {
-            success: false,
-            output: undefined,
-            error: error instanceof Error ? error.message : 'Tool execution failed',
-          },
+        const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+        timeSegments.push({
+          type: 'tool',
+          name: toolName,
           startTime: toolCallStartTime,
           endTime: toolCallEndTime,
-          duration: toolCallEndTime - toolCallStartTime,
+          duration: toolCallDuration,
+        })
+
+        let resultContent: any
+        if (result.success) {
+          toolResults.push(result.output)
+          resultContent = result.output
+        } else {
+          resultContent = {
+            error: true,
+            message: result.error || 'Tool execution failed',
+            tool: toolName,
+          }
         }
-      }
-    })
-
-    const executionResults = await Promise.allSettled(toolExecutionPromises)
-
-    currentMessages.push({
-      role: 'assistant',
-      content: null,
-      tool_calls: toolCallsInResponse.map((tc) => ({
-        id: tc.id,
-        type: 'function',
-        function: {
-          name: tc.function.name,
-          arguments: tc.function.arguments,
-        },
-      })),
-    })
-
-    for (const settledResult of executionResults) {
-      if (settledResult.status === 'rejected' || !settledResult.value) continue
-
-      const { toolCall, toolName, toolParams, result, startTime, endTime, duration } =
-        settledResult.value
-
-      timeSegments.push({
-        type: 'tool',
-        name: toolName,
-        startTime: startTime,
-        endTime: endTime,
-        duration: duration,
-      })
 
-      let resultContent: any
-      if (result.success) {
-        toolResults.push(result.output)
-        resultContent = result.output
-      } else {
-        resultContent = {
-          error: true,
-          message: result.error || 'Tool execution failed',
-          tool: toolName,
-        }
-      }
 
-      toolCalls.push({
-        name: toolName,
-        arguments: toolParams,
-        startTime: new Date(startTime).toISOString(),
-        endTime: new Date(endTime).toISOString(),
-        duration: duration,
-        result: resultContent,
-        success: result.success,
-      })
-
-      currentMessages.push({
-        role: 'tool',
-        tool_call_id: toolCall.id,
-        content: JSON.stringify(resultContent),
-      })
+        toolCalls.push({
+          name: toolName,
+          arguments: toolParams,
+          startTime: new Date(toolCallStartTime).toISOString(),
+          endTime: new Date(toolCallEndTime).toISOString(),
+          duration: toolCallDuration,
+          result: resultContent,
+          success: result.success,
+        })
+
+        // Add function call output to input for next request
+        inputMessages.push({
+          type: 'function_call_output',
+          call_id: toolCall.call_id || toolCall.id,
+          output: JSON.stringify(resultContent),
+        })
+      } catch (error) {
+        logger.error('Error processing tool call:', {
+          error,
+          toolName: toolCall?.name,
+        })
      }
    }
 
     const thisToolsTime = Date.now() - toolsStartTime
     toolsTime += thisToolsTime
 
+    // Make the next request
+    const nextModelStartTime = Date.now()
     const nextPayload = {
       ...payload,
-      messages: currentMessages,
-    }
-
-    if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
-      const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
-
-      if (remainingTools.length > 0) {
-        nextPayload.tool_choice = {
-          type: 'function',
-          function: { name: remainingTools[0] },
-        }
-        logger.info(`Forcing next tool: ${remainingTools[0]}`)
-      } else {
-        nextPayload.tool_choice = 'auto'
-        logger.info('All forced tools have been used, switching to auto tool_choice')
-      }
+      input: inputMessages,
+      tool_choice: 'auto',
     }
 
-    const nextModelStartTime = Date.now()
-    currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
-
-    const nextCheckResult = checkForForcedToolUsage(
-      currentResponse,
-      nextPayload.tool_choice,
-      logger,
-      forcedTools,
-      usedForcedTools
-    )
-    hasUsedForcedTool = nextCheckResult.hasUsedForcedTool
-    usedForcedTools = nextCheckResult.usedForcedTools
+    currentResponse = await (azureOpenAI as any).responses.create(nextPayload)
 
     const nextModelEndTime = Date.now()
     const thisModelTime = nextModelEndTime - nextModelStartTime
@@ -434,60 +414,45 @@ async function executeChatCompletionsRequest(
 
     modelTime += thisModelTime
 
-    if (currentResponse.choices[0]?.message?.content) {
-      content = currentResponse.choices[0].message.content
+    // Update content
+    if (currentResponse.output_text) {
+      content = currentResponse.output_text
     }
 
+    // Update token counts
     if (currentResponse.usage) {
-      tokens.input += currentResponse.usage.prompt_tokens || 0
-      tokens.output += currentResponse.usage.completion_tokens || 0
-      tokens.total += currentResponse.usage.total_tokens || 0
+      tokens.prompt += currentResponse.usage.input_tokens || 0
+      tokens.completion += currentResponse.usage.output_tokens || 0
+      tokens.total = tokens.prompt + tokens.completion
     }
 
     iterationCount++
   }
 
+  // Handle streaming for final response after tool processing
   if (request.stream) {
-    logger.info('Using streaming for final response after tool processing')
+    logger.info('Using streaming for final response after tool processing (Responses API)')
 
-    const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)
-
-    const streamingParams: ChatCompletionCreateParamsStreaming = {
+    const streamingPayload = {
       ...payload,
-      messages: currentMessages,
+      input: inputMessages,
       tool_choice: 'auto',
       stream: true,
-      stream_options: { include_usage: true },
    }
-    const streamResponse = await azureOpenAI.chat.completions.create(streamingParams)
+
+    const streamResponse = await (azureOpenAI as any).responses.create(streamingPayload)
 
     const streamingResult = {
-      stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
+      stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
         streamingResult.execution.output.content = content
 
-        streamingResult.execution.output.tokens = {
-          input: tokens.input + usage.prompt_tokens,
-          output: tokens.output + usage.completion_tokens,
-          total: tokens.total + usage.total_tokens,
-        }
-
-        const streamCost = calculateCost(
-          request.model,
-          usage.prompt_tokens,
-          usage.completion_tokens
-        )
-        streamingResult.execution.output.cost = {
-          input: accumulatedCost.input + streamCost.input,
-          output: accumulatedCost.output + streamCost.output,
-          total: accumulatedCost.total + streamCost.total,
-        }
-
-        const streamEndTime = Date.now()
-        const streamEndTimeISO = new Date(streamEndTime).toISOString()
-
-        if (streamingResult.execution.output.providerTiming) {
-          streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
-          streamingResult.execution.output.providerTiming.duration =
-            streamEndTime - providerStartTime
+        if (usage) {
+          streamingResult.execution.output.tokens = {
+            prompt: usage.input_tokens || tokens.prompt,
+            completion: usage.output_tokens || tokens.completion,
+            total:
+              (usage.input_tokens || tokens.prompt) + (usage.output_tokens || tokens.completion),
+          }
         }
       }),
       execution: {
         success: true,
         output: {
           content: '',
           model: request.model,
           tokens: {
-            input: tokens.input,
-            output: tokens.output,
+            prompt: tokens.prompt,
+            completion: tokens.completion,
             total: tokens.total,
           },
           toolCalls:
@@ -517,11 +482,6 @@ async function executeChatCompletionsRequest(
             iterations: iterationCount + 1,
             timeSegments: timeSegments,
           },
-          cost: {
-            input: accumulatedCost.input,
-            output: accumulatedCost.output,
-            total: accumulatedCost.total,
-          },
         },
         logs: [],
         metadata: {
@@ -532,9 +492,10 @@ async function executeChatCompletionsRequest(
       },
     } as StreamingExecution
 
-    return streamingResult as StreamingExecution
+    return streamingResult
   }
 
+  // Calculate overall timing
   const providerEndTime = Date.now()
   const providerEndTimeISO = new Date(providerEndTime).toISOString()
   const totalDuration = providerEndTime - providerStartTime
@@ -561,7 +522,7 @@ async function executeChatCompletionsRequest(
     const providerEndTimeISO = new Date(providerEndTime).toISOString()
     const totalDuration = providerEndTime - providerStartTime
 
-    logger.error('Error in Azure OpenAI chat completions request:', {
+    logger.error('Error in Responses API request:', {
       error,
       duration: totalDuration,
     })
@@ -592,7 +553,20 @@ export const azureOpenAIProvider: ProviderConfig = {
   executeRequest: async (
     request: ProviderRequest
   ): Promise<ProviderResponse | StreamingExecution> => {
+    logger.info('Preparing Azure OpenAI request', {
+      model: request.model || 'azure/gpt-4o',
+      hasSystemPrompt: !!request.systemPrompt,
+      hasMessages: !!request.messages?.length,
+      hasTools: !!request.tools?.length,
+      toolCount: request.tools?.length || 0,
+      hasResponseFormat: !!request.responseFormat,
+      stream: !!request.stream,
+    })
+
+    // Extract Azure-specific configuration from request or environment
+    // Priority: request parameters > environment variables
     const azureEndpoint = request.azureEndpoint || env.AZURE_OPENAI_ENDPOINT
+    const azureApiVersion = request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-10-21'
 
     if (!azureEndpoint) {
       throw new Error(
@@ -600,78 +574,533 @@ export const azureOpenAIProvider: ProviderConfig = {
       )
     }
 
-    if (!request.apiKey) {
-      throw new Error('API key is required for Azure OpenAI')
-    }
+    // API key is now handled server-side before this function is called
+    const azureOpenAI = new AzureOpenAI({
+      apiKey: request.apiKey,
+      apiVersion: azureApiVersion,
+      endpoint: azureEndpoint,
+    })
 
-    // Check if the endpoint is a full chat completions URL
-    if (isChatCompletionsEndpoint(azureEndpoint)) {
-      logger.info('Detected chat completions endpoint URL')
+    // Build deployment name - use deployment name instead of model name
+    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
 
-      // Extract the base URL for the SDK (it needs just the host, not the full path)
-      const baseUrl = extractBaseUrl(azureEndpoint)
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
-      // Try to extract deployment from URL, fall back to model name
-      const urlDeployment = extractDeploymentFromUrl(azureEndpoint)
-      const deploymentName = urlDeployment || request.model.replace('azure/', '')
+    // Check if we should use the Responses API (2025+ versions)
+    if (useResponsesApi(azureApiVersion)) {
+      logger.info('Using Responses API for Azure OpenAI request', {
+        apiVersion: azureApiVersion,
+        model: deploymentName,
+      })
+      return executeWithResponsesApi(
+        azureOpenAI,
+        request,
+        deploymentName,
+        providerStartTime,
+        providerStartTimeISO
+      )
+    }
 
-      // Try to extract api-version from URL, fall back to request param or env or default
-      const urlApiVersion = extractApiVersionFromUrl(azureEndpoint)
-      const azureApiVersion =
-        urlApiVersion ||
-        request.azureApiVersion ||
-        env.AZURE_OPENAI_API_VERSION ||
-        '2024-07-01-preview'
+    // Continue with Chat Completions API for 2024 and earlier versions
+    logger.info('Using Chat Completions API for Azure OpenAI request', {
+      apiVersion: azureApiVersion,
+      model: deploymentName,
+    })
-      logger.info('Chat completions configuration:', {
-        originalEndpoint: azureEndpoint,
-        baseUrl,
-        deploymentName,
-        apiVersion: azureApiVersion,
+    // Start with an empty array for all messages
+    const allMessages = []
+
+    // Add system prompt if present
+    if (request.systemPrompt) {
+      allMessages.push({
+        role: 'system',
+        content: request.systemPrompt,
+      })
+    }
+
+    // Add context if present
+    if (request.context) {
+      allMessages.push({
+        role: 'user',
+        content: request.context,
       })
+    }
 
-      return executeChatCompletionsRequest(request, baseUrl, azureApiVersion, deploymentName)
+    // Add remaining messages
+    if (request.messages) {
+      allMessages.push(...request.messages)
     }
 
-    // Check if the endpoint is already a full responses API URL
-    if (isResponsesEndpoint(azureEndpoint)) {
-      logger.info('Detected full responses endpoint URL, using it directly')
-
-      const deploymentName = request.model.replace('azure/', '')
-
-      // Use the URL as-is since it's already complete
-      return executeResponsesProviderRequest(request, {
-        providerId: 'azure-openai',
-        providerLabel: 'Azure OpenAI',
-        modelName: deploymentName,
-        endpoint: azureEndpoint,
-        headers: {
-          'Content-Type': 'application/json',
-          'OpenAI-Beta': 'responses=v1',
-          'api-key': request.apiKey,
+    // Transform tools to Azure OpenAI format if provided
+    const tools = request.tools?.length
+      ? request.tools.map((tool) => ({
+          type: 'function',
+          function: {
+            name: tool.id,
+            description: tool.description,
+            parameters: tool.parameters,
+          },
+        }))
+      : undefined
+
+    // Build the request payload
+    const payload: any = {
+      model: deploymentName, // Azure OpenAI uses deployment name
+      messages: allMessages,
+    }
+
+    // Add optional parameters
+    if (request.temperature !== undefined) payload.temperature = request.temperature
+    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
+
+    // Add GPT-5 specific parameters
+    if (request.reasoningEffort !== undefined) payload.reasoning_effort = request.reasoningEffort
+    if (request.verbosity !== undefined) payload.verbosity = request.verbosity
+
+    // Add response format for structured output if specified
+    if (request.responseFormat) {
+      // Use Azure OpenAI's JSON schema format
+      payload.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
         },
-        logger,
-      })
+      }
+
+      logger.info('Added JSON schema response format to Azure OpenAI request')
     }
 
-    // Default: base URL provided, construct the responses API URL
-    logger.info('Using base endpoint, constructing Responses API URL')
-    const azureApiVersion =
-      request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-07-01-preview'
-    const deploymentName = request.model.replace('azure/', '')
-    const apiUrl = `${azureEndpoint.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
-
-    return executeResponsesProviderRequest(request, {
-      providerId: 'azure-openai',
-      providerLabel: 'Azure OpenAI',
-      modelName: deploymentName,
-      endpoint: apiUrl,
-      headers: {
-        'Content-Type': 'application/json',
-        'OpenAI-Beta': 'responses=v1',
-        'api-key': request.apiKey,
-      },
-      logger,
-    })
+    // Handle tools and tool usage control
+    let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
+
+    if (tools?.length) {
+      preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'azure-openai')
+      const { tools: filteredTools, toolChoice } = preparedTools
+
+      if (filteredTools?.length && toolChoice) {
+        payload.tools = filteredTools
+        payload.tool_choice = toolChoice
+
+        logger.info('Azure OpenAI request configuration:', {
+          toolCount: filteredTools.length,
+          toolChoice:
+            typeof toolChoice === 'string'
+              ? toolChoice
+              : toolChoice.type === 'function'
+                ? `force:${toolChoice.function.name}`
+                : toolChoice.type === 'tool'
+                  ? `force:${toolChoice.name}`
+                  : toolChoice.type === 'any'
+                    ? `force:${toolChoice.any?.name || 'unknown'}`
+                    : 'unknown',
+          model: deploymentName,
+        })
+      }
+    }
+
+    try {
+      if (request.stream && (!tools || tools.length === 0)) {
+        logger.info('Using streaming response for Azure OpenAI request')
+
+        const streamResponse = await azureOpenAI.chat.completions.create({
+          ...payload,
+          stream: true,
+          stream_options: { include_usage: true },
+        })
+
+        const tokenUsage = {
+          prompt: 0,
+          completion: 0,
+          total: 0,
+        }
+
+        let _streamContent = ''
+
+        const streamingResult = {
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              const streamEndTime = Date.now()
+              const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+              if (streamingResult.execution.output.providerTiming) {
+                streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+                streamingResult.execution.output.providerTiming.duration =
+                  streamEndTime - providerStartTime
+
+                if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                  streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                    streamEndTime
+                  streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                    streamEndTime - providerStartTime
+                }
+              }
+
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokenUsage.prompt,
+                  completion: usage.completion_tokens || tokenUsage.completion,
+                  total: usage.total_tokens || tokenUsage.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
+            }
+          ),
+          execution: {
+            success: true,
+            output: {
+              content: '',
+              model: request.model,
+              tokens: tokenUsage,
+              toolCalls: undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                timeSegments: [
+                  {
+                    type: 'model',
+                    name: 'Streaming response',
+                    startTime: providerStartTime,
+                    endTime: Date.now(),
+                    duration: Date.now() - providerStartTime,
+                  },
+                ],
+              },
+            },
+            logs: [],
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        return streamingResult as StreamingExecution
+      }
+
+      const initialCallTime = Date.now()
+
+      const originalToolChoice = payload.tool_choice
+
+      const forcedTools = preparedTools?.forcedTools || []
+      let usedForcedTools: string[] = []
+
+      const checkForForcedToolUsage = (
+        response: any,
+        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
+      ) => {
+        if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
+          const toolCallsResponse = response.choices[0].message.tool_calls
+          const result = trackForcedToolUsage(
+            toolCallsResponse,
+            toolChoice,
+            logger,
+            'azure-openai',
+            forcedTools,
+            usedForcedTools
+          )
+          hasUsedForcedTool = result.hasUsedForcedTool
+          usedForcedTools = result.usedForcedTools
+        }
+      }
+
+      let currentResponse = await azureOpenAI.chat.completions.create(payload)
+      const firstResponseTime = Date.now() - initialCallTime
+
+      let content = currentResponse.choices[0]?.message?.content || ''
+      const tokens = {
+        prompt: currentResponse.usage?.prompt_tokens || 0,
+        completion: currentResponse.usage?.completion_tokens || 0,
+        total: currentResponse.usage?.total_tokens || 0,
+      }
+      const toolCalls = []
+      const toolResults = []
+      const currentMessages = [...allMessages]
+      let iterationCount = 0
+      const MAX_ITERATIONS = 10
+
+      let modelTime = firstResponseTime
+      let toolsTime = 0
+
+      let hasUsedForcedTool = false
+
+      const timeSegments: TimeSegment[] = [
+        {
+          type: 'model',
+          name: 'Initial response',
+          startTime: initialCallTime,
+          endTime: initialCallTime + firstResponseTime,
+          duration: firstResponseTime,
+        },
+      ]
+
+      checkForForcedToolUsage(currentResponse, originalToolChoice)
+
+      while (iterationCount < MAX_ITERATIONS) {
+        const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
+        if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
+          break
+        }
+
+        logger.info(
+          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+        )
+
+        const toolsStartTime = Date.now()
+
+        for (const toolCall of toolCallsInResponse) {
+          try {
+            const toolName = toolCall.function.name
+            const toolArgs = JSON.parse(toolCall.function.arguments)
+
+            const tool = request.tools?.find((t) => t.id === toolName)
+            if (!tool) continue
+
+            const toolCallStartTime = Date.now()
+
+            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
+
+            const result = await executeTool(toolName, executionParams, true)
+            const toolCallEndTime = Date.now()
+            const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+            timeSegments.push({
+              type: 'tool',
+              name: toolName,
+              startTime: toolCallStartTime,
+              endTime: toolCallEndTime,
+              duration: toolCallDuration,
+            })
+
+            let resultContent: any
+            if (result.success) {
+              toolResults.push(result.output)
+              resultContent = result.output
+            } else {
+              resultContent = {
+                error: true,
+                message: result.error || 'Tool execution failed',
+                tool: toolName,
+              }
+            }
+
+            toolCalls.push({
+              name: toolName,
+              arguments: toolParams,
+              startTime: new Date(toolCallStartTime).toISOString(),
+              endTime: new Date(toolCallEndTime).toISOString(),
+              duration: toolCallDuration,
+              result: resultContent,
+              success: result.success,
+            })
+
+            currentMessages.push({
+              role: 'assistant',
+              content: null,
+              tool_calls: [
+                {
+                  id: toolCall.id,
+                  type: 'function',
+                  function: {
+                    name: toolName,
+                    arguments: toolCall.function.arguments,
+                  },
+                },
+              ],
+            })
+
+            currentMessages.push({
+              role: 'tool',
+              tool_call_id: toolCall.id,
+              content: JSON.stringify(resultContent),
+            })
+          } catch (error) {
+            logger.error('Error processing tool call:', {
+              error,
+              toolName: toolCall?.function?.name,
+            })
+          }
+        }
+
+        const thisToolsTime = Date.now() - toolsStartTime
+        toolsTime += thisToolsTime
+
+        const nextPayload = {
+          ...payload,
+          messages: currentMessages,
+        }
+
+        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
+          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
+
+          if (remainingTools.length > 0) {
+            nextPayload.tool_choice = {
+              type: 'function',
+              function: { name: remainingTools[0] },
+            }
+            logger.info(`Forcing next tool: ${remainingTools[0]}`)
+          } else {
+            nextPayload.tool_choice = 'auto'
+            logger.info('All forced tools have been used, switching to auto tool_choice')
+          }
+        }
+
+        const nextModelStartTime = Date.now()
+
+        currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
+
+        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
+
+        const nextModelEndTime = Date.now()
+        const thisModelTime = nextModelEndTime - nextModelStartTime
+
+        timeSegments.push({
+          type: 'model',
+          name: `Model response (iteration ${iterationCount + 1})`,
+          startTime: nextModelStartTime,
+          endTime: nextModelEndTime,
+          duration: thisModelTime,
+        })
+
+        modelTime += thisModelTime
+
+        if (currentResponse.choices[0]?.message?.content) {
+          content = currentResponse.choices[0].message.content
+        }
+
+        if (currentResponse.usage) {
+          tokens.prompt += currentResponse.usage.prompt_tokens || 0
+          tokens.completion += currentResponse.usage.completion_tokens || 0
+          tokens.total += currentResponse.usage.total_tokens || 0
+        }
+
+        iterationCount++
+      }
+
+      if (request.stream) {
+        logger.info('Using streaming for final response after tool processing')
+
+        const streamingPayload = {
+          ...payload,
+          messages: currentMessages,
+          tool_choice: 'auto',
+          stream: true,
+          stream_options: { include_usage: true },
+        }
+
+        const streamResponse = await azureOpenAI.chat.completions.create(streamingPayload)
+
+        let _streamContent = ''
+
+        const streamingResult = {
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokens.prompt,
+                  completion: usage.completion_tokens || tokens.completion,
+                  total: usage.total_tokens || tokens.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
+            }
+          ),
+          execution: {
+            success: true,
+            output: {
+              content: '',
+              model: request.model,
+              tokens: {
+                prompt: tokens.prompt,
+                completion: tokens.completion,
+                total: tokens.total,
+              },
+              toolCalls:
+                toolCalls.length > 0
+                  ? {
+                      list: toolCalls,
+                      count: toolCalls.length,
+                    }
+                  : undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                modelTime: modelTime,
+                toolsTime: toolsTime,
+                firstResponseTime: firstResponseTime,
+                iterations: iterationCount + 1,
+                timeSegments: timeSegments,
+              },
+              // Cost will be calculated in logger
+            },
+            logs: [], // No block logs at provider level
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        return streamingResult as StreamingExecution
+      }
+
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      return {
+        content,
+        model: request.model,
+        tokens,
+        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+        toolResults: toolResults.length > 0 ? toolResults : undefined,
+        timing: {
+          startTime: providerStartTimeISO,
+          endTime: providerEndTimeISO,
+          duration: totalDuration,
+          modelTime: modelTime,
+          toolsTime: toolsTime,
+          firstResponseTime: firstResponseTime,
+          iterations: iterationCount + 1,
+          timeSegments: timeSegments,
+        },
+      }
+    } catch (error) {
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      logger.error('Error in Azure OpenAI request:', {
+        error,
+        duration: totalDuration,
+      })
+
+      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+      // @ts-ignore - Adding timing property to the error
+      enhancedError.timing = {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+      }
+
+      throw enhancedError
+    }
   },
 }
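
Reviewer note: two dispatch rules recur across the files above — `useResponsesApi` routes `2025-*` API versions to the Responses API while `2024-*` and earlier stay on Chat Completions, and the evaluator/router handlers forward `azureEndpoint` for both Azure providers but `azureApiVersion` only for `azure-openai`. The snippet below is a minimal, self-contained sketch that restates those two rules for quick verification; it is illustrative only and not code from this patch, and the `2025-04-01-preview` version string is a hypothetical example.

```typescript
// Illustrative sketch of this PR's dispatch rules; not part of the diff.

// Rule 1 (providers/azure-openai/index.ts): 2025-* API versions use the
// Responses API; 2024-* and earlier use the Chat Completions API.
function useResponsesApi(apiVersion: string): boolean {
  return apiVersion.startsWith('2025-')
}

// Rule 2 (evaluator/router handlers): both Azure providers forward the
// endpoint, but only azure-openai also forwards an API version.
function azureCredentialKeys(providerId: 'azure-openai' | 'azure-anthropic'): string[] {
  return providerId === 'azure-openai'
    ? ['azureEndpoint', 'azureApiVersion']
    : ['azureEndpoint']
}

console.log(useResponsesApi('2024-10-21')) // false — the PR's default stays on Chat Completions
console.log(useResponsesApi('2025-04-01-preview')) // true — a 2025 version flips to Responses
console.log(azureCredentialKeys('azure-anthropic')) // ['azureEndpoint']
```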