From d222e924b10fb0df06445597afc00d50d6e13435 Mon Sep 17 00:00:00 2001
From: waleed
Date: Tue, 16 Dec 2025 21:18:40 -0800
Subject: [PATCH 1/2] fix(azure): conditionally add responses api

---
 apps/sim/providers/azure-openai/index.ts | 668 +++++++++++++++++++----
 1 file changed, 562 insertions(+), 106 deletions(-)

diff --git a/apps/sim/providers/azure-openai/index.ts b/apps/sim/providers/azure-openai/index.ts
index fd4f71a563..c265869207 100644
--- a/apps/sim/providers/azure-openai/index.ts
+++ b/apps/sim/providers/azure-openai/index.ts
@@ -18,11 +18,70 @@ import { executeTool } from '@/tools'
 
 const logger = createLogger('AzureOpenAIProvider')
 
+/**
+ * Determines if the API version uses the Responses API (2025+) or Chat Completions API
+ */
+function useResponsesApi(apiVersion: string): boolean {
+  // 2025-* versions use the Responses API
+  // 2024-* and earlier versions use the Chat Completions API
+  return apiVersion.startsWith('2025-')
+}
+
+/**
+ * Helper function to convert an Azure OpenAI Responses API stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromResponsesApiStream(
+  responsesStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const event of responsesStream) {
+          if (event.usage) {
+            usageData = event.usage
+          }
+
+          if (event.type === 'response.output_text.delta') {
+            const content = event.delta || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.content_part.delta') {
+            const content = event.delta?.text || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.completed' || event.type === 'response.done') {
+            if (event.response?.usage) {
+              usageData = event.response.usage
+            }
+          }
+        }
+
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
 /**
  * Helper function to convert an Azure OpenAI stream to a standard ReadableStream
  * and collect completion metrics
  */
-function createReadableStreamFromAzureOpenAIStream(
+function createReadableStreamFromChatCompletionsStream(
   azureOpenAIStream: any,
   onComplete?: (content: string, usage?: any) => void
 ): ReadableStream {
@@ -33,7 +92,6 @@
     async start(controller) {
       try {
         for await (const chunk of azureOpenAIStream) {
-          // Check for usage data in the final chunk
           if (chunk.usage) {
             usageData = chunk.usage
           }
@@ -45,7 +103,6 @@
         }
       }
 
-      // Once stream is complete, call the completion callback with the final content and usage
       if (onComplete) {
         onComplete(fullContent, usageData)
       }
@@ -58,6 +115,430 @@
   })
 }
 
+/**
+ * Executes a request using the Responses API (for 2025+ API versions)
+ */
+async function executeWithResponsesApi(
+  azureOpenAI: AzureOpenAI,
+  request: ProviderRequest,
+  deploymentName: string,
+  providerStartTime: number,
+  providerStartTimeISO: string
+): Promise<ProviderResponse | StreamingExecution> {
+  const inputMessages: any[] = []
+
+  if (request.context) {
+    inputMessages.push({
+      role: 'user',
+      content: request.context,
+    })
+  }
+
+  if (request.messages) {
+    inputMessages.push(...request.messages)
+  }
+
+  const tools = request.tools?.length
+    ? request.tools.map((tool) => ({
+        // Responses API function tools are flattened (no nested `function`
+        // object, unlike Chat Completions)
+        type: 'function' as const,
+        name: tool.id,
+        description: tool.description,
+        parameters: tool.parameters,
+      }))
+    : undefined
+
+  const payload: any = {
+    model: deploymentName,
+    input: inputMessages.length > 0 ? inputMessages : request.systemPrompt || '',
+  }
+
+  if (request.systemPrompt) {
+    payload.instructions = request.systemPrompt
+  }
+
+  if (request.temperature !== undefined) payload.temperature = request.temperature
+  if (request.maxTokens !== undefined) payload.max_output_tokens = request.maxTokens
+
+  if (request.reasoningEffort !== undefined) {
+    payload.reasoning = { effort: request.reasoningEffort }
+  }
+
+  if (request.responseFormat) {
+    // text.format is flat in the Responses API; the schema fields are not
+    // nested under a `json_schema` key as they are in Chat Completions
+    payload.text = {
+      format: {
+        type: 'json_schema',
+        name: request.responseFormat.name || 'response_schema',
+        schema: request.responseFormat.schema || request.responseFormat,
+        strict: request.responseFormat.strict !== false,
+      },
+    }
+    logger.info('Added JSON schema text format to Responses API request')
+  }
+
+  if (tools?.length) {
+    payload.tools = tools
+
+    const forcedTools = request.tools?.filter((t) => t.usageControl === 'force') || []
+    if (forcedTools.length > 0) {
+      if (forcedTools.length === 1) {
+        payload.tool_choice = {
+          type: 'function',
+          name: forcedTools[0].id, // flattened: no nested `function` object
+        }
+      } else {
+        payload.tool_choice = 'required'
+      }
+    } else {
+      payload.tool_choice = 'auto'
+    }
+
+    logger.info('Responses API request configuration:', {
+      toolCount: tools.length,
+      model: deploymentName,
+    })
+  }
+
+  try {
+    if (request.stream && (!tools || tools.length === 0)) {
+      logger.info('Using streaming response for Responses API request')
+
+      const streamResponse = await (azureOpenAI as any).responses.create({
+        ...payload,
+        stream: true,
+      })
+
+      const tokenUsage = {
+        prompt: 0,
+        completion: 0,
+        total: 0,
+      }
+
+      const streamingResult = {
+        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+
+          const streamEndTime = Date.now()
+          const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+          if (streamingResult.execution.output.providerTiming) {
+            streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+            streamingResult.execution.output.providerTiming.duration =
+              streamEndTime - providerStartTime
+
+            if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+              streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                streamEndTime
+              streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                streamEndTime - providerStartTime
+            }
+          }
+
+          if (usage) {
+            streamingResult.execution.output.tokens = {
+              prompt: usage.input_tokens || usage.prompt_tokens || 0,
+              completion: usage.output_tokens || usage.completion_tokens || 0,
+              total:
+                (usage.input_tokens || usage.prompt_tokens || 0) +
+                (usage.output_tokens || usage.completion_tokens || 0),
+            }
+          }
+        }),
+        execution: {
+          success: true,
+          output: {
+            content: '',
+            model: request.model,
+            tokens: tokenUsage,
+            toolCalls: undefined,
+            providerTiming: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+              timeSegments: [
+                {
+                  type: 'model',
+                  name: 'Streaming response',
+                  startTime: providerStartTime,
+                  endTime: Date.now(),
+                  duration: Date.now() - providerStartTime,
+                },
+              ],
+            },
+          },
+          logs: [],
+          metadata: {
+            startTime: providerStartTimeISO,
+            endTime: new Date().toISOString(),
+            duration: Date.now() - providerStartTime,
+          },
+        },
+      } as StreamingExecution
+
+      return streamingResult
+    }
+
+    const initialCallTime = Date.now()
+    let currentResponse = await (azureOpenAI as any).responses.create(payload)
+    const firstResponseTime = Date.now() - initialCallTime
+
+    let content = currentResponse.output_text || ''
+
+    const tokens = {
+      prompt: currentResponse.usage?.input_tokens || 0,
+      completion: currentResponse.usage?.output_tokens || 0,
+      total:
+        (currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
+    }
+
+    const toolCalls: any[] = []
+    const toolResults: any[] = []
+    let iterationCount = 0
+    const MAX_ITERATIONS = 10
+
+    let modelTime = firstResponseTime
+    let toolsTime = 0
+
+    const timeSegments: TimeSegment[] = [
+      {
+        type: 'model',
+        name: 'Initial response',
+        startTime: initialCallTime,
+        endTime: initialCallTime + firstResponseTime,
+        duration: firstResponseTime,
+      },
+    ]
+
+    while (iterationCount < MAX_ITERATIONS) {
+      const toolCallsInResponse =
+        currentResponse.output?.filter((item: any) => item.type === 'function_call') || []
+
+      if (toolCallsInResponse.length === 0) {
+        break
+      }
+
+      logger.info(
+        `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+      )
+
+      const toolsStartTime = Date.now()
+
+      for (const toolCall of toolCallsInResponse) {
+        try {
+          const toolName = toolCall.name
+          const toolArgs =
+            typeof toolCall.arguments === 'string'
+              ? JSON.parse(toolCall.arguments)
+              : toolCall.arguments
+
+          const tool = request.tools?.find((t) => t.id === toolName)
+          if (!tool) continue
+
+          const toolCallStartTime = Date.now()
+          const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
+
+          const result = await executeTool(toolName, executionParams, true)
+          const toolCallEndTime = Date.now()
+          const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+          timeSegments.push({
+            type: 'tool',
+            name: toolName,
+            startTime: toolCallStartTime,
+            endTime: toolCallEndTime,
+            duration: toolCallDuration,
+          })
+
+          let resultContent: any
+          if (result.success) {
+            toolResults.push(result.output)
+            resultContent = result.output
+          } else {
+            resultContent = {
+              error: true,
+              message: result.error || 'Tool execution failed',
+              tool: toolName,
+            }
+          }
+
+          toolCalls.push({
+            name: toolName,
+            arguments: toolParams,
+            startTime: new Date(toolCallStartTime).toISOString(),
+            endTime: new Date(toolCallEndTime).toISOString(),
+            duration: toolCallDuration,
+            result: resultContent,
+            success: result.success,
+          })
+
+          // Add the function call and its output to input for the next request (matched by call_id)
+          inputMessages.push(toolCall, {
+            type: 'function_call_output',
+            call_id: toolCall.call_id || toolCall.id,
+            output: JSON.stringify(resultContent),
+          })
+        } catch (error) {
+          logger.error('Error processing tool call:', {
+            error,
+            toolName: toolCall?.name,
+          })
+        }
+      }
+
+      const thisToolsTime = Date.now() - toolsStartTime
+      toolsTime += thisToolsTime
+
+      // Make the next request
+      const nextModelStartTime = Date.now()
+      const nextPayload = {
+        ...payload,
+        input: inputMessages,
+        tool_choice: 'auto',
+      }
+
+      currentResponse = await (azureOpenAI as any).responses.create(nextPayload)
+
+      const nextModelEndTime = Date.now()
+      const thisModelTime = nextModelEndTime - nextModelStartTime
+
+      timeSegments.push({
+        type: 'model',
+        name: `Model response (iteration ${iterationCount + 1})`,
+        startTime: nextModelStartTime,
+        endTime: nextModelEndTime,
+        duration: thisModelTime,
+      })
+
+      modelTime += thisModelTime
+
+      // Update content
+      if (currentResponse.output_text) {
+        content = currentResponse.output_text
+      }
+
+      // Update token counts
+      if (currentResponse.usage) {
+        tokens.prompt += currentResponse.usage.input_tokens || 0
+        tokens.completion += currentResponse.usage.output_tokens || 0
+        tokens.total = tokens.prompt + tokens.completion
+      }
+
+      iterationCount++
+    }
+
+    // Handle streaming for final response after tool processing
+    if (request.stream) {
+      logger.info('Using streaming for final response after tool processing (Responses API)')
+
+      const streamingPayload = {
+        ...payload,
+        input: inputMessages,
+        tool_choice: 'auto',
+        stream: true,
+      }
+
+      const streamResponse = await (azureOpenAI as any).responses.create(streamingPayload)
+
+      const streamingResult = {
+        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+
+          if (usage) {
+            streamingResult.execution.output.tokens = {
+              prompt: usage.input_tokens || tokens.prompt,
+              completion: usage.output_tokens || tokens.completion,
+              total:
+                (usage.input_tokens || tokens.prompt) + (usage.output_tokens || tokens.completion),
+            }
+          }
+        }),
+        execution: {
+          success: true,
+          output: {
+            content: '',
+            model: request.model,
+            tokens: {
+              prompt: tokens.prompt,
+              completion: tokens.completion,
+              total: tokens.total,
+            },
+            toolCalls:
+              toolCalls.length > 0
+                ? {
+                    list: toolCalls,
+                    count: toolCalls.length,
+                  }
+                : undefined,
+            providerTiming: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+              modelTime: modelTime,
+              toolsTime: toolsTime,
+              firstResponseTime: firstResponseTime,
+              iterations: iterationCount + 1,
+              timeSegments: timeSegments,
+            },
+          },
+          logs: [],
+          metadata: {
+            startTime: providerStartTimeISO,
+            endTime: new Date().toISOString(),
+            duration: Date.now() - providerStartTime,
+          },
+        },
+      } as StreamingExecution
+
+      return streamingResult
+    }
+
+    // Calculate overall timing
+    const providerEndTime = Date.now()
+    const providerEndTimeISO = new Date(providerEndTime).toISOString()
+    const totalDuration = providerEndTime - providerStartTime
+
+    return {
+      content,
+      model: request.model,
+      tokens,
+      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+      toolResults: toolResults.length > 0 ? toolResults : undefined,
+      timing: {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+        modelTime: modelTime,
+        toolsTime: toolsTime,
+        firstResponseTime: firstResponseTime,
+        iterations: iterationCount + 1,
+        timeSegments: timeSegments,
+      },
+    }
+  } catch (error) {
+    const providerEndTime = Date.now()
+    const providerEndTimeISO = new Date(providerEndTime).toISOString()
+    const totalDuration = providerEndTime - providerStartTime
+
+    logger.error('Error in Responses API request:', {
+      error,
+      duration: totalDuration,
+    })
+
+    const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+    // @ts-ignore - Adding timing property to the error
+    enhancedError.timing = {
+      startTime: providerStartTimeISO,
+      endTime: providerEndTimeISO,
+      duration: totalDuration,
+    }
+
+    throw enhancedError
+  }
+}
+
 /**
  * Azure OpenAI provider configuration
  */
@@ -85,8 +566,7 @@ export const azureOpenAIProvider: ProviderConfig = {
     // Extract Azure-specific configuration from request or environment
     // Priority: request parameters > environment variables
     const azureEndpoint = request.azureEndpoint || env.AZURE_OPENAI_ENDPOINT
-    const azureApiVersion =
-      request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-07-01-preview'
+    const azureApiVersion = request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-10-21'
 
     if (!azureEndpoint) {
       throw new Error(
@@ -101,6 +581,34 @@ export const azureOpenAIProvider: ProviderConfig = {
       endpoint: azureEndpoint,
     })
 
+    // Build deployment name - use deployment name instead of model name
+    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
+
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
+
+    // Check if we should use the Responses API (2025+ versions)
+    if (useResponsesApi(azureApiVersion)) {
+      logger.info('Using Responses API for Azure OpenAI request', {
+        apiVersion: azureApiVersion,
+        model: deploymentName,
+      })
+      return executeWithResponsesApi(
+        azureOpenAI,
+        request,
+        deploymentName,
+        providerStartTime,
+        providerStartTimeISO
+      )
+    }
+
+    // Continue with Chat Completions API for 2024 and earlier versions
+    logger.info('Using Chat Completions API for Azure OpenAI request', {
+      apiVersion: azureApiVersion,
+      model: deploymentName,
+    })
+
     // Start with an empty array for all messages
     const allMessages = []
 
@@ -137,8 +645,7 @@ export const azureOpenAIProvider: ProviderConfig = {
         }))
       : undefined
 
-    // Build the request payload - use deployment name instead of model name
-    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
+    // Build the request payload
     const payload: any = {
       model: deploymentName, // Azure OpenAI uses deployment name
       messages: allMessages,
@@ -195,23 +702,16 @@ export const azureOpenAIProvider: ProviderConfig = {
       }
     }
 
-    // Start execution timer for the entire provider execution
-    const providerStartTime = Date.now()
-    const providerStartTimeISO = new Date(providerStartTime).toISOString()
-
     try {
-      // Check if we can stream directly (no tools required)
       if (request.stream && (!tools || tools.length === 0)) {
         logger.info('Using streaming response for Azure OpenAI request')
 
-        // Create a streaming request with token usage tracking
         const streamResponse = await azureOpenAI.chat.completions.create({
           ...payload,
           stream: true,
           stream_options: { include_usage: true },
         })
 
-        // Start collecting token usage from the stream
         const tokenUsage = {
           prompt: 0,
           completion: 0,
@@ -220,47 +720,44 @@ export const azureOpenAIProvider: ProviderConfig = {
 
         let _streamContent = ''
 
-        // Create a StreamingExecution response with a callback to update content and tokens
         const streamingResult = {
-          stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
-            // Update the execution data with the final content and token usage
-            _streamContent = content
-            streamingResult.execution.output.content = content
-
-            // Update the timing information with the actual completion time
-            const streamEndTime = Date.now()
-            const streamEndTimeISO = new Date(streamEndTime).toISOString()
-
-            if (streamingResult.execution.output.providerTiming) {
-              streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
-              streamingResult.execution.output.providerTiming.duration =
-                streamEndTime - providerStartTime
-
-              // Update the time segment as well
-              if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
-                streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
-                  streamEndTime
-                streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              const streamEndTime = Date.now()
+              const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+              if (streamingResult.execution.output.providerTiming) {
+                streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+                streamingResult.execution.output.providerTiming.duration =
                   streamEndTime - providerStartTime
-              }
-            }
 
-            // Update token usage if available from the stream
-            if (usage) {
-              const newTokens = {
-                prompt: usage.prompt_tokens || tokenUsage.prompt,
-                completion: usage.completion_tokens || tokenUsage.completion,
-                total: usage.total_tokens || tokenUsage.total,
+                if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                  streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                    streamEndTime
+                  streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                    streamEndTime - providerStartTime
+                }
               }
 
-              streamingResult.execution.output.tokens = newTokens
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokenUsage.prompt,
+                  completion: usage.completion_tokens || tokenUsage.completion,
+                  total: usage.total_tokens || tokenUsage.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
             }
-            // We don't need to estimate tokens here as logger.ts will handle that
-          }),
+          ),
           execution: {
             success: true,
             output: {
-              content: '', // Will be filled by the stream completion callback
+              content: '',
               model: request.model,
               tokens: tokenUsage,
               toolCalls: undefined,
@@ -278,9 +775,8 @@ export const azureOpenAIProvider: ProviderConfig = {
                 },
               ],
             },
-            // Cost will be calculated in logger
           },
-          logs: [], // No block logs for direct streaming
+          logs: [],
           metadata: {
             startTime: providerStartTimeISO,
             endTime: new Date().toISOString(),
@@ -289,21 +785,16 @@ export const azureOpenAIProvider: ProviderConfig = {
           },
         } as StreamingExecution
 
-        // Return the streaming execution object with explicit casting
         return streamingResult as StreamingExecution
       }
 
-      // Make the initial API request
       const initialCallTime = Date.now()
 
-      // Track the original tool_choice for forced tool tracking
       const originalToolChoice = payload.tool_choice
 
-      // Track forced tools and their usage
      const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []
 
-      // Helper function to check for forced tool usage in responses
       const checkForForcedToolUsage = (
         response: any,
         toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
@@ -327,7 +818,6 @@ export const azureOpenAIProvider: ProviderConfig = {
       const firstResponseTime = Date.now() - initialCallTime
 
       let content = currentResponse.choices[0]?.message?.content || ''
 
-      // Collect token information but don't calculate costs - that will be done in logger.ts
       const tokens = {
         prompt: currentResponse.usage?.prompt_tokens || 0,
         completion: currentResponse.usage?.completion_tokens || 0,
@@ -337,16 +827,13 @@ export const azureOpenAIProvider: ProviderConfig = {
       const toolResults = []
       const currentMessages = [...allMessages]
       let iterationCount = 0
-      const MAX_ITERATIONS = 10 // Prevent infinite loops
+      const MAX_ITERATIONS = 10
 
-      // Track time spent in model vs tools
       let modelTime = firstResponseTime
       let toolsTime = 0
 
-      // Track if a forced tool has been used
       let hasUsedForcedTool = false
 
-      // Track each model and tool call segment with timestamps
       const timeSegments: TimeSegment[] = [
         {
           type: 'model',
@@ -357,11 +844,9 @@ export const azureOpenAIProvider: ProviderConfig = {
         },
       ]
 
-      // Check if a forced tool was used in the first response
       checkForForcedToolUsage(currentResponse, originalToolChoice)
 
       while (iterationCount < MAX_ITERATIONS) {
-        // Check for tool calls
         const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
         if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
           break
@@ -371,20 +856,16 @@ export const azureOpenAIProvider: ProviderConfig = {
           `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
         )
 
-        // Track time for tool calls in this batch
         const toolsStartTime = Date.now()
 
-        // Process each tool call
         for (const toolCall of toolCallsInResponse) {
           try {
             const toolName = toolCall.function.name
             const toolArgs = JSON.parse(toolCall.function.arguments)
 
-            // Get the tool from the tools registry
             const tool = request.tools?.find((t) => t.id === toolName)
             if (!tool) continue
 
-            // Execute the tool
             const toolCallStartTime = Date.now()
             const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
 
@@ -393,7 +874,6 @@ export const azureOpenAIProvider: ProviderConfig = {
             const toolCallEndTime = Date.now()
             const toolCallDuration = toolCallEndTime - toolCallStartTime
 
-            // Add to time segments for both success and failure
             timeSegments.push({
               type: 'tool',
               name: toolName,
@@ -402,13 +882,11 @@ export const azureOpenAIProvider: ProviderConfig = {
               duration: toolCallDuration,
             })
 
-            // Prepare result content for the LLM
             let resultContent: any
             if (result.success) {
               toolResults.push(result.output)
               resultContent = result.output
             } else {
-              // Include error information so LLM can respond appropriately
               resultContent = {
                 error: true,
                 message: result.error || 'Tool execution failed',
@@ -426,7 +904,6 @@ export const azureOpenAIProvider: ProviderConfig = {
               success: result.success,
             })
 
-            // Add the tool call and result to messages (both success and failure)
             currentMessages.push({
               role: 'assistant',
               content: null,
@@ -455,48 +932,38 @@ export const azureOpenAIProvider: ProviderConfig = {
           }
         }
 
-        // Calculate tool call time for this iteration
        const thisToolsTime = Date.now() - toolsStartTime
         toolsTime += thisToolsTime
 
-        // Make the next request with updated messages
         const nextPayload = {
           ...payload,
           messages: currentMessages,
         }
 
-        // Update tool_choice based on which forced tools have been used
         if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
-          // If we have remaining forced tools, get the next one to force
           const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
           if (remainingTools.length > 0) {
-            // Force the next tool
            nextPayload.tool_choice = {
               type: 'function',
               function: { name: remainingTools[0] },
             }
             logger.info(`Forcing next tool: ${remainingTools[0]}`)
           } else {
-            // All forced tools have been used, switch to auto
            nextPayload.tool_choice = 'auto'
             logger.info('All forced tools have been used, switching to auto tool_choice')
           }
         }
 
-        // Time the next model call
         const nextModelStartTime = Date.now()
 
-        // Make the next request
         currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
 
-        // Check if any forced tools were used in this response
         checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
 
         const nextModelEndTime = Date.now()
         const thisModelTime = nextModelEndTime - nextModelStartTime
 
-        // Add to time segments
         timeSegments.push({
           type: 'model',
           name: `Model response (iteration ${iterationCount + 1})`,
@@ -505,15 +972,12 @@ export const azureOpenAIProvider: ProviderConfig = {
           duration: thisModelTime,
         })
 
-        // Add to model time
         modelTime += thisModelTime
 
-        // Update content if we have a text response
         if (currentResponse.choices[0]?.message?.content) {
           content = currentResponse.choices[0].message.content
         }
 
-        // Update token counts
         if (currentResponse.usage) {
           tokens.prompt += currentResponse.usage.prompt_tokens || 0
           tokens.completion += currentResponse.usage.completion_tokens || 0
@@ -523,46 +987,43 @@ export const azureOpenAIProvider: ProviderConfig = {
         iterationCount++
       }
 
-      // After all tool processing complete, if streaming was requested, use streaming for the final response
       if (request.stream) {
         logger.info('Using streaming for final response after tool processing')
 
-        // When streaming after tool calls with forced tools, make sure tool_choice is set to 'auto'
-        // This prevents Azure OpenAI API from trying to force tool usage again in the final streaming response
         const streamingPayload = {
           ...payload,
           messages: currentMessages,
-          tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
+          tool_choice: 'auto',
           stream: true,
           stream_options: { include_usage: true },
         }
 
         const streamResponse = await azureOpenAI.chat.completions.create(streamingPayload)
 
-        // Create the StreamingExecution object with all collected data
         let _streamContent = ''
 
         const streamingResult = {
-          stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
-            // Update the execution data with the final content and token usage
-            _streamContent = content
-            streamingResult.execution.output.content = content
-
-            // Update token usage if available from the stream
-            if (usage) {
-              const newTokens = {
-                prompt: usage.prompt_tokens || tokens.prompt,
-                completion: usage.completion_tokens || tokens.completion,
-                total: usage.total_tokens || tokens.total,
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokens.prompt,
+                  completion: usage.completion_tokens || tokens.completion,
+                  total: usage.total_tokens || tokens.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
             }
-
-              streamingResult.execution.output.tokens = newTokens
            }
-          }),
+          ),
           execution: {
             success: true,
             output: {
-              content: '', // Will be filled by the callback
+              content: '',
               model: request.model,
               tokens: {
                 prompt: tokens.prompt,
                 completion: tokens.completion,
                 total: tokens.total,
@@ -597,11 +1058,9 @@ export const azureOpenAIProvider: ProviderConfig = {
           },
         } as StreamingExecution
 
-        // Return the streaming execution object with explicit casting
         return streamingResult as StreamingExecution
       }
 
-      // Calculate overall timing
      const providerEndTime = Date.now()
       const providerEndTimeISO = new Date(providerEndTime).toISOString()
       const totalDuration = providerEndTime - providerStartTime
@@ -622,10 +1081,8 @@ export const azureOpenAIProvider: ProviderConfig = {
           iterations: iterationCount + 1,
           timeSegments: timeSegments,
         },
-        // We're not calculating cost here as it will be handled in logger.ts
       }
     } catch (error) {
-      // Include timing information even for errors
       const providerEndTime = Date.now()
       const providerEndTimeISO = new Date(providerEndTime).toISOString()
       const totalDuration = providerEndTime - providerStartTime
@@ -635,7 +1092,6 @@ export const azureOpenAIProvider: ProviderConfig = {
         duration: totalDuration,
       })
 
-      // Create a new error with timing information
       const enhancedError = new Error(error instanceof Error ? error.message : String(error))
       // @ts-ignore - Adding timing property to the error
       enhancedError.timing = {

From 5e1ff24169de79fb97aeb8b7bee725d5b130da60 Mon Sep 17 00:00:00 2001
From: Waleed Latif
Date: Fri, 6 Feb 2026 12:48:31 -0800
Subject: [PATCH 2/2] fix(azure): add azure-anthropic support to router,
 evaluator, copilot, and tokenization

---
 apps/sim/app/api/copilot/chat/route.ts               |  7 +++++++
 apps/sim/blocks/utils.ts                             | 10 +++++-----
 .../executor/handlers/evaluator/evaluator-handler.ts |  6 ++++--
 apps/sim/executor/handlers/router/router-handler.ts  | 12 ++++++++----
 apps/sim/lib/copilot/config.ts                       |  1 +
 apps/sim/lib/copilot/types.ts                        |  8 +++++++-
 apps/sim/lib/tokenization/constants.ts               |  5 +++++
 apps/sim/lib/tokenization/estimators.ts              |  1 +
 8 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/apps/sim/app/api/copilot/chat/route.ts b/apps/sim/app/api/copilot/chat/route.ts
index 9d31bf5c36..b82a580748 100644
--- a/apps/sim/app/api/copilot/chat/route.ts
+++ b/apps/sim/app/api/copilot/chat/route.ts
@@ -285,6 +285,13 @@ export async function POST(req: NextRequest) {
         apiVersion: 'preview',
         endpoint: env.AZURE_OPENAI_ENDPOINT,
       }
+    } else if (providerEnv === 'azure-anthropic') {
+      providerConfig = {
+        provider: 'azure-anthropic',
+        model: envModel,
+        apiKey: env.AZURE_ANTHROPIC_API_KEY,
+        endpoint: env.AZURE_ANTHROPIC_ENDPOINT,
+      }
     } else if (providerEnv === 'vertex') {
       providerConfig = {
         provider: 'vertex',
diff --git a/apps/sim/blocks/utils.ts b/apps/sim/blocks/utils.ts
index 7de0b518af..fa0e28590e 100644
--- a/apps/sim/blocks/utils.ts
+++ b/apps/sim/blocks/utils.ts
@@ -80,7 +80,7 @@ export function getApiKeyCondition() {
 
 /**
  * Returns the standard provider credential subblocks used by LLM-based blocks.
- * This includes: Vertex AI OAuth, API Key, Azure OpenAI, Vertex AI config, and Bedrock config.
+ * This includes: Vertex AI OAuth, API Key, Azure (OpenAI + Anthropic), Vertex AI config, and Bedrock config.
  *
  * Usage: Spread into your block's subBlocks array after block-specific fields
  */
@@ -111,14 +111,14 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
     },
     {
       id: 'azureEndpoint',
-      title: 'Azure OpenAI Endpoint',
+      title: 'Azure Endpoint',
       type: 'short-input',
       password: true,
-      placeholder: 'https://your-resource.openai.azure.com',
+      placeholder: 'https://your-resource.services.ai.azure.com',
       connectionDroppable: false,
       condition: {
         field: 'model',
-        value: providers['azure-openai'].models,
+        value: [...providers['azure-openai'].models, ...providers['azure-anthropic'].models],
       },
     },
     {
@@ -202,7 +202,7 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
  */
 export const PROVIDER_CREDENTIAL_INPUTS = {
   apiKey: { type: 'string', description: 'Provider API key' },
-  azureEndpoint: { type: 'string', description: 'Azure OpenAI endpoint URL' },
+  azureEndpoint: { type: 'string', description: 'Azure endpoint URL' },
   azureApiVersion: { type: 'string', description: 'Azure API version' },
   vertexProject: { type: 'string', description: 'Google Cloud project ID for Vertex AI' },
   vertexLocation: { type: 'string', description: 'Google Cloud location for Vertex AI' },
diff --git a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
index 3e95b2f856..4b036fe2e7 100644
--- a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
+++ b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
@@ -130,9 +130,11 @@ export class EvaluatorBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = evaluatorConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/executor/handlers/router/router-handler.ts b/apps/sim/executor/handlers/router/router-handler.ts
index 766a4aac66..636016cd48 100644
--- a/apps/sim/executor/handlers/router/router-handler.ts
+++ b/apps/sim/executor/handlers/router/router-handler.ts
@@ -105,9 +105,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
@@ -262,9 +264,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/lib/copilot/config.ts b/apps/sim/lib/copilot/config.ts
index 4b9c89274c..5700e99300 100644
--- a/apps/sim/lib/copilot/config.ts
+++ b/apps/sim/lib/copilot/config.ts
@@ -12,6 +12,7 @@ const VALID_PROVIDER_IDS: readonly ProviderId[] = [
   'openai',
   'azure-openai',
   'anthropic',
+  'azure-anthropic',
   'google',
   'deepseek',
   'xai',
diff --git a/apps/sim/lib/copilot/types.ts b/apps/sim/lib/copilot/types.ts
index 6ed8133082..c7677dc359 100644
--- a/apps/sim/lib/copilot/types.ts
+++ b/apps/sim/lib/copilot/types.ts
@@ -147,6 +147,12 @@ export type CopilotProviderConfig =
       apiVersion?: string
       endpoint?: string
     }
+  | {
+      provider: 'azure-anthropic'
+      model: string
+      apiKey?: string
+      endpoint?: string
+    }
   | {
       provider: 'vertex'
       model: string
@@ -155,7 +161,7 @@ export type CopilotProviderConfig =
       vertexLocation?: string
     }
   | {
-      provider: Exclude
+      provider: Exclude
       model?: string
       apiKey?: string
     }
diff --git a/apps/sim/lib/tokenization/constants.ts b/apps/sim/lib/tokenization/constants.ts
index 010ef47437..a10b1995da 100644
--- a/apps/sim/lib/tokenization/constants.ts
+++ b/apps/sim/lib/tokenization/constants.ts
@@ -21,6 +21,11 @@ export const TOKENIZATION_CONFIG = {
     confidence: 'high',
     supportedMethods: ['heuristic', 'fallback'],
   },
+  'azure-anthropic': {
+    avgCharsPerToken: 4.5,
+    confidence: 'high',
+    supportedMethods: ['heuristic', 'fallback'],
+  },
   google: {
     avgCharsPerToken: 5,
     confidence: 'medium',
diff --git a/apps/sim/lib/tokenization/estimators.ts b/apps/sim/lib/tokenization/estimators.ts
index 53ce719658..01aed1c1e6 100644
--- a/apps/sim/lib/tokenization/estimators.ts
+++ b/apps/sim/lib/tokenization/estimators.ts
@@ -204,6 +204,7 @@ export function estimateTokenCount(text: string, providerId?: string): TokenEstimate
       estimatedTokens = estimateOpenAITokens(text)
       break
     case 'anthropic':
+    case 'azure-anthropic':
       estimatedTokens = estimateAnthropicTokens(text)
       break
     case 'google':