diff --git a/apps/sim/app/api/copilot/chat/route.ts b/apps/sim/app/api/copilot/chat/route.ts
index 9d31bf5c36..b82a580748 100644
--- a/apps/sim/app/api/copilot/chat/route.ts
+++ b/apps/sim/app/api/copilot/chat/route.ts
@@ -285,6 +285,13 @@ export async function POST(req: NextRequest) {
         apiVersion: 'preview',
         endpoint: env.AZURE_OPENAI_ENDPOINT,
       }
+    } else if (providerEnv === 'azure-anthropic') {
+      providerConfig = {
+        provider: 'azure-anthropic',
+        model: envModel,
+        apiKey: env.AZURE_ANTHROPIC_API_KEY,
+        endpoint: env.AZURE_ANTHROPIC_ENDPOINT,
+      }
     } else if (providerEnv === 'vertex') {
       providerConfig = {
         provider: 'vertex',
diff --git a/apps/sim/blocks/utils.ts b/apps/sim/blocks/utils.ts
index 7de0b518af..fa0e28590e 100644
--- a/apps/sim/blocks/utils.ts
+++ b/apps/sim/blocks/utils.ts
@@ -80,7 +80,7 @@ export function getApiKeyCondition() {
 
 /**
  * Returns the standard provider credential subblocks used by LLM-based blocks.
- * This includes: Vertex AI OAuth, API Key, Azure OpenAI, Vertex AI config, and Bedrock config.
+ * This includes: Vertex AI OAuth, API Key, Azure (OpenAI + Anthropic), Vertex AI config, and Bedrock config.
  *
  * Usage: Spread into your block's subBlocks array after block-specific fields
  */
@@ -111,14 +111,14 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
     },
     {
       id: 'azureEndpoint',
-      title: 'Azure OpenAI Endpoint',
+      title: 'Azure Endpoint',
       type: 'short-input',
       password: true,
-      placeholder: 'https://your-resource.openai.azure.com',
+      placeholder: 'https://your-resource.services.ai.azure.com',
       connectionDroppable: false,
       condition: {
         field: 'model',
-        value: providers['azure-openai'].models,
+        value: [...providers['azure-openai'].models, ...providers['azure-anthropic'].models],
       },
     },
     {
@@ -202,7 +202,7 @@ export function getProviderCredentialSubBlocks(): SubBlockConfig[] {
  */
 export const PROVIDER_CREDENTIAL_INPUTS = {
   apiKey: { type: 'string', description: 'Provider API key' },
-  azureEndpoint: { type: 'string', description: 'Azure OpenAI endpoint URL' },
+  azureEndpoint: { type: 'string', description: 'Azure endpoint URL' },
   azureApiVersion: { type: 'string', description: 'Azure API version' },
   vertexProject: { type: 'string', description: 'Google Cloud project ID for Vertex AI' },
   vertexLocation: { type: 'string', description: 'Google Cloud location for Vertex AI' },
diff --git a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
index 3e95b2f856..4b036fe2e7 100644
--- a/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
+++ b/apps/sim/executor/handlers/evaluator/evaluator-handler.ts
@@ -130,9 +130,11 @@ export class EvaluatorBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = evaluatorConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/executor/handlers/router/router-handler.ts b/apps/sim/executor/handlers/router/router-handler.ts
index 766a4aac66..636016cd48 100644
--- a/apps/sim/executor/handlers/router/router-handler.ts
+++ b/apps/sim/executor/handlers/router/router-handler.ts
@@ -105,9 +105,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
@@ -262,9 +264,11 @@ export class RouterBlockHandler implements BlockHandler {
       providerRequest.vertexLocation = routerConfig.vertexLocation
     }
 
-    if (providerId === 'azure-openai') {
+    if (providerId === 'azure-openai' || providerId === 'azure-anthropic') {
       providerRequest.azureEndpoint = inputs.azureEndpoint
-      providerRequest.azureApiVersion = inputs.azureApiVersion
+      if (providerId === 'azure-openai') {
+        providerRequest.azureApiVersion = inputs.azureApiVersion
+      }
     }
 
     if (providerId === 'bedrock') {
diff --git a/apps/sim/lib/copilot/config.ts b/apps/sim/lib/copilot/config.ts
index 4b9c89274c..5700e99300 100644
--- a/apps/sim/lib/copilot/config.ts
+++ b/apps/sim/lib/copilot/config.ts
@@ -12,6 +12,7 @@ const VALID_PROVIDER_IDS: readonly ProviderId[] = [
   'openai',
   'azure-openai',
   'anthropic',
+  'azure-anthropic',
   'google',
   'deepseek',
   'xai',
diff --git a/apps/sim/lib/copilot/types.ts b/apps/sim/lib/copilot/types.ts
index 6ed8133082..c7677dc359 100644
--- a/apps/sim/lib/copilot/types.ts
+++ b/apps/sim/lib/copilot/types.ts
@@ -147,6 +147,12 @@ export type CopilotProviderConfig =
       apiVersion?: string
       endpoint?: string
     }
+  | {
+      provider: 'azure-anthropic'
+      model: string
+      apiKey?: string
+      endpoint?: string
+    }
   | {
       provider: 'vertex'
       model: string
@@ -155,7 +161,7 @@ export type CopilotProviderConfig =
       vertexLocation?: string
     }
   | {
-      provider: Exclude<ProviderId, 'azure-openai' | 'vertex'>
+      provider: Exclude<ProviderId, 'azure-openai' | 'azure-anthropic' | 'vertex'>
       model?: string
       apiKey?: string
     }
diff --git a/apps/sim/lib/tokenization/constants.ts b/apps/sim/lib/tokenization/constants.ts
index 010ef47437..a10b1995da 100644
--- a/apps/sim/lib/tokenization/constants.ts
+++ b/apps/sim/lib/tokenization/constants.ts
@@ -21,6 +21,11 @@ export const TOKENIZATION_CONFIG = {
     confidence: 'high',
     supportedMethods: ['heuristic', 'fallback'],
   },
+  'azure-anthropic': {
+    avgCharsPerToken: 4.5,
+    confidence: 'high',
+    supportedMethods: ['heuristic', 'fallback'],
+  },
   google: {
     avgCharsPerToken: 5,
     confidence: 'medium',
diff --git a/apps/sim/lib/tokenization/estimators.ts b/apps/sim/lib/tokenization/estimators.ts
index 53ce719658..01aed1c1e6 100644
--- a/apps/sim/lib/tokenization/estimators.ts
+++ b/apps/sim/lib/tokenization/estimators.ts
@@ -204,6 +204,7 @@ export function estimateTokenCount(text: string, providerId?: string): TokenEsti
       estimatedTokens = estimateOpenAITokens(text)
       break
     case 'anthropic':
+    case 'azure-anthropic':
       estimatedTokens = estimateAnthropicTokens(text)
       break
     case 'google':
diff --git a/apps/sim/providers/azure-openai/index.ts b/apps/sim/providers/azure-openai/index.ts
index ca63904df2..c265869207 100644
--- a/apps/sim/providers/azure-openai/index.ts
+++ b/apps/sim/providers/azure-openai/index.ts
@@ -1,20 +1,8 @@
-import { createLogger } from '@sim/logger'
 import { AzureOpenAI } from 'openai'
-import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/completions'
 import { env } from '@/lib/core/config/env'
+import { createLogger } from '@/lib/logs/console/logger'
 import type { StreamingExecution } from '@/executor/types'
-import { MAX_TOOL_ITERATIONS } from '@/providers'
-import {
-  checkForForcedToolUsage,
-  createReadableStreamFromAzureOpenAIStream,
-  extractApiVersionFromUrl,
-  extractBaseUrl,
-  extractDeploymentFromUrl,
-  isChatCompletionsEndpoint,
-  isResponsesEndpoint,
-} from '@/providers/azure-openai/utils'
 import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
-import { executeResponsesProviderRequest } from '@/providers/openai/core'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -22,66 +10,137 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
-  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
+  trackForcedToolUsage,
 } from '@/providers/utils'
 import { executeTool } from '@/tools'
 
 const logger = createLogger('AzureOpenAIProvider')
 
 /**
- * Executes a request using the chat completions API.
- * Used when the endpoint URL indicates chat completions.
+ * Determines if the API version uses the Responses API (2025+) or Chat Completions API
  */
-async function executeChatCompletionsRequest(
-  request: ProviderRequest,
-  azureEndpoint: string,
-  azureApiVersion: string,
-  deploymentName: string
-): Promise<ProviderResponse | StreamingExecution> {
-  logger.info('Using Azure OpenAI Chat Completions API', {
-    model: request.model,
-    endpoint: azureEndpoint,
-    deploymentName,
-    apiVersion: azureApiVersion,
-    hasSystemPrompt: !!request.systemPrompt,
-    hasMessages: !!request.messages?.length,
-    hasTools: !!request.tools?.length,
-    toolCount: request.tools?.length || 0,
-    hasResponseFormat: !!request.responseFormat,
-    stream: !!request.stream,
-  })
+function useResponsesApi(apiVersion: string): boolean {
+  // 2025-* versions use the Responses API
+  // 2024-* and earlier versions use the Chat Completions API
+  return apiVersion.startsWith('2025-')
+}
+
+/**
+ * Helper function to convert an Azure OpenAI Responses API stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromResponsesApiStream(
+  responsesStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const event of responsesStream) {
+          if (event.usage) {
+            usageData = event.usage
+          }
+
+          if (event.type === 'response.output_text.delta') {
+            const content = event.delta || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.content_part.delta') {
+            const content = event.delta?.text || ''
+            if (content) {
+              fullContent += content
+              controller.enqueue(new TextEncoder().encode(content))
+            }
+          } else if (event.type === 'response.completed' || event.type === 'response.done') {
+            if (event.response?.usage) {
+              usageData = event.response.usage
+            }
+          }
+        }
-  const azureOpenAI = new AzureOpenAI({
-    apiKey: request.apiKey,
-    apiVersion: azureApiVersion,
-    endpoint: azureEndpoint,
+
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
   })
+}
-
-  const allMessages: any[] = []
+
+/**
+ * Helper function to convert an Azure OpenAI Chat Completions stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromChatCompletionsStream(
+  azureOpenAIStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of azureOpenAIStream) {
+          if (chunk.usage) {
+            usageData = chunk.usage
+          }
-  if (request.systemPrompt) {
-    allMessages.push({
-      role: 'system',
-      content: request.systemPrompt,
-    })
-  }
+          const content = chunk.choices[0]?.delta?.content || ''
+          if (content) {
+            fullContent += content
+            controller.enqueue(new TextEncoder().encode(content))
+          }
+        }
+
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
+/**
+ * Executes a request using the Responses API (for 2025+ API versions)
+ */
+async function executeWithResponsesApi(
+  azureOpenAI: AzureOpenAI,
+  request: ProviderRequest,
+  deploymentName: string,
+  providerStartTime: number,
+  providerStartTimeISO: string
+): Promise<ProviderResponse | StreamingExecution> {
+  const inputMessages: any[] = []
 
   if (request.context) {
-    allMessages.push({
+    inputMessages.push({
       role: 'user',
       content: request.context,
     })
   }
 
   if (request.messages) {
-    allMessages.push(...request.messages)
+    inputMessages.push(...request.messages)
   }
 
   const tools = request.tools?.length
     ? request.tools.map((tool) => ({
-        type: 'function',
+        type: 'function' as const,
         function: {
           name: tool.id,
           description: tool.description,
@@ -92,88 +151,75 @@ async function executeChatCompletionsRequest(
   const payload: any = {
     model: deploymentName,
-    messages: allMessages,
+    input: inputMessages.length > 0 ? inputMessages : request.systemPrompt || '',
+  }
+
+  if (request.systemPrompt) {
+    payload.instructions = request.systemPrompt
   }
 
   if (request.temperature !== undefined) payload.temperature = request.temperature
-  if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens
+  if (request.maxTokens !== undefined) payload.max_output_tokens = request.maxTokens
 
-  if (request.reasoningEffort !== undefined) payload.reasoning_effort = request.reasoningEffort
-  if (request.verbosity !== undefined) payload.verbosity = request.verbosity
+  if (request.reasoningEffort !== undefined) {
+    payload.reasoning = { effort: request.reasoningEffort }
+  }
 
   if (request.responseFormat) {
-    payload.response_format = {
-      type: 'json_schema',
-      json_schema: {
-        name: request.responseFormat.name || 'response_schema',
-        schema: request.responseFormat.schema || request.responseFormat,
-        strict: request.responseFormat.strict !== false,
+    payload.text = {
+      format: {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
+        },
       },
     }
-
-    logger.info('Added JSON schema response format to Azure OpenAI request')
+    logger.info('Added JSON schema text format to Responses API request')
   }
 
-  let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
-
   if (tools?.length) {
-    preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'azure-openai')
-    const { tools: filteredTools, toolChoice } = preparedTools
-
-    if (filteredTools?.length && toolChoice) {
-      payload.tools = filteredTools
-      payload.tool_choice = toolChoice
-
-      logger.info('Azure OpenAI request configuration:', {
-        toolCount: filteredTools.length,
-        toolChoice:
-          typeof toolChoice === 'string'
-            ? toolChoice
-            : toolChoice.type === 'function'
-              ? `force:${toolChoice.function.name}`
-              : toolChoice.type === 'tool'
-                ? `force:${toolChoice.name}`
-                : toolChoice.type === 'any'
-                  ? `force:${toolChoice.any?.name || 'unknown'}`
-                  : 'unknown',
-        model: deploymentName,
-      })
+    payload.tools = tools
+
+    const forcedTools = request.tools?.filter((t) => t.usageControl === 'force') || []
+    if (forcedTools.length > 0) {
+      if (forcedTools.length === 1) {
+        payload.tool_choice = {
+          type: 'function',
+          function: { name: forcedTools[0].id },
+        }
+      } else {
+        payload.tool_choice = 'required'
+      }
+    } else {
+      payload.tool_choice = 'auto'
+    }
-    }
-  }
 
-  const providerStartTime = Date.now()
-  const providerStartTimeISO = new Date(providerStartTime).toISOString()
+    logger.info('Responses API request configuration:', {
+      toolCount: tools.length,
+      model: deploymentName,
+    })
+  }
 
   try {
     if (request.stream && (!tools || tools.length === 0)) {
-      logger.info('Using streaming response for Azure OpenAI request')
+      logger.info('Using streaming response for Responses API request')
 
-      const streamingParams: ChatCompletionCreateParamsStreaming = {
+      const streamResponse = await (azureOpenAI as any).responses.create({
         ...payload,
         stream: true,
-        stream_options: { include_usage: true },
+      })
+
+      const tokenUsage = {
+        prompt: 0,
+        completion: 0,
+        total: 0,
       }
 
-      const streamResponse = await azureOpenAI.chat.completions.create(streamingParams)
       const streamingResult = {
-        stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
+        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
           streamingResult.execution.output.content = content
-          streamingResult.execution.output.tokens = {
-            input: usage.prompt_tokens,
-            output: usage.completion_tokens,
-            total: usage.total_tokens,
-          }
-
-          const costResult = calculateCost(
-            request.model,
-            usage.prompt_tokens,
-            usage.completion_tokens
-          )
-          streamingResult.execution.output.cost = {
-            input: costResult.input,
-            output: costResult.output,
-            total: costResult.total,
-          }
 
           const streamEndTime = Date.now()
           const streamEndTimeISO = new Date(streamEndTime).toISOString()
@@ -190,13 +236,23 @@ async function executeChatCompletionsRequest(
                streamEndTime - providerStartTime
            }
          }
+
+          if (usage) {
+            streamingResult.execution.output.tokens = {
+              prompt: usage.input_tokens || usage.prompt_tokens || 0,
+              completion: usage.output_tokens || usage.completion_tokens || 0,
+              total:
+                (usage.input_tokens || usage.prompt_tokens || 0) +
+                (usage.output_tokens || usage.completion_tokens || 0),
+            }
+          }
         }),
         execution: {
           success: true,
           output: {
             content: '',
             model: request.model,
-            tokens: { input: 0, output: 0, total: 0 },
+            tokens: tokenUsage,
             toolCalls: undefined,
             providerTiming: {
               startTime: providerStartTimeISO,
@@ -212,7 +268,6 @@ async function executeChatCompletionsRequest(
             },
           ],
         },
-        cost: { input: 0, output: 0, total: 0 },
       },
       logs: [],
       metadata: {
@@ -223,30 +278,29 @@ async function executeChatCompletionsRequest(
       },
     } as StreamingExecution
 
-    return streamingResult as StreamingExecution
+    return streamingResult
   }
 
   const initialCallTime = Date.now()
 
-  const originalToolChoice = payload.tool_choice
-  const forcedTools = preparedTools?.forcedTools || []
-  let usedForcedTools: string[] = []
-
-  let currentResponse = await azureOpenAI.chat.completions.create(payload)
+  let currentResponse = await (azureOpenAI as any).responses.create(payload)
   const firstResponseTime = Date.now() - initialCallTime
 
-  let content = currentResponse.choices[0]?.message?.content || ''
+  let content = currentResponse.output_text || ''
+
   const tokens = {
-    input: currentResponse.usage?.prompt_tokens || 0,
-    output: currentResponse.usage?.completion_tokens || 0,
-    total: currentResponse.usage?.total_tokens || 0,
+    prompt: currentResponse.usage?.input_tokens || 0,
+    completion: currentResponse.usage?.output_tokens || 0,
+    total:
+      (currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
   }
 
-  const toolCalls = []
-  const toolResults = []
-  const currentMessages = [...allMessages]
+
+  const toolCalls: any[] = []
+  const toolResults: any[] = []
   let iterationCount = 0
+  const MAX_ITERATIONS = 10
+
   let modelTime = firstResponseTime
   let toolsTime = 0
-  let hasUsedForcedTool = false
 
   const timeSegments: TimeSegment[] = [
     {
@@ -258,168 +312,94 @@ async function executeChatCompletionsRequest(
     },
   ]
 
-  const firstCheckResult = checkForForcedToolUsage(
-    currentResponse,
-    originalToolChoice,
-    logger,
-    forcedTools,
-    usedForcedTools
-  )
-  hasUsedForcedTool = firstCheckResult.hasUsedForcedTool
-  usedForcedTools = firstCheckResult.usedForcedTools
-
-  while (iterationCount < MAX_TOOL_ITERATIONS) {
-    if (currentResponse.choices[0]?.message?.content) {
-      content = currentResponse.choices[0].message.content
-    }
+  while (iterationCount < MAX_ITERATIONS) {
+    const toolCallsInResponse =
+      currentResponse.output?.filter((item: any) => item.type === 'function_call') || []
 
-    const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
-    if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
+    if (toolCallsInResponse.length === 0) {
       break
     }
 
     logger.info(
-      `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_TOOL_ITERATIONS})`
+      `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
     )
 
     const toolsStartTime = Date.now()
 
-    const toolExecutionPromises = toolCallsInResponse.map(async (toolCall) => {
-      const toolCallStartTime = Date.now()
-      const toolName = toolCall.function.name
-
+    for (const toolCall of toolCallsInResponse) {
       try {
-        const toolArgs = JSON.parse(toolCall.function.arguments)
-        const tool = request.tools?.find((t) => t.id === toolName)
+        const toolName = toolCall.name
+        const toolArgs =
+          typeof toolCall.arguments === 'string'
+            ? JSON.parse(toolCall.arguments)
+            : toolCall.arguments
 
-        if (!tool) return null
+        const tool = request.tools?.find((t) => t.id === toolName)
+        if (!tool) continue
 
+        const toolCallStartTime = Date.now()
         const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
-        const result = await executeTool(toolName, executionParams)
-        const toolCallEndTime = Date.now()
 
-        return {
-          toolCall,
-          toolName,
-          toolParams,
-          result,
-          startTime: toolCallStartTime,
-          endTime: toolCallEndTime,
-          duration: toolCallEndTime - toolCallStartTime,
-        }
-      } catch (error) {
+        const result = await executeTool(toolName, executionParams, true)
         const toolCallEndTime = Date.now()
-        logger.error('Error processing tool call:', { error, toolName })
-
-        return {
-          toolCall,
-          toolName,
-          toolParams: {},
-          result: {
-            success: false,
-            output: undefined,
-            error: error instanceof Error ? error.message : 'Tool execution failed',
-          },
+        const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+        timeSegments.push({
+          type: 'tool',
+          name: toolName,
           startTime: toolCallStartTime,
           endTime: toolCallEndTime,
-          duration: toolCallEndTime - toolCallStartTime,
+          duration: toolCallDuration,
+        })
+
+        let resultContent: any
+        if (result.success) {
+          toolResults.push(result.output)
+          resultContent = result.output
+        } else {
+          resultContent = {
+            error: true,
+            message: result.error || 'Tool execution failed',
+            tool: toolName,
+          }
         }
-      }
-    })
-
-    const executionResults = await Promise.allSettled(toolExecutionPromises)
-
-    currentMessages.push({
-      role: 'assistant',
-      content: null,
-      tool_calls: toolCallsInResponse.map((tc) => ({
-        id: tc.id,
-        type: 'function',
-        function: {
-          name: tc.function.name,
-          arguments: tc.function.arguments,
-        },
-      })),
-    })
-
-    for (const settledResult of executionResults) {
-      if (settledResult.status === 'rejected' || !settledResult.value) continue
-
-      const { toolCall, toolName, toolParams, result, startTime, endTime, duration } =
-        settledResult.value
-
-      timeSegments.push({
-        type: 'tool',
-        name: toolName,
-        startTime: startTime,
-        endTime: endTime,
-        duration: duration,
-      })
 
-      let resultContent: any
-      if (result.success) {
-        toolResults.push(result.output)
-        resultContent = result.output
-      } else {
-        resultContent = {
-          error: true,
-          message: result.error || 'Tool execution failed',
-          tool: toolName,
-        }
-      }
 
-      toolCalls.push({
-        name: toolName,
-        arguments: toolParams,
-        startTime: new Date(startTime).toISOString(),
-        endTime: new Date(endTime).toISOString(),
-        duration: duration,
-        result: resultContent,
-        success: result.success,
-      })
-
-      currentMessages.push({
-        role: 'tool',
-        tool_call_id: toolCall.id,
-        content: JSON.stringify(resultContent),
-      })
+        toolCalls.push({
+          name: toolName,
+          arguments: toolParams,
+          startTime: new Date(toolCallStartTime).toISOString(),
+          endTime: new Date(toolCallEndTime).toISOString(),
+          duration: toolCallDuration,
+          result: resultContent,
+          success: result.success,
+        })
+
+        // Add function call output to input for next request
+        inputMessages.push({
+          type: 'function_call_output',
+          call_id: toolCall.call_id || toolCall.id,
+          output: JSON.stringify(resultContent),
+        })
+      } catch (error) {
+        logger.error('Error processing tool call:', {
+          error,
+          toolName: toolCall?.name,
+        })
      }
    }
 
     const thisToolsTime = Date.now() - toolsStartTime
     toolsTime += thisToolsTime
 
+    // Make the next request
+    const nextModelStartTime = Date.now()
     const nextPayload = {
       ...payload,
-      messages: currentMessages,
-    }
-
-    if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
-      const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
-
-      if (remainingTools.length > 0) {
-        nextPayload.tool_choice = {
-          type: 'function',
-          function: { name: remainingTools[0] },
-        }
-        logger.info(`Forcing next tool: ${remainingTools[0]}`)
-      } else {
-        nextPayload.tool_choice = 'auto'
-        logger.info('All forced tools have been used, switching to auto tool_choice')
-      }
+      input: inputMessages,
+      tool_choice: 'auto',
     }
 
-    const nextModelStartTime = Date.now()
-    currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
-
-    const nextCheckResult = checkForForcedToolUsage(
-      currentResponse,
-      nextPayload.tool_choice,
-      logger,
-      forcedTools,
-      usedForcedTools
-    )
-    hasUsedForcedTool = nextCheckResult.hasUsedForcedTool
-    usedForcedTools = nextCheckResult.usedForcedTools
+    currentResponse = await (azureOpenAI as any).responses.create(nextPayload)
 
     const nextModelEndTime = Date.now()
     const thisModelTime = nextModelEndTime - nextModelStartTime
@@ -434,60 +414,45 @@ async function executeChatCompletionsRequest(
 
     modelTime += thisModelTime
 
-    if (currentResponse.choices[0]?.message?.content) {
-      content = currentResponse.choices[0].message.content
+    // Update content
+    if (currentResponse.output_text) {
+      content = currentResponse.output_text
     }
 
+    // Update token counts
     if (currentResponse.usage) {
-      tokens.input += currentResponse.usage.prompt_tokens || 0
-      tokens.output += currentResponse.usage.completion_tokens || 0
-      tokens.total += currentResponse.usage.total_tokens || 0
+      tokens.prompt += currentResponse.usage.input_tokens || 0
+      tokens.completion += currentResponse.usage.output_tokens || 0
+      tokens.total = tokens.prompt + tokens.completion
     }
 
     iterationCount++
   }
 
+  // Handle streaming for final response after tool processing
   if (request.stream) {
-    logger.info('Using streaming for final response after tool processing')
+    logger.info('Using streaming for final response after tool processing (Responses API)')
 
-    const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)
-
-    const streamingParams: ChatCompletionCreateParamsStreaming = {
+    const streamingPayload = {
       ...payload,
-      messages: currentMessages,
+      input: inputMessages,
       tool_choice: 'auto',
       stream: true,
-      stream_options: { include_usage: true },
    }
-    const streamResponse = await azureOpenAI.chat.completions.create(streamingParams)
+
+    const streamResponse = await (azureOpenAI as any).responses.create(streamingPayload)
 
     const streamingResult = {
-      stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
+      stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
         streamingResult.execution.output.content = content
 
-        streamingResult.execution.output.tokens = {
-          input: tokens.input + usage.prompt_tokens,
-          output: tokens.output + usage.completion_tokens,
-          total: tokens.total + usage.total_tokens,
-        }
-
-        const streamCost = calculateCost(
-          request.model,
-          usage.prompt_tokens,
-          usage.completion_tokens
-        )
-        streamingResult.execution.output.cost = {
-          input: accumulatedCost.input + streamCost.input,
-          output: accumulatedCost.output + streamCost.output,
-          total: accumulatedCost.total + streamCost.total,
-        }
-
-        const streamEndTime = Date.now()
-        const streamEndTimeISO = new Date(streamEndTime).toISOString()
-
-        if (streamingResult.execution.output.providerTiming) {
-          streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
-          streamingResult.execution.output.providerTiming.duration =
-            streamEndTime - providerStartTime
+        if (usage) {
+          streamingResult.execution.output.tokens = {
+            prompt: usage.input_tokens || tokens.prompt,
+            completion: usage.output_tokens || tokens.completion,
+            total:
+              (usage.input_tokens || tokens.prompt) + (usage.output_tokens || tokens.completion),
+          }
         }
       }),
       execution: {
         success: true,
         output: {
           content: '',
           model: request.model,
           tokens: {
-            input: tokens.input,
-            output: tokens.output,
+            prompt: tokens.prompt,
+            completion: tokens.completion,
             total: tokens.total,
           },
           toolCalls:
@@ -517,11 +482,6 @@ async function executeChatCompletionsRequest(
             iterations: iterationCount + 1,
             timeSegments: timeSegments,
           },
-          cost: {
-            input: accumulatedCost.input,
-            output: accumulatedCost.output,
-            total: accumulatedCost.total,
-          },
         },
         logs: [],
         metadata: {
@@ -532,9 +492,10 @@ async function executeChatCompletionsRequest(
       },
     } as StreamingExecution
 
-    return streamingResult as StreamingExecution
+    return streamingResult
   }
 
+  // Calculate overall timing
   const providerEndTime = Date.now()
   const providerEndTimeISO = new Date(providerEndTime).toISOString()
   const totalDuration = providerEndTime - providerStartTime
@@ -561,7 +522,7 @@ async function executeChatCompletionsRequest(
     const providerEndTimeISO = new Date(providerEndTime).toISOString()
     const totalDuration = providerEndTime - providerStartTime
 
-    logger.error('Error in Azure OpenAI chat completions request:', {
+    logger.error('Error in Responses API request:', {
       error,
       duration: totalDuration,
     })
@@ -592,7 +553,20 @@ export const azureOpenAIProvider: ProviderConfig = {
   executeRequest: async (
     request: ProviderRequest
   ): Promise<ProviderResponse | StreamingExecution> => {
+    logger.info('Preparing Azure OpenAI request', {
+      model: request.model || 'azure/gpt-4o',
+      hasSystemPrompt: !!request.systemPrompt,
+      hasMessages: !!request.messages?.length,
+      hasTools: !!request.tools?.length,
+      toolCount: request.tools?.length || 0,
+      hasResponseFormat: !!request.responseFormat,
+      stream: !!request.stream,
+    })
+
+    // Extract Azure-specific configuration from request or environment
+    // Priority: request parameters > environment variables
     const azureEndpoint = request.azureEndpoint || env.AZURE_OPENAI_ENDPOINT
+    const azureApiVersion = request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-10-21'
 
     if (!azureEndpoint) {
       throw new Error(
@@ -600,78 +574,533 @@ export const azureOpenAIProvider: ProviderConfig = {
       )
     }
 
-    if (!request.apiKey) {
-      throw new Error('API key is required for Azure OpenAI')
-    }
+    // API key is now handled server-side before this function is called
+    const azureOpenAI = new AzureOpenAI({
+      apiKey: request.apiKey,
+      apiVersion: azureApiVersion,
+      endpoint: azureEndpoint,
+    })
 
-    // Check if the endpoint is a full chat completions URL
-    if (isChatCompletionsEndpoint(azureEndpoint)) {
-      logger.info('Detected chat completions endpoint URL')
+    // Build deployment name - use deployment name instead of model name
+    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
 
-      // Extract the base URL for the SDK (it needs just the host, not the full path)
-      const baseUrl = extractBaseUrl(azureEndpoint)
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
-      // Try to extract deployment from URL, fall back to model name
-      const urlDeployment = extractDeploymentFromUrl(azureEndpoint)
-      const deploymentName = urlDeployment || request.model.replace('azure/', '')
+    // Check if we should use the Responses API (2025+ versions)
+    if (useResponsesApi(azureApiVersion)) {
+      logger.info('Using Responses API for Azure OpenAI request', {
+        apiVersion: azureApiVersion,
+        model: deploymentName,
+      })
+      return executeWithResponsesApi(
+        azureOpenAI,
+        request,
+        deploymentName,
+        providerStartTime,
+        providerStartTimeISO
+      )
+    }
 
-      // Try to extract api-version from URL, fall back to request param or env or default
-      const urlApiVersion = extractApiVersionFromUrl(azureEndpoint)
-      const azureApiVersion =
-        urlApiVersion ||
-        request.azureApiVersion ||
-        env.AZURE_OPENAI_API_VERSION ||
-        '2024-07-01-preview'
+    // Continue with Chat Completions API for 2024 and earlier versions
+    logger.info('Using Chat Completions API for Azure OpenAI request', {
+      apiVersion: azureApiVersion,
+      model: deploymentName,
+    })
-      logger.info('Chat completions configuration:', {
-        originalEndpoint: azureEndpoint,
-        baseUrl,
-        deploymentName,
-        apiVersion: azureApiVersion,
+    // Start with an empty array for all messages
+    const allMessages = []
+
+    // Add system prompt if present
+    if (request.systemPrompt) {
+      allMessages.push({
+        role: 'system',
+        content: request.systemPrompt,
+      })
+    }
+
+    // Add context if present
+    if (request.context) {
+      allMessages.push({
+        role: 'user',
+        content: request.context,
       })
+    }
 
-      return executeChatCompletionsRequest(request, baseUrl, azureApiVersion, deploymentName)
+    // Add remaining messages
+    if (request.messages) {
+      allMessages.push(...request.messages)
     }
 
-    // Check if the endpoint is already a full responses API URL
-    if (isResponsesEndpoint(azureEndpoint)) {
-      logger.info('Detected full responses endpoint URL, using it directly')
-
-      const deploymentName = request.model.replace('azure/', '')
-
-      // Use the URL as-is since it's already complete
-      return executeResponsesProviderRequest(request, {
-        providerId: 'azure-openai',
-        providerLabel: 'Azure OpenAI',
-        modelName: deploymentName,
-        endpoint: azureEndpoint,
-        headers: {
-          'Content-Type': 'application/json',
-          'OpenAI-Beta': 'responses=v1',
-          'api-key': request.apiKey,
+    // Transform tools to Azure OpenAI format if provided
+    const tools = request.tools?.length
+      ? request.tools.map((tool) => ({
+          type: 'function',
+          function: {
+            name: tool.id,
+            description: tool.description,
+            parameters: tool.parameters,
+          },
+        }))
+      : undefined
+
+    // Build the request payload
+    const payload: any = {
+      model: deploymentName, // Azure OpenAI uses deployment name
+      messages: allMessages,
+    }
+
+    // Add optional parameters
+    if (request.temperature !== undefined) payload.temperature = request.temperature
+    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
+
+    // Add GPT-5 specific parameters
+    if (request.reasoningEffort !== undefined) payload.reasoning_effort = request.reasoningEffort
+    if (request.verbosity !== undefined) payload.verbosity = request.verbosity
+
+    // Add response format for structured output if specified
+    if (request.responseFormat) {
+      // Use Azure OpenAI's JSON schema format
+      payload.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
         },
-        logger,
-      })
+      }
+
+      logger.info('Added JSON schema response format to Azure OpenAI request')
     }
 
-    // Default: base URL provided, construct the responses API URL
-    logger.info('Using base endpoint, constructing Responses API URL')
-    const azureApiVersion =
-      request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-07-01-preview'
-    const deploymentName = request.model.replace('azure/', '')
-    const apiUrl = `${azureEndpoint.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
-
-    return executeResponsesProviderRequest(request, {
-      providerId: 'azure-openai',
-      providerLabel: 'Azure OpenAI',
-      modelName: deploymentName,
-      endpoint: apiUrl,
-      headers: {
-        'Content-Type': 'application/json',
-        'OpenAI-Beta': 'responses=v1',
-        'api-key': request.apiKey,
-      },
-      logger,
-    })
+    // Handle tools and tool usage control
+    let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
+
+    if (tools?.length) {
+      preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'azure-openai')
+      const { tools: filteredTools, toolChoice } = preparedTools
+
+      if (filteredTools?.length && toolChoice) {
+        payload.tools = filteredTools
+        payload.tool_choice = toolChoice
+
+        logger.info('Azure OpenAI request configuration:', {
+          toolCount: filteredTools.length,
+          toolChoice:
+            typeof toolChoice === 'string'
+              ? toolChoice
+              : toolChoice.type === 'function'
+                ? `force:${toolChoice.function.name}`
+                : toolChoice.type === 'tool'
+                  ? `force:${toolChoice.name}`
+                  : toolChoice.type === 'any'
+                    ? `force:${toolChoice.any?.name || 'unknown'}`
+                    : 'unknown',
+          model: deploymentName,
+        })
+      }
+    }
+
+    try {
+      if (request.stream && (!tools || tools.length === 0)) {
+        logger.info('Using streaming response for Azure OpenAI request')
+
+        const streamResponse = await azureOpenAI.chat.completions.create({
+          ...payload,
+          stream: true,
+          stream_options: { include_usage: true },
+        })
+
+        const tokenUsage = {
+          prompt: 0,
+          completion: 0,
+          total: 0,
+        }
+
+        let _streamContent = ''
+
+        const streamingResult = {
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              const streamEndTime = Date.now()
+              const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+              if (streamingResult.execution.output.providerTiming) {
+                streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+                streamingResult.execution.output.providerTiming.duration =
+                  streamEndTime - providerStartTime
+
+                if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                  streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                    streamEndTime
+                  streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                    streamEndTime - providerStartTime
+                }
+              }
+
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokenUsage.prompt,
+                  completion: usage.completion_tokens || tokenUsage.completion,
+                  total: usage.total_tokens || tokenUsage.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
+            }
+          ),
+          execution: {
+            success: true,
+            output: {
+              content: '',
+              model: request.model,
+              tokens: tokenUsage,
+              toolCalls: undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                timeSegments: [
+                  {
+                    type: 'model',
+                    name: 'Streaming response',
+                    startTime: providerStartTime,
+                    endTime: Date.now(),
+                    duration: Date.now() - providerStartTime,
+                  },
+                ],
+              },
+            },
+            logs: [],
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        return streamingResult as StreamingExecution
+      }
+
+      const initialCallTime = Date.now()
+
+      const originalToolChoice = payload.tool_choice
+
+      const forcedTools = preparedTools?.forcedTools || []
+      let usedForcedTools: string[] = []
+
+      const checkForForcedToolUsage = (
+        response: any,
+        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
+      ) => {
+        if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
+          const toolCallsResponse = response.choices[0].message.tool_calls
+          const result = trackForcedToolUsage(
+            toolCallsResponse,
+            toolChoice,
+            logger,
+            'azure-openai',
+            forcedTools,
+            usedForcedTools
+          )
+          hasUsedForcedTool = result.hasUsedForcedTool
+          usedForcedTools = result.usedForcedTools
+        }
+      }
+
+      let currentResponse = await azureOpenAI.chat.completions.create(payload)
+      const firstResponseTime = Date.now() - initialCallTime
+
+      let content = currentResponse.choices[0]?.message?.content || ''
+      const tokens = {
+        prompt: currentResponse.usage?.prompt_tokens || 0,
+        completion: currentResponse.usage?.completion_tokens || 0,
+        total: currentResponse.usage?.total_tokens || 0,
+      }
+      const toolCalls = []
+      const toolResults = []
+      const currentMessages = [...allMessages]
+      let iterationCount = 0
+      const MAX_ITERATIONS = 10
+
+      let modelTime = firstResponseTime
+      let toolsTime = 0
+
+      let hasUsedForcedTool = false
+
+      const timeSegments: TimeSegment[] = [
+        {
+          type: 'model',
+          name: 'Initial response',
+          startTime: initialCallTime,
+          endTime: initialCallTime + firstResponseTime,
+          duration: firstResponseTime,
+        },
+      ]
+
+      checkForForcedToolUsage(currentResponse, originalToolChoice)
+
+      while (iterationCount < MAX_ITERATIONS) {
+        const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
+        if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
+          break
+        }
+
+        logger.info(
+          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+        )
+
+        const toolsStartTime = Date.now()
+
+        for (const toolCall of toolCallsInResponse) {
+          try {
+            const toolName = toolCall.function.name
+            const toolArgs = JSON.parse(toolCall.function.arguments)
+
+            const tool = request.tools?.find((t) => t.id === toolName)
+            if (!tool) continue
+
+            const toolCallStartTime = Date.now()
+
+            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
+
+            const result = await executeTool(toolName, executionParams, true)
+            const toolCallEndTime = Date.now()
+            const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+            timeSegments.push({
+              type: 'tool',
+              name: toolName,
+              startTime: toolCallStartTime,
+              endTime: toolCallEndTime,
+              duration: toolCallDuration,
+            })
+
+            let resultContent: any
+            if (result.success) {
+              toolResults.push(result.output)
+              resultContent = result.output
+            } else {
+              resultContent = {
+                error: true,
+                message: result.error || 'Tool execution failed',
+                tool: toolName,
+              }
+            }
+
+            toolCalls.push({
+              name: toolName,
+              arguments: toolParams,
+              startTime: new Date(toolCallStartTime).toISOString(),
+              endTime: new Date(toolCallEndTime).toISOString(),
+              duration: toolCallDuration,
+              result: resultContent,
+              success: result.success,
+            })
+
+            currentMessages.push({
+              role: 'assistant',
+              content: null,
+              tool_calls: [
+                {
+                  id: toolCall.id,
+                  type: 'function',
+                  function: {
+                    name: toolName,
+                    arguments: toolCall.function.arguments,
+                  },
+                },
+              ],
+            })
+
+            currentMessages.push({
+              role: 'tool',
+              tool_call_id: toolCall.id,
+              content: JSON.stringify(resultContent),
+            })
+          } catch (error) {
+            logger.error('Error processing tool call:', {
+              error,
+              toolName: toolCall?.function?.name,
+            })
+          }
+        }
+
+        const thisToolsTime = Date.now() - toolsStartTime
+        toolsTime += thisToolsTime
+
+        const nextPayload = {
+          ...payload,
+          messages: currentMessages,
+        }
+
+        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
+          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
+
+          if (remainingTools.length > 0) {
+            nextPayload.tool_choice = {
+              type: 'function',
+              function: { name: remainingTools[0] },
+            }
+            logger.info(`Forcing next tool: ${remainingTools[0]}`)
+          } else {
+            nextPayload.tool_choice = 'auto'
+            logger.info('All forced tools have been used, switching to auto tool_choice')
+          }
+        }
+
+        const nextModelStartTime = Date.now()
+
+        currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
+
+        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
+
+        const nextModelEndTime = Date.now()
+        const thisModelTime = nextModelEndTime - nextModelStartTime
+
+        timeSegments.push({
+          type: 'model',
+          name: `Model response (iteration ${iterationCount + 1})`,
+          startTime: nextModelStartTime,
+          endTime: nextModelEndTime,
+          duration: thisModelTime,
+        })
+
+        modelTime += thisModelTime
+
+        if (currentResponse.choices[0]?.message?.content) {
+          content = currentResponse.choices[0].message.content
+        }
+
+        if (currentResponse.usage) {
+          tokens.prompt += currentResponse.usage.prompt_tokens || 0
+          tokens.completion += currentResponse.usage.completion_tokens || 0
+          tokens.total += currentResponse.usage.total_tokens || 0
+        }
+
+        iterationCount++
+      }
+
+      if (request.stream) {
+        logger.info('Using streaming for final response after tool processing')
+
+        const streamingPayload = {
+          ...payload,
+          messages: currentMessages,
+          tool_choice: 'auto',
+          stream: true,
+          stream_options: { include_usage: true },
+        }
+
+        const streamResponse = await azureOpenAI.chat.completions.create(streamingPayload)
+
+        let _streamContent = ''
+
+        const streamingResult = {
+          stream: createReadableStreamFromChatCompletionsStream(
+            streamResponse,
+            (content, usage) => {
+              _streamContent = content
+              streamingResult.execution.output.content = content
+
+              if (usage) {
+                const newTokens = {
+                  prompt: usage.prompt_tokens || tokens.prompt,
+                  completion: usage.completion_tokens || tokens.completion,
+                  total: usage.total_tokens || tokens.total,
+                }
+
+                streamingResult.execution.output.tokens = newTokens
+              }
+            }
+          ),
+          execution: {
+            success: true,
+            output: {
+              content: '',
+              model: request.model,
+              tokens: {
+                prompt: tokens.prompt,
+                completion: tokens.completion,
+                total: tokens.total,
+              },
+              toolCalls:
+                toolCalls.length > 0
+                  ? {
+                      list: toolCalls,
+                      count: toolCalls.length,
+                    }
+                  : undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                modelTime: modelTime,
+                toolsTime: toolsTime,
+                firstResponseTime: firstResponseTime,
+                iterations: iterationCount + 1,
+                timeSegments: timeSegments,
+              },
+              // Cost will be calculated in logger
+            },
+            logs: [], // No block logs at provider level
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        return streamingResult as StreamingExecution
+      }
+
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      return {
+        content,
+        model: request.model,
+        tokens,
+        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+        toolResults: toolResults.length > 0 ? toolResults : undefined,
+        timing: {
+          startTime: providerStartTimeISO,
+          endTime: providerEndTimeISO,
+          duration: totalDuration,
+          modelTime: modelTime,
+          toolsTime: toolsTime,
+          firstResponseTime: firstResponseTime,
+          iterations: iterationCount + 1,
+          timeSegments: timeSegments,
+        },
+      }
+    } catch (error) {
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      logger.error('Error in Azure OpenAI request:', {
+        error,
+        duration: totalDuration,
+      })
+
+      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+      // @ts-ignore - Adding timing property to the error
+      enhancedError.timing = {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+      }
+
+      throw enhancedError
+    }
   },
 }
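
Reviewer note: two dispatch rules recur across the files above — `useResponsesApi` routes `2025-*` API versions to the Responses API while `2024-*` and earlier stay on Chat Completions, and the evaluator/router handlers forward `azureEndpoint` for both Azure providers but `azureApiVersion` only for `azure-openai`. The snippet below is a minimal, self-contained sketch that restates those two rules for quick verification; it is illustrative only and not code from this patch, and the `2025-04-01-preview` version string is a hypothetical example.

```typescript
// Illustrative sketch of this PR's dispatch rules; not part of the diff.

// Rule 1 (providers/azure-openai/index.ts): 2025-* API versions use the
// Responses API; 2024-* and earlier use the Chat Completions API.
function useResponsesApi(apiVersion: string): boolean {
  return apiVersion.startsWith('2025-')
}

// Rule 2 (evaluator/router handlers): both Azure providers forward the
// endpoint, but only azure-openai also forwards an API version.
function azureCredentialKeys(providerId: 'azure-openai' | 'azure-anthropic'): string[] {
  return providerId === 'azure-openai'
    ? ['azureEndpoint', 'azureApiVersion']
    : ['azureEndpoint']
}

console.log(useResponsesApi('2024-10-21')) // false — the PR's default stays on Chat Completions
console.log(useResponsesApi('2025-04-01-preview')) // true — a 2025 version flips to Responses
console.log(azureCredentialKeys('azure-anthropic')) // ['azureEndpoint']
```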