diff --git a/ai-cli.sln b/ai-cli.sln
index da387f46..222f109e 100644
--- a/ai-cli.sln
+++ b/ai-cli.sln
@@ -39,6 +39,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Telemetry", "Telemetry", "{
src\telemetry\NuGet.config = src\telemetry\NuGet.config
EndProjectSection
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "inference_extension", "src\extensions\inference_extension\inference_extension.csproj", "{7BF26AB6-8931-46CB-A330-D83DF55AB4E8}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -181,6 +183,18 @@ Global
{306A3CD6-91C2-450B-9995-79701CE63FE2}.Release|x64.Build.0 = Release|Any CPU
{306A3CD6-91C2-450B-9995-79701CE63FE2}.Release|x86.ActiveCfg = Release|Any CPU
{306A3CD6-91C2-450B-9995-79701CE63FE2}.Release|x86.Build.0 = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|x64.Build.0 = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Debug|x86.Build.0 = Debug|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|Any CPU.Build.0 = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|x64.ActiveCfg = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|x64.Build.0 = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|x86.ActiveCfg = Release|Any CPU
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -195,6 +209,7 @@ Global
{9499C018-FA08-4133-93B3-FC0F3863A6CC} = {C8AFF891-D6AA-4B8F-BC21-10404DF4B355}
{CED7C805-0435-4BF7-A42F-9F3BBF14A18F} = {644B75F1-C768-4DB3-BAF2-C69A1F36DD28}
{306A3CD6-91C2-450B-9995-79701CE63FE2} = {975EBC5A-506D-49B5-AA7F-70D3119F009D}
+ {7BF26AB6-8931-46CB-A330-D83DF55AB4E8} = {644B75F1-C768-4DB3-BAF2-C69A1F36DD28}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {002655B1-E1E1-4F2A-8D53-C9CD55136AE2}
diff --git a/src/ai/.x/config/chat.default.config b/src/ai/.x/config/chat.default.config
index 6486aa83..6ef5af5a 100644
--- a/src/ai/.x/config/chat.default.config
+++ b/src/ai/.x/config/chat.default.config
@@ -2,6 +2,7 @@
@default.deployment
@default.search.connection
@default.assistant
+@default.model
@default.log
@default.path
@default.output
diff --git a/src/ai/.x/config/chat.default.model b/src/ai/.x/config/chat.default.model
new file mode 100644
index 00000000..bf2002d7
--- /dev/null
+++ b/src/ai/.x/config/chat.default.model
@@ -0,0 +1 @@
+chat.model.name=@chat.model
\ No newline at end of file
diff --git a/src/ai/.x/config/connection.from.endpoint b/src/ai/.x/config/connection.from.endpoint
index 957c7df2..99ee5622 100644
--- a/src/ai/.x/config/connection.from.endpoint
+++ b/src/ai/.x/config/connection.from.endpoint
@@ -1,3 +1,4 @@
service.config.region=@region
service.config.endpoint.uri=@endpoint
+service.config.endpoint.type=@endpoint.type
service.config.key=@key
\ No newline at end of file
diff --git a/src/ai/.x/config/endpoint.type b/src/ai/.x/config/endpoint.type
new file mode 100644
index 00000000..e69de29b
diff --git a/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreaming.csproj._ b/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreaming.csproj._
new file mode 100644
index 00000000..e19c066f
--- /dev/null
+++ b/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreaming.csproj._
@@ -0,0 +1,16 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <InvariantGlobalization>true</InvariantGlobalization>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- NOTE(review): XML tags were stripped from this diff by a sanitizer; element names and
+         package versions below are reconstructed - confirm against the original template. -->
+    <PackageReference Include="Azure.AI.Inference" Version="1.0.0-beta.1" />
+    <PackageReference Include="Azure.Identity" Version="1.12.0" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreamingClass.cs b/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreamingClass.cs
new file mode 100644
index 00000000..1a6fa820
--- /dev/null
+++ b/src/ai/.x/templates/aml-chat-streaming-cs/AzureAIInferencingChatCompletionsStreamingClass.cs
@@ -0,0 +1,66 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+using Azure;
+using Azure.Identity;
+using Azure.AI.Inference;
+using System;
+
+public class {ClassName}
+{
+ public {ClassName}(string aiChatEndpoint, string aiChatAPIKey, string? aiChatModel, string systemPrompt)
+ {
+ _systemPrompt = systemPrompt;
+ _aiChatModel = aiChatModel;
+
+ _client = string.IsNullOrEmpty(aiChatAPIKey)
+ ? new ChatCompletionsClient(new Uri(aiChatEndpoint), new DefaultAzureCredential())
+ : new ChatCompletionsClient(new Uri(aiChatEndpoint), new AzureKeyCredential(aiChatAPIKey));
+        _messages = new List<ChatRequestMessage>();
+
+ ClearConversation();
+ }
+
+ public void ClearConversation()
+ {
+ _messages.Clear();
+ _messages.Add(new ChatRequestSystemMessage(_systemPrompt));
+ }
+
+    public async Task<string> GetChatCompletionsStreamingAsync(string userPrompt, Action<StreamingChatCompletionsUpdate>? callback = null)
+ {
+ _messages.Add(new ChatRequestUserMessage(userPrompt));
+ var options = new ChatCompletionsOptions(_messages);
+ if (!string.IsNullOrEmpty(_aiChatModel))
+ {
+ options.Model = _aiChatModel;
+ }
+
+ var responseContent = string.Empty;
+ var response = await _client.CompleteStreamingAsync(options);
+ await foreach (var update in response)
+ {
+ var content = update.ContentUpdate;
+
+ if (update.FinishReason == CompletionsFinishReason.ContentFiltered)
+ {
+ content = $"{content}\nWARNING: Content filtered!";
+ }
+
+ if (string.IsNullOrEmpty(content)) continue;
+
+ responseContent += content;
+ if (callback != null) callback(update);
+ }
+
+ _messages.Add(new ChatRequestAssistantMessage() { Content = responseContent });
+ return responseContent;
+ }
+
+ private string _systemPrompt;
+ private string? _aiChatModel;
+ private ChatCompletionsClient _client;
+    private List<ChatRequestMessage> _messages;
+}
\ No newline at end of file
diff --git a/src/ai/.x/templates/aml-chat-streaming-cs/Program.cs b/src/ai/.x/templates/aml-chat-streaming-cs/Program.cs
new file mode 100644
index 00000000..2c9e4f9e
--- /dev/null
+++ b/src/ai/.x/templates/aml-chat-streaming-cs/Program.cs
@@ -0,0 +1,42 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+using System;
+
+public class Program
+{
+ public static async Task Main(string[] args)
+ {
+ var aiChatAPIKey = Environment.GetEnvironmentVariable("AZURE_AI_CHAT_API_KEY") ?? "";
+ var aiChatEndpoint = Environment.GetEnvironmentVariable("AZURE_AI_CHAT_ENDPOINT") ?? "";
+ var aiChatModel = Environment.GetEnvironmentVariable("AZURE_AI_CHAT_MODEL"); // null is fine
+ var systemPrompt = Environment.GetEnvironmentVariable("SYSTEM_PROMPT") ?? "You are a helpful AI assistant.";
+
+        // NOTE(review): the span below was fused into one garbled line by angle-bracket stripping;
+        // reconstructed from the surrounding template code - confirm against the original template.
+        if (string.IsNullOrEmpty(aiChatAPIKey) || aiChatAPIKey.StartsWith("<insert") ||
+            string.IsNullOrEmpty(aiChatEndpoint) || aiChatEndpoint.StartsWith("<insert"))
+        {
+            Console.WriteLine("To use this sample, set AZURE_AI_CHAT_API_KEY and AZURE_AI_CHAT_ENDPOINT.");
+            Environment.Exit(1);
+        }
+
+        var chat = new {ClassName}(aiChatEndpoint, aiChatAPIKey, aiChatModel, systemPrompt);
+
+        while (true)
+        {
+            Console.Write("User: ");
+            var userPrompt = Console.ReadLine();
+            if (string.IsNullOrEmpty(userPrompt) || userPrompt == "exit") break;
+
+            Console.Write("\nAssistant: ");
+            var response = await chat.GetChatCompletionsStreamingAsync(userPrompt, update => {
+ var text = update.ContentUpdate;
+ Console.Write(text);
+ });
+ Console.WriteLine("\n");
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ai/.x/templates/aml-chat-streaming-cs/_.json b/src/ai/.x/templates/aml-chat-streaming-cs/_.json
new file mode 100644
index 00000000..7cd818b5
--- /dev/null
+++ b/src/ai/.x/templates/aml-chat-streaming-cs/_.json
@@ -0,0 +1,6 @@
+{
+ "_LongName": "Azure AI Inference Chat Completions (Streaming)",
+ "_ShortName": "az-inference-chat-streaming",
+ "_Language": "C#",
+ "ClassName": "AzureAIInferenceChatCompletionsStreaming"
+}
\ No newline at end of file
diff --git a/src/ai/.x/templates/aml-chat-streaming-py/_.json b/src/ai/.x/templates/aml-chat-streaming-py/_.json
index d35d6a7c..d0b6c592 100644
--- a/src/ai/.x/templates/aml-chat-streaming-py/_.json
+++ b/src/ai/.x/templates/aml-chat-streaming-py/_.json
@@ -1,6 +1,6 @@
{
- "_LongName": "AzureML Chat Completions (Streaming)",
- "_ShortName": "aml-chat-streaming",
+ "_LongName": "Azure AI Inference Chat Completions (Streaming)",
+ "_ShortName": "az-inference-chat-streaming",
"_Language": "Python",
- "ClassName": "AzureMLChatCompletionsStreaming"
+ "ClassName": "AzureAIInferenceChatCompletionsStreaming"
}
\ No newline at end of file
diff --git a/src/ai/.x/templates/aml-chat-streaming-py/azureml_chat_completions_streaming.py b/src/ai/.x/templates/aml-chat-streaming-py/azureml_chat_completions_streaming.py
index 8bf30ade..daf323a5 100644
--- a/src/ai/.x/templates/aml-chat-streaming-py/azureml_chat_completions_streaming.py
+++ b/src/ai/.x/templates/aml-chat-streaming-py/azureml_chat_completions_streaming.py
@@ -3,8 +3,9 @@
from azure.core.credentials import AzureKeyCredential
class {ClassName}:
- def __init__(self, chat_endpoint, chat_api_key, chat_system_prompt):
+ def __init__(self, chat_endpoint, chat_api_key, chat_model, chat_system_prompt):
self.chat_system_prompt = chat_system_prompt
+ self.chat_model = chat_model
self.client = ChatCompletionsClient(endpoint=chat_endpoint, credential=AzureKeyCredential(chat_api_key))
self.clear_conversation()
@@ -19,11 +20,15 @@ def get_chat_completions(self, user_input, callback):
complete_content = ''
response = self.client.complete(
messages=self.messages,
+ model=self.chat_model,
stream=True,
)
for update in response:
+ if update.choices is None or len(update.choices) == 0:
+ continue
+
content = update.choices[0].delta.content or ""
if content is None: continue
diff --git a/src/ai/.x/templates/aml-chat-streaming-py/main.py b/src/ai/.x/templates/aml-chat-streaming-py/main.py
index 4a91a4e2..8bfa41c6 100644
--- a/src/ai/.x/templates/aml-chat-streaming-py/main.py
+++ b/src/ai/.x/templates/aml-chat-streaming-py/main.py
@@ -3,21 +3,23 @@
import sys
def main():
- chat_api_key = os.getenv("AZURE_AI_INFERENCE_CHAT_API_KEY", '')
- chat_endpoint = os.getenv("AZURE_AI_INFERENCE_CHAT_ENDPOINT", '')
- chat_system_prompt = os.getenv('AZURE_AI_INFERENCE_CHAT_SYSTEM_PROMPT', 'You are a helpful AI assistant.')
+ chat_api_key = os.getenv("AZURE_AI_CHAT_API_KEY", '')
+ chat_endpoint = os.getenv("AZURE_AI_CHAT_ENDPOINT", '')
+ chat_model = os.getenv('AZURE_AI_CHAT_MODEL', '')
+ chat_system_prompt = os.getenv('SYSTEM_PROMPT', 'You are a helpful AI assistant.')
ok = all([chat_api_key, chat_endpoint, chat_system_prompt]) and \
        all([not s.startswith('<insert') for s in [chat_api_key, chat_endpoint, chat_system_prompt]])
+
diff --git a/src/ai/commands/chat_command.cs b/src/ai/commands/chat_command.cs
index 33642b33..4b619caf 100644
--- a/src/ai/commands/chat_command.cs
+++ b/src/ai/commands/chat_command.cs
@@ -5,6 +5,7 @@
using Azure.AI.Details.Common.CLI.ConsoleGui;
using Azure.AI.Details.Common.CLI.Extensions.HelperFunctions;
+using Azure.AI.Details.Common.CLI.Extensions.Inference;
using Azure.AI.OpenAI;
using Azure.Core.Diagnostics;
using Microsoft.CognitiveServices.Speech;
@@ -204,12 +205,15 @@ private async Task> GetChatTextHandlerAsync(bool interactive)
var parameterFile = InputChatParameterFileToken.Data().GetOrDefault(_values);
if (!string.IsNullOrEmpty(parameterFile)) SetValuesFromParameterFile(parameterFile);
+ var endpointType = ConfigEndpointTypeToken.Data().GetOrDefault(_values);
+ var inferenceEndpointOk = endpointType == "inference";
+ if (inferenceEndpointOk) return GetInferenceChatTextHandler(interactive);
+
var assistantId = _values["chat.assistant.id"];
var assistantIdOk = !string.IsNullOrEmpty(assistantId);
+ if (assistantIdOk) return await GetAssistantsAPITextHandlerAsync(interactive, assistantId);
- return assistantIdOk
- ? await GetAssistantsAPITextHandlerAsync(interactive, assistantId)
- : GetChatCompletionsTextHandler(interactive);
+ return GetChatCompletionsTextHandler(interactive);
}
private async Task> GetAssistantsAPITextHandlerAsync(bool interactive, string assistantId)
@@ -219,7 +223,7 @@ private async Task> GetAssistantsAPITextHandlerAsync(bool int
var client = CreateAssistantClient();
var thread = await CreateOrGetAssistantThread(client, threadId);
- _ = CheckWriteChatHistoryOutputFileAsync(client, thread);
+ _ = CheckWriteChatHistoryOutputFileAsync(fileName => thread.SaveChatHistoryToFileAsync(client, fileName));
threadId = thread.Id;
_values.Reset("chat.thread.id", threadId);
@@ -239,13 +243,35 @@ private async Task> GetAssistantsAPITextHandlerAsync(bool int
};
}
+    private Func<string, Task> GetInferenceChatTextHandler(bool interactive)
+ {
+ var aiChatEndpoint = _values["service.config.endpoint.uri"];
+ var aiChatAPIKey = _values["service.config.key"];
+
+ var systemPrompt = _values.GetOrDefault("chat.message.system.prompt", DefaultSystemPrompt);
+ var chatHistoryJsonFile = InputChatHistoryJsonFileToken.Data().GetOrDefault(_values);
+ var aiChatModel = ChatModelNameToken.Data().GetOrDefault(_values);
+ var chat = new AzureAIInferenceChatCompletionsStreaming(aiChatEndpoint, aiChatAPIKey, aiChatModel, systemPrompt, chatHistoryJsonFile);
+
+ return async (string text) =>
+ {
+ if (interactive && text.ToLower() == "reset")
+ {
+ chat.ClearConversation();
+ return;
+ }
+
+ await GetInferenceChatTextHandlerAsync(chat, text);
+ };
+ }
+
    private Func<string, Task> GetChatCompletionsTextHandler(bool interactive)
{
var client = CreateOpenAIClient(out var deployment);
var chatClient = client.GetChatClient(deployment);
var options = CreateChatCompletionOptions(out var messages);
- CheckWriteChatHistoryOutputFile(messages);
+ CheckWriteChatHistoryOutputFile(fileName => messages.SaveChatHistoryToFile(fileName));
var funcContext = CreateChatCompletionsFunctionFactoryAndCallContext(messages, options);
@@ -413,7 +439,7 @@ private void DisplayAssistantFunctionCall(string functionName, string functionAr
public async Task GetAssistantsAPIResponseAsync(AssistantClient assistantClient, string assistantId, AssistantThread thread, RunCreationOptions options, HelperFunctionFactory factory, string userInput)
{
await assistantClient.CreateMessageAsync(thread, [ userInput ]);
- _ = CheckWriteChatHistoryOutputFileAsync(assistantClient, thread);
+ _ = CheckWriteChatHistoryOutputFileAsync(fileName => thread.SaveChatHistoryToFileAsync(assistantClient, fileName));
DisplayAssistantPromptLabel();
@@ -461,14 +487,30 @@ public async Task GetAssistantsAPIResponseAsync(AssistantClient assistantClient,
}
while (run?.Status.IsTerminal == false);
- await CheckWriteChatHistoryOutputFileAsync(assistantClient, thread);
+ await CheckWriteChatHistoryOutputFileAsync(fileName => thread.SaveChatHistoryToFileAsync(assistantClient, fileName));
+ }
+
+ private async Task GetInferenceChatTextHandlerAsync(AzureAIInferenceChatCompletionsStreaming chat, string text)
+ {
+ CheckWriteChatHistoryOutputFile(fileName => chat.Messages.SaveChatHistoryToFile(fileName));
+ DisplayAssistantPromptLabel();
+
+ var response = await chat.GetChatCompletionsStreamingAsync(text, update =>
+ {
+ var content = update.ContentUpdate;
+ DisplayAssistantPromptTextStreaming(content);
+ });
+
+ DisplayAssistantPromptTextStreamingDone();
+ CheckWriteChatAnswerOutputFile(response);
+ CheckWriteChatHistoryOutputFile(fileName => chat.Messages.SaveChatHistoryToFile(fileName));
}
private async Task GetChatCompletionsAsync(ChatClient client, List messages, ChatCompletionOptions options, HelperFunctionCallContext functionCallContext, string text)
{
var requestMessage = new UserChatMessage(text);
messages.Add(requestMessage);
- CheckWriteChatHistoryOutputFile(messages);
+ CheckWriteChatHistoryOutputFile(fileName => messages.SaveChatHistoryToFile(fileName));
DisplayAssistantPromptLabel();
@@ -497,7 +539,7 @@ private async Task GetChatCompletionsAsync(ChatClient client, List DisplayAssistantFunctionCall(name, args, result)))
{
functionCallContext.Clear();
- CheckWriteChatHistoryOutputFile(messages);
+ CheckWriteChatHistoryOutputFile(fileName => messages.SaveChatHistoryToFile(fileName));
continue;
}
@@ -507,7 +549,7 @@ private async Task GetChatCompletionsAsync(ChatClient client, List messages.SaveChatHistoryToFile(fileName));
return contentComplete;
}
@@ -530,30 +572,30 @@ private void CheckWriteChatAnswerOutputFile(string completeResponse)
}
}
- private async Task CheckWriteChatHistoryOutputFileAsync(AssistantClient client, AssistantThread thread)
+    private async Task CheckWriteChatHistoryOutputFileAsync(Func<string, Task> saveChatHistoryToFile)
{
var outputHistoryFile = OutputChatHistoryFileToken.Data().GetOrDefault(_values);
if (!string.IsNullOrEmpty(outputHistoryFile))
{
var fileName = FileHelpers.GetOutputDataFileName(outputHistoryFile, _values);
- await thread.SaveChatHistoryToFileAsync(client, fileName);
+ await saveChatHistoryToFile(fileName);
}
}
- private void CheckWriteChatHistoryOutputFile(IList messages)
+    private void CheckWriteChatHistoryOutputFile(Action<string> saveChatHistoryToFile)
{
var outputHistoryFile = OutputChatHistoryFileToken.Data().GetOrDefault(_values);
if (!string.IsNullOrEmpty(outputHistoryFile))
{
var fileName = FileHelpers.GetOutputDataFileName(outputHistoryFile, _values);
- messages.SaveChatHistoryToFile(fileName);
+ saveChatHistoryToFile(fileName);
}
}
private void ClearMessageHistory(List messages)
{
messages.RemoveRange(1, messages.Count - 1);
- CheckWriteChatHistoryOutputFile(messages);
+ CheckWriteChatHistoryOutputFile(fileName => messages.SaveChatHistoryToFile(fileName));
DisplayAssistantPromptLabel();
DisplayAssistantPromptTextStreaming("I've reset the conversation. How can I help you today?");
diff --git a/src/ai/commands/init_command.cs b/src/ai/commands/init_command.cs
index b59e7e53..68d8cd7d 100644
--- a/src/ai/commands/init_command.cs
+++ b/src/ai/commands/init_command.cs
@@ -74,6 +74,9 @@ private async Task DoCommand(string command)
case "init.aiservices": await DoInitRootCognitiveServicesAIServicesKind(interactive); break;
case "init.cognitiveservices": await DoInitRootCognitiveServicesCognitiveServicesKind(interactive); break;
+ case "init.inference": await DoInitRootAzureAiInference(interactive); break;
+ case "init.github": await DoInitRootGitHub(interactive); break;
+
case "init":
case "init.openai": await DoInitRootOpenAi(interactive, false, false, false, true, true, true); break;
case "init.openai.chat": await DoInitRootOpenAi(interactive, false, false, true, true, true, true); break;
@@ -596,6 +599,80 @@ private async Task DoInitCognitiveServicesAIServicesKind(bool interactive)
ResourceGroupNameToken.Data().Set(_values, resource.Group);
}
+ private async Task DoInitRootGitHub(bool interactive)
+ {
+ ConsoleHelpers.WriteLineWithHighlight($"`GITHUB MODELS`");
+
+ Console.WriteLine("You can use GitHub Models to find and experiment with AI models for free.");
+ Console.WriteLine("Once you are ready to bring your application to production, you can switch");
+ Console.WriteLine("to a token from a paid Azure account.\n");
+ Console.WriteLine("Create a token: https://github.com/settings/tokens");
+ Console.WriteLine("Review models: https://github.com/marketplace/models/");
+
+ ConsoleHelpers.WriteLineWithHighlight($"\n`GITHUB CONFIGURATION`");
+ Console.Write("Token: ");
+ var color = Console.ForegroundColor;
+ Console.ForegroundColor = Console.BackgroundColor;
+ var token = Console.ReadLine();
+ Console.ForegroundColor = color;
+
+ if (string.IsNullOrEmpty(token))
+ {
+ throw new ApplicationException($"CANCELED: No token provided");
+ }
+ else if (token.Length < 4)
+ {
+ throw new ApplicationException($"CANCELED: Token is too short");
+ }
+
+ Console.Write("Model: ");
+ var model = Console.ReadLine();
+ if (string.IsNullOrEmpty(model))
+ {
+ throw new ApplicationException($"CANCELED: No model provided");
+ }
+
+ var endpoint = "https://models.inference.ai.azure.com";
+ ConfigSetHelpers.ConfigGitHub(endpoint, model, token);
+
+ await Task.CompletedTask;
+ }
+
+ private async Task DoInitRootAzureAiInference(bool interactive)
+ {
+ await DoInitAzureAiInference(interactive);
+ }
+
+ private async Task DoInitAzureAiInference(bool interactive)
+ {
+ ConsoleHelpers.WriteLineWithHighlight($"`AZURE AI INFERENCE`");
+
+ Console.Write("Endpoint: ");
+ var endpoint = Console.ReadLine();
+ if (string.IsNullOrEmpty(endpoint))
+ {
+ throw new ApplicationException($"CANCELED: No endpoint provided");
+ }
+
+ Console.Write("Key: ");
+ var color = Console.ForegroundColor;
+ Console.ForegroundColor = Console.BackgroundColor;
+ var key = Console.ReadLine();
+ Console.ForegroundColor = color;
+ if (string.IsNullOrEmpty(key))
+ {
+ throw new ApplicationException($"CANCELED: No key provided");
+ }
+ else if (key.Length < 4)
+ {
+ throw new ApplicationException($"CANCELED: Key is too short");
+ }
+
+ ConfigSetHelpers.ConfigAzureAiInference(endpoint, key);
+
+ await Task.CompletedTask;
+ }
+
private async Task DoInitRootCognitiveServicesCognitiveServicesKind(bool interactive)
{
if (!interactive) ThrowInteractiveNotSupportedApplicationException(); // POST-IGNITE: TODO: Add back non-interactive mode support
diff --git a/src/ai/commands/parsers/chat_command_parser.cs b/src/ai/commands/parsers/chat_command_parser.cs
index f250c811..10781086 100644
--- a/src/ai/commands/parsers/chat_command_parser.cs
+++ b/src/ai/commands/parsers/chat_command_parser.cs
@@ -101,6 +101,7 @@ public CommonChatNamedValueTokenParsers() : base(
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser(),
@@ -159,6 +160,8 @@ public CommonChatNamedValueTokenParsers() : base(
new OutputFileNameNamedValueTokenParser(null, "chat.output.thread.id", "0111", "chat.assistant.thread.output.id"),
new OutputFileNameNamedValueTokenParser(null, "chat.output.add.thread.id", "01111", "chat.assistant.thread.output.add.id"),
+
+ ChatModelNameToken.Parser(),
};
private static INamedValueTokenParser[] _chatAssistantCreateCommandParsers = {
diff --git a/src/ai/commands/parsers/eval_command_parser.cs b/src/ai/commands/parsers/eval_command_parser.cs
index 34a4682e..2ce87bc0 100644
--- a/src/ai/commands/parsers/eval_command_parser.cs
+++ b/src/ai/commands/parsers/eval_command_parser.cs
@@ -34,6 +34,7 @@ public static bool ParseCommandValues(INamedValueTokens tokens, ICommandValues v
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser(),
diff --git a/src/ai/commands/parsers/init_command_parser.cs b/src/ai/commands/parsers/init_command_parser.cs
index b9ebc174..277337c0 100644
--- a/src/ai/commands/parsers/init_command_parser.cs
+++ b/src/ai/commands/parsers/init_command_parser.cs
@@ -23,6 +23,8 @@ public static bool ParseCommandValues(INamedValueTokens tokens, ICommandValues v
private static readonly (string name, bool valuesRequired)[] _commands = {
("init.aiservices", false),
("init.cognitiveservices", false),
+ ("init.inference", false),
+ ("init.github", false),
("init.openai.chat", false),
("init.openai.embeddings", false),
("init.openai.evaluations", false),
@@ -52,6 +54,8 @@ private static INamedValueTokenParser[] GetCommandParsers(ICommandValues values)
{
case "init.aiservices":
case "init.cognitiveservices":
+ case "init.inference":
+ case "init.github":
case "init.openai":
case "init.openai.chat":
case "init.openai.embeddings":
diff --git a/src/ai/commands/parsers/scenario_wizard_command_parser.cs b/src/ai/commands/parsers/scenario_wizard_command_parser.cs
index 1a887346..f656dd89 100644
--- a/src/ai/commands/parsers/scenario_wizard_command_parser.cs
+++ b/src/ai/commands/parsers/scenario_wizard_command_parser.cs
@@ -55,6 +55,7 @@ public CommonScenarioNamedValueTokenParsers() : base(
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser(),
diff --git a/src/ai/commands/parsers/search_command_parser.cs b/src/ai/commands/parsers/search_command_parser.cs
index 2df7354c..299f9ad8 100644
--- a/src/ai/commands/parsers/search_command_parser.cs
+++ b/src/ai/commands/parsers/search_command_parser.cs
@@ -79,6 +79,7 @@ public CommonSearchNamedValueTokenParsers() : base(
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser()
diff --git a/src/ai/commands/parsers/service_command_parser.cs b/src/ai/commands/parsers/service_command_parser.cs
index ef83f8a4..04a167ec 100644
--- a/src/ai/commands/parsers/service_command_parser.cs
+++ b/src/ai/commands/parsers/service_command_parser.cs
@@ -120,6 +120,7 @@ public CommonServiceNamedValueTokenParsers() : base(
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser(),
SubscriptionToken.Parser()
diff --git a/src/ai/commands/parsers/tool_command_parser.cs b/src/ai/commands/parsers/tool_command_parser.cs
index c0d59e84..b75df66c 100644
--- a/src/ai/commands/parsers/tool_command_parser.cs
+++ b/src/ai/commands/parsers/tool_command_parser.cs
@@ -58,6 +58,7 @@ public CommonToolNamedValueTokenParsers() : base(
new ExpandFileNameNamedValueTokenParser(),
+ ConfigEndpointTypeToken.Parser(),
ConfigEndpointUriToken.Parser(),
ConfigDeploymentToken.Parser()
diff --git a/src/ai/commands/parsers/vision_command_parser.cs b/src/ai/commands/parsers/vision_command_parser.cs
index 48455a2b..6aff23d1 100644
--- a/src/ai/commands/parsers/vision_command_parser.cs
+++ b/src/ai/commands/parsers/vision_command_parser.cs
@@ -83,6 +83,7 @@ public CommonVisionNamedValueTokenParsers() : base(
new CommonNamedValueTokenParsers(),
new IniFileNamedValueTokenParser(),
new ExpandFileNameNamedValueTokenParser(),
+ new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.type", "0011"),
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.uri", "0011"),
new Any1PinnedNamedValueTokenParser("--url", "vision.input.url", "001", "url", "vision.input.type"),
new Any1PinnedNamedValueTokenParser("--file", "vision.input.file", "001", "file", "vision.input.type"),
diff --git a/src/ai/helpers/config_environment_helpers.cs b/src/ai/helpers/config_environment_helpers.cs
index 11b9159b..9d045a0f 100644
--- a/src/ai/helpers/config_environment_helpers.cs
+++ b/src/ai/helpers/config_environment_helpers.cs
@@ -25,18 +25,30 @@ public static Dictionary GetEnvironment(INamedValues values)
env.Add("AZURE_AI_RESOURCE_NAME", ReadConfig(values, "resource"));
#endif
- env.Add("AZURE_OPENAI_KEY", ReadConfig(values, "chat.key"));
- env.Add("AZURE_OPENAI_API_KEY", ReadConfig(values, "chat.key"));
- env.Add("AZURE_OPENAI_API_VERSION", ChatCommand.GetOpenAIClientVersionNumber());
- env.Add("AZURE_OPENAI_ENDPOINT", ReadConfig(values, "chat.endpoint"));
-
- env.Add("AZURE_OPENAI_CHAT_DEPLOYMENT", ReadConfig(values, "chat.deployment"));
- env.Add("AZURE_OPENAI_EVALUATION_DEPLOYMENT", ReadConfig(values, "chat.evaluation.model.deployment.name") ?? ReadConfig(values, "chat.deployment"));
- env.Add("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", ReadConfig(values, "search.embedding.model.deployment.name"));
-
- env.Add("AZURE_OPENAI_CHAT_MODEL", ReadConfig(values, "chat.model"));
- env.Add("AZURE_OPENAI_EVALUATION_MODEL", ReadConfig(values, "chat.evaluation.model.name") ?? ReadConfig(values, "chat.model"));
- env.Add("AZURE_OPENAI_EMBEDDING_MODEL", ReadConfig(values, "search.embedding.model.name"));
+ var endpointType = ReadConfig(values, "chat.endpoint.type");
+ var endpointTypeIsInference = endpointType == "inference";
+
+ if (endpointTypeIsInference)
+ {
+ env.Add("AZURE_AI_CHAT_API_KEY", ReadConfig(values, "chat.key"));
+ env.Add("AZURE_AI_CHAT_ENDPOINT", ReadConfig(values, "chat.endpoint"));
+ env.Add("AZURE_AI_CHAT_MODEL", ReadConfig(values, "chat.model"));
+ }
+ else
+ {
+ env.Add("AZURE_OPENAI_KEY", ReadConfig(values, "chat.key"));
+ env.Add("AZURE_OPENAI_API_KEY", ReadConfig(values, "chat.key"));
+ env.Add("AZURE_OPENAI_API_VERSION", ChatCommand.GetOpenAIClientVersionNumber());
+ env.Add("AZURE_OPENAI_ENDPOINT", ReadConfig(values, "chat.endpoint"));
+
+ env.Add("AZURE_OPENAI_CHAT_DEPLOYMENT", ReadConfig(values, "chat.deployment"));
+ env.Add("AZURE_OPENAI_EVALUATION_DEPLOYMENT", ReadConfig(values, "chat.evaluation.model.deployment.name") ?? ReadConfig(values, "chat.deployment"));
+ env.Add("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", ReadConfig(values, "search.embedding.model.deployment.name"));
+
+ env.Add("AZURE_OPENAI_CHAT_MODEL", ReadConfig(values, "chat.model"));
+ env.Add("AZURE_OPENAI_EVALUATION_MODEL", ReadConfig(values, "chat.evaluation.model.name") ?? ReadConfig(values, "chat.model"));
+ env.Add("AZURE_OPENAI_EMBEDDING_MODEL", ReadConfig(values, "search.embedding.model.name"));
+ }
env.Add("AZURE_AI_SEARCH_ENDPOINT", ReadConfig(values, "search.endpoint"));
env.Add("AZURE_AI_SEARCH_INDEX_NAME", ReadConfig(values, "search.index.name"));
diff --git a/src/clis/vz/commands/parsers/common_vision_token_parsers.cs b/src/clis/vz/commands/parsers/common_vision_token_parsers.cs
index 4265ae36..6eaff4b4 100644
--- a/src/clis/vz/commands/parsers/common_vision_token_parsers.cs
+++ b/src/clis/vz/commands/parsers/common_vision_token_parsers.cs
@@ -18,6 +18,7 @@ public VisionServiceOptionsTokenParser() : base(
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.query.string", "00011"),
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.http.header", "00011"),
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.traffic.type", "00011"),
+ new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.type", "0011"),
new Any1ValueNamedValueTokenParser("--uri", "service.config.endpoint.uri", "0010;0001"),
new Any1ValueNamedValueTokenParser("--token.value", "service.config.token.value", "0010"),
diff --git a/src/common/details/helpers/config_set_helpers.cs b/src/common/details/helpers/config_set_helpers.cs
index f6dabdeb..29b60dd9 100644
--- a/src/common/details/helpers/config_set_helpers.cs
+++ b/src/common/details/helpers/config_set_helpers.cs
@@ -40,6 +40,8 @@ public static void ConfigCognitiveServicesAIServicesKindResource(string subscrip
actions.Add(ConfigSetLambda("@chat.key", key, "Key (chat)", key.Substring(0, 4) + "****************************", ref maxLabelWidth));
actions.Add(ConfigSetLambda("@chat.region", region, "Region (chat)", region, ref maxLabelWidth));
actions.Add(ConfigSetLambda("@chat.endpoint", endpoint, "Endpoint (chat)", endpoint, ref maxLabelWidth));
+ ConfigSet("@chat.endpoint.type", "ais");
+
if (chatDeployment != null)
{
actions.Add(ConfigSetLambda("@chat.deployment", chatDeployment.Value.Name, "Deployment (chat)", chatDeployment.Value.Name, ref maxLabelWidth));
@@ -101,6 +103,7 @@ public static void ConfigOpenAiResource(string subscriptionId, string region, st
actions.Add(ConfigSetLambda("@chat.key", key, "Key (chat)", key.Substring(0, 4) + "****************************", ref maxLabelWidth));
actions.Add(ConfigSetLambda("@chat.endpoint", endpoint, "Endpoint (chat)", endpoint, ref maxLabelWidth));
+ ConfigSet("@chat.endpoint.type", "aoai");
if (chatDeployment != null)
{
actions.Add(ConfigSetLambda("@chat.deployment", chatDeployment.Value.Name, "Deployment (chat)", chatDeployment.Value.Name, ref maxLabelWidth));
@@ -206,5 +209,33 @@ public static string ConfigSet(string atFile, string setValue, bool print = fals
return fileName;
}
+
+ public static void ConfigAzureAiInference(string endpoint, string key)
+ {
+ ConsoleHelpers.WriteLineWithHighlight($"\n`CONFIG AZURE AI INFERENCE`");
+ Console.WriteLine();
+
+ int maxLabelWidth = 0;
 + var actions = new List<Action<int>>(new Action<int>[] {
+ ConfigSetLambda("@chat.endpoint", endpoint, "Endpoint", endpoint, ref maxLabelWidth),
+ ConfigSetLambda("@chat.key", key, "Key", key.Substring(0, 4) + "****************************", ref maxLabelWidth),
+ });
+ ConfigSet("@chat.endpoint.type", "inference");
+ actions.ForEach(x => x?.Invoke(maxLabelWidth));
+ }
+
+ public static void ConfigGitHub(string endpoint, string model, string token)
+ {
+ ConsoleHelpers.WriteLineWithHighlight($"\n`CONFIG AZURE AI INFERENCE/GITHUB MODELS`");
+ Console.WriteLine();
+
+ int maxLabelWidth = 0;
 + var actions = new List<Action<int>>(new Action<int>[] {
+ ConfigSetLambda("@chat.endpoint", endpoint, "Endpoint", endpoint, ref maxLabelWidth),
+ ConfigSetLambda("@chat.key", token, "Token", token.Substring(0, 4) + "****************************", ref maxLabelWidth),
+ ConfigSetLambda("@chat.model", model, "Model", model, ref maxLabelWidth),
+ });
+ actions.ForEach(x => x?.Invoke(maxLabelWidth));
+ }
}
}
diff --git a/src/common/details/named_values/tokens/chat_model_name_token.cs b/src/common/details/named_values/tokens/chat_model_name_token.cs
new file mode 100644
index 00000000..ea9e2df2
--- /dev/null
+++ b/src/common/details/named_values/tokens/chat_model_name_token.cs
@@ -0,0 +1,18 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+namespace Azure.AI.Details.Common.CLI
+{
+ public class ChatModelNameToken
+ {
+ public static NamedValueTokenData Data() => new NamedValueTokenData(_optionName, _fullName, _optionExample, _requiredDisplayName);
+ public static INamedValueTokenParser Parser(bool requireChatPart = false) => new Any1ValueNamedValueTokenParser(_optionName, _fullName, requireChatPart ? "110" : "010");
+
+ private const string _requiredDisplayName = "model name";
+ private const string _optionName = "--model";
+ private const string _optionExample = "NAME";
+ private const string _fullName = "chat.model.name";
+ }
+}
diff --git a/src/common/details/named_values/tokens/config_endpoint_type_token.cs b/src/common/details/named_values/tokens/config_endpoint_type_token.cs
new file mode 100644
index 00000000..c723dc03
--- /dev/null
+++ b/src/common/details/named_values/tokens/config_endpoint_type_token.cs
@@ -0,0 +1,18 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+namespace Azure.AI.Details.Common.CLI
+{
+ public class ConfigEndpointTypeToken
+ {
+ public static NamedValueTokenData Data() => new NamedValueTokenData(_optionName, _fullName, _optionExample, _requiredDisplayName);
+ public static INamedValueTokenParser Parser() => new Any1ValueNamedValueTokenParser(_optionName, _fullName, "0011");
+
+ private const string _requiredDisplayName = "endpoint type";
+ private const string _optionName = "--endpoint-type";
+ private const string _optionExample = "TYPE";
+ private const string _fullName = "service.config.endpoint.type";
+ }
+}
diff --git a/src/extensions/inference_extension/AzureAIInferenceChatCompletionsStreaming.cs b/src/extensions/inference_extension/AzureAIInferenceChatCompletionsStreaming.cs
new file mode 100644
index 00000000..5aa19530
--- /dev/null
+++ b/src/extensions/inference_extension/AzureAIInferenceChatCompletionsStreaming.cs
@@ -0,0 +1,76 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+using Azure;
+using Azure.Identity;
+using Azure.AI.Inference;
+
+namespace Azure.AI.Details.Common.CLI.Extensions.Inference;
+
+public class AzureAIInferenceChatCompletionsStreaming
+{
+ public AzureAIInferenceChatCompletionsStreaming(string aiChatEndpoint, string aiChatAPIKey, string? aiChatModel, string systemPrompt, string? chatHistoryJsonFile = null)
+ {
+ _systemPrompt = systemPrompt;
+ _aiChatModel = aiChatModel;
+
+ _client = string.IsNullOrEmpty(aiChatAPIKey)
+ ? new ChatCompletionsClient(new Uri(aiChatEndpoint), new DefaultAzureCredential())
+ : new ChatCompletionsClient(new Uri(aiChatEndpoint), new AzureKeyCredential(aiChatAPIKey));
+
+ _messages = new List<ChatRequestMessage>();
+ if (!string.IsNullOrEmpty(chatHistoryJsonFile))
+ {
+ _messages.ReadChatHistoryFromFile(chatHistoryJsonFile);
+ }
+ else
+ {
+ ClearConversation();
+ }
+ }
+
+ public void ClearConversation()
+ {
+ _messages.Clear();
+ _messages.Add(new ChatRequestSystemMessage(_systemPrompt));
+ }
+
+ public List<ChatRequestMessage> Messages { get => _messages; }
+
+ public async Task<string> GetChatCompletionsStreamingAsync(string userPrompt, Action<StreamingChatCompletionsUpdate>? callback = null)
+ {
+ _messages.Add(new ChatRequestUserMessage(userPrompt));
+ var options = new ChatCompletionsOptions(_messages);
+ if (!string.IsNullOrEmpty(_aiChatModel))
+ {
+ options.Model = _aiChatModel;
+ }
+
+ var responseContent = string.Empty;
+ var response = await _client.CompleteStreamingAsync(options);
+ await foreach (var update in response)
+ {
+ var content = update.ContentUpdate;
+
+ if (update.FinishReason == CompletionsFinishReason.ContentFiltered)
+ {
+ content = $"{content}\nWARNING: Content filtered!";
+ }
+
+ if (string.IsNullOrEmpty(content)) continue;
+
+ responseContent += content;
+ if (callback != null) callback(update);
+ }
+
+ _messages.Add(new ChatRequestAssistantMessage() { Content = responseContent });
+ return responseContent;
+ }
+
+ private string _systemPrompt;
+ private string? _aiChatModel;
+ private ChatCompletionsClient _client;
+ private List<ChatRequestMessage> _messages;
+}
\ No newline at end of file
diff --git a/src/extensions/inference_extension/AzureInferenceHelpers.cs b/src/extensions/inference_extension/AzureInferenceHelpers.cs
new file mode 100644
index 00000000..ad4a5a88
--- /dev/null
+++ b/src/extensions/inference_extension/AzureInferenceHelpers.cs
@@ -0,0 +1,75 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+using Azure.AI.Inference;
+using System.Text;
+using System.ClientModel.Primitives;
+using System.Text.Json;
+
+namespace Azure.AI.Details.Common.CLI.Extensions.Inference;
+
+public static class AzureInferenceHelpers
+{
+ public static void ReadChatHistoryFromFile(this List<ChatRequestMessage> messages, string fileName)
+ {
+ var historyFile = FileHelpers.ReadAllText(fileName, Encoding.UTF8);
+
+ var historyFileLines = historyFile.Split(Environment.NewLine);
+ var clearIfSystem = () =>
+ {
+ messages.Clear();
+ return typeof(ChatRequestSystemMessage);
+ };
+
+ foreach (var line in historyFileLines)
+ {
+ var jsonObject = JsonDocument.Parse(line);
+ JsonElement roleObj;
+
+ if (!jsonObject.RootElement.TryGetProperty("role", out roleObj))
+ {
+ continue;
+ }
+
+ var role = roleObj.GetString();
+
+ var type = role?.ToLowerInvariant() switch
+ {
+ "user" => typeof(ChatRequestUserMessage),
+ "assistant" => typeof(ChatRequestAssistantMessage),
+ "system" => clearIfSystem(),
+ "tool" => typeof(ChatRequestToolMessage),
+ _ => throw new Exception($"Unknown chat role {role}")
+ };
+
+ var message = ModelReaderWriter.Read(BinaryData.FromString(line), type, ModelReaderWriterOptions.Json) as ChatRequestMessage;
+ messages.Add(message!);
+ }
+ }
+
+ public static void SaveChatHistoryToFile(this IList<ChatRequestMessage> messages, string fileName)
+ {
+ var history = new StringBuilder();
+
+ foreach (var message in messages)
+ {
+ var messageText = message switch
+ {
+ ChatRequestUserMessage userMessage => ModelReaderWriter.Write(userMessage, ModelReaderWriterOptions.Json).ToString(),
+ ChatRequestAssistantMessage assistantMessage => ModelReaderWriter.Write(assistantMessage, ModelReaderWriterOptions.Json).ToString(),
+ ChatRequestSystemMessage systemMessage => ModelReaderWriter.Write(systemMessage, ModelReaderWriterOptions.Json).ToString(),
+ ChatRequestToolMessage toolMessage => ModelReaderWriter.Write(toolMessage, ModelReaderWriterOptions.Json).ToString(),
+ _ => null
+ };
+
+ if (!string.IsNullOrEmpty(messageText))
+ {
+ history.AppendLine(messageText);
+ }
+ }
+
+ FileHelpers.WriteAllText(fileName, history.ToString(), Encoding.UTF8);
+ }
+}
diff --git a/src/extensions/inference_extension/BuildCommon.targets b/src/extensions/inference_extension/BuildCommon.targets
new file mode 100644
index 00000000..9813b863
--- /dev/null
+++ b/src/extensions/inference_extension/BuildCommon.targets
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project>
+
+  <PropertyGroup>
+    <CLIAssemblyVersion Condition="'$(CLIAssemblyVersion)' == ''">1.0.0</CLIAssemblyVersion>
+    <IsOfficialBuild Condition="'$(IsOfficialBuild)' == ''">false</IsOfficialBuild>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <CurrentDate>$([System.DateTime]::Now.ToString('yyyyMMdd'))</CurrentDate>
+    <UserName>$([System.Environment]::UserName)</UserName>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(IsOfficialBuild)' != 'true'">
+    <CLIAssemblyInformationalVersion>$(CLIAssemblyVersion)-DEV-$(UserName)-$(CurrentDate)</CLIAssemblyInformationalVersion>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <AssemblyVersion>$(CLIAssemblyVersion)</AssemblyVersion>
+    <FileVersion>$(CLIAssemblyVersion)</FileVersion>
+    <InformationalVersion>$(CLIAssemblyInformationalVersion)</InformationalVersion>
+  </PropertyGroup>
+
+</Project>
diff --git a/src/extensions/inference_extension/inference_extension.csproj b/src/extensions/inference_extension/inference_extension.csproj
new file mode 100644
index 00000000..8af3c252
--- /dev/null
+++ b/src/extensions/inference_extension/inference_extension.csproj
@@ -0,0 +1,23 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <AssemblyName>Azure.AI.CLI.Extensions.Inference</AssemblyName>
+    <TargetFramework>net8.0</TargetFramework>
+    <RootNamespace>Azure.AI.Details.Common.CLI.Extensions.Inference</RootNamespace>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <IncludeSymbols>true</IncludeSymbols>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Azure.AI.Inference" Version="1.0.0-beta.1" />
+    <PackageReference Include="Azure.Identity" Version="1.12.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\common\common.csproj" />
+  </ItemGroup>
+
+  <Import Project="BuildCommon.targets" />
+
+</Project>
diff --git a/src/extensions/speech_extension/commands/parsers/common_speech_token_parsers.cs b/src/extensions/speech_extension/commands/parsers/common_speech_token_parsers.cs
index ee980284..67e80ba2 100644
--- a/src/extensions/speech_extension/commands/parsers/common_speech_token_parsers.cs
+++ b/src/extensions/speech_extension/commands/parsers/common_speech_token_parsers.cs
@@ -18,6 +18,7 @@ public SpeechConfigServiceConnectionTokenParser() : base(
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.query.string", "00011"),
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.http.header", "00011"),
new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.traffic.type", "00011"),
+ new Any1ValueNamedValueTokenParser(null, "service.config.endpoint.type", "0011"),
new Any1ValueNamedValueTokenParser("--uri", "service.config.endpoint.uri", "0010;0001"),
new Any1ValueNamedValueTokenParser("--token.value", "service.config.token.value", "0010"),
diff --git a/tests/test.yaml b/tests/test.yaml
index c69fb172..84d01203 100644
--- a/tests/test.yaml
+++ b/tests/test.yaml
@@ -86,7 +86,7 @@
^Name +Short +Name +Language +\r?$\n
^-+ +-+ +-+\r?$\n
^Environment +Variables +\.env *\r?$\n
- ^AzureML Chat Completions \(Streaming\) +aml-chat-streaming +Python *\r?$\n
+ ^Azure AI Inference Chat Completions \(Streaming\) +az-inference-chat-streaming +C#, +Python *\r?$\n
^Helper +Function +Class +Library +helper-functions +C# *\r?$\n
^OpenAI +Assistants +openai-asst +C#, +JavaScript, +Python *\r?$\n
^OpenAI +Assistants +\(Streaming\) +openai-asst-streaming +C#, +JavaScript, +Python *\r?$\n
diff --git a/tests/test3.yaml b/tests/test3.yaml
index 8a1aa9ad..68750d95 100644
--- a/tests/test3.yaml
+++ b/tests/test3.yaml
@@ -1566,16 +1566,32 @@
exit
tag: skip
-- area: ai dev new aml-chat-streaming (key)
+- area: ai dev new az-inference-chat-streaming (key)
tests:
- - class: dev new aml-chat-streaming (python)
+ - class: dev new az-inference-chat-streaming (c#)
steps:
- name: generate template
- command: ai dev new aml-chat-streaming --python
+ command: ai dev new az-inference-chat-streaming --cs
+ - name: build template
+ bash: |
+ cd az-inference-chat-streaming-cs
+ dotnet build
+ - name: run template
+ command: ai dev shell --bash "cd az-inference-chat-streaming-cs;./bin/Debug/net8.0/AzureAIInferencingChatCompletionsStreaming"
+ input: |-
+ Tell me a joke
+ Tell me another joke
+ exit
+ tag: skip
+
+ - class: dev new az-inference-chat-streaming (python)
+ steps:
+ - name: generate template
+ command: ai dev new az-inference-chat-streaming --python
- name: install requirements
bash: |
- cd aml-chat-streaming-py
+ cd az-inference-chat-streaming-py
if [ -f /etc/os-release ]; then
python3 -m venv env
source env/bin/activate
@@ -1588,7 +1604,7 @@
command: ai dev shell
arguments:
bash: |
- cd aml-chat-streaming-py
+ cd az-inference-chat-streaming-py
if [ -f /etc/os-release ]; then
source env/bin/activate
python main.py