From ec9b980e3024369ccd8bb34761fde8d2702630dd Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Wed, 29 Jan 2025 01:48:26 +0100 Subject: [PATCH] Python: improve agent samples and chat history handling (#10301) ### Motivation and Context When passing the same object to `agent.reduce_history` as is present in the `history_reducer` attribute of the agent that the function doesn't accurately behave. This fixes that. Also updates the sample to be a bit more concise. Also fixes the way the single_dispatch is setup in ChatHistory. Also ensures system/developer messages are not reduced away as that might impact performance. The `reduce_history` method was removed from the agent base class, in favor of having the caller manage the change history and reduction as needed. The `reduce_history` was added to the agent group chat, as the chat history is managed internally as agents are invoked. ### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --------- Co-authored-by: Evan Mattson Co-authored-by: Evan Mattson <35585003+moonbox3@users.noreply.github.com> --- .../agents/chat_completion_history_reducer.py | 303 ++++++++---------- ...le_chatbot_with_summary_history_reducer.py | 15 +- ...mmary_history_reducer_keep_func_content.py | 19 +- .../json_structured_outputs.py | 5 +- .../step10_assistant_tool_file_search.py | 56 ++-- .../step1_agent.py | 73 ++--- .../step2_plugins.py | 114 ++++--- .../getting_started_with_agents/step3_chat.py | 78 ++--- .../step4_kernel_function_strategies.py | 47 ++- .../step5_json_result.py | 97 +++--- 
.../step6_logging.py | 59 ++-- .../step7_assistant.py | 43 ++- .../step8_assistant_vision.py | 126 ++++---- .../step9_assistant_tool_code_interpreter.py | 45 ++- python/semantic_kernel/agents/agent.py | 42 +-- .../agents/channels/chat_history_channel.py | 6 - .../chat_completion/chat_completion_agent.py | 20 +- .../agents/group_chat/agent_chat.py | 6 +- .../agents/group_chat/agent_group_chat.py | 25 +- .../agents/strategies/__init__.py | 2 + python/semantic_kernel/contents/__init__.py | 4 + .../semantic_kernel/contents/chat_history.py | 162 ++++++---- .../history_reducer/chat_history_reducer.py | 34 +- .../chat_history_reducer_utils.py | 12 +- .../chat_history_summarization_reducer.py | 109 +++---- .../chat_history_truncation_reducer.py | 29 +- python/tests/unit/agents/test_agent.py | 56 +--- ...test_chat_history_summarization_reducer.py | 34 +- 28 files changed, 763 insertions(+), 858 deletions(-) diff --git a/python/samples/concepts/agents/chat_completion_history_reducer.py b/python/samples/concepts/agents/chat_completion_history_reducer.py index 1cdffefe7b78..8dcdbe14aa55 100644 --- a/python/samples/concepts/agents/chat_completion_history_reducer.py +++ b/python/samples/concepts/agents/chat_completion_history_reducer.py @@ -2,20 +2,20 @@ import asyncio import logging -from typing import TYPE_CHECKING +from semantic_kernel import Kernel from semantic_kernel.agents import ( AgentGroupChat, ChatCompletionAgent, ) from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion -from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent -from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer -from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer -from semantic_kernel.kernel import Kernel - -if TYPE_CHECKING: - from semantic_kernel.contents.history_reducer.chat_history_reducer import 
ChatHistoryReducer +from semantic_kernel.contents import ( + AuthorRole, + ChatHistorySummarizationReducer, + ChatHistoryTruncationReducer, + ChatMessageContent, +) +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer ##################################################################### # The following sample demonstrates how to implement a chat history # @@ -31,7 +31,7 @@ # Flag to determine whether to use Azure OpenAI services or OpenAI # Set this to True if using Azure OpenAI (requires appropriate configuration) -use_azure_openai = True +use_azure_openai = False # Helper function to create and configure a Kernel with the desired chat completion service @@ -49,179 +49,120 @@ def _create_kernel_with_chat_completion(service_id: str) -> Kernel: class HistoryReducerExample: """ - Demonstrates how to create a ChatCompletionAgent with a ChatHistoryReducer + Demonstrates how to create a ChatCompletionAgent with both types of ChatHistoryReducer (either truncation or summarization) and how to invoke that agent multiple times while applying the history reduction. + + This can be done both directly on the agent itself, or through a group chat. """ # Agent-specific settings - TRANSLATOR_NAME = "NumeroTranslator" # Name of the agent - TRANSLATOR_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation." + AGENT_NAME = "NumeroTranslator" + AGENT_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation." - def create_truncating_agent( - self, reducer_msg_count: int, reducer_threshold: int - ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]: + def create_chat_completion_agent(self, service_id: str) -> ChatCompletionAgent: """ - Creates a ChatCompletionAgent with a truncation-based history reducer. + Creates a ChatCompletionAgent. - Parameters: - - reducer_msg_count: Target number of messages to retain after truncation. 
- - reducer_threshold: Threshold number of messages to trigger truncation. + Args: + service_id: The service ID for the chat completion service. Returns: - - A configured ChatCompletionAgent instance with truncation enabled. + A configured ChatCompletionAgent instance. """ - truncation_reducer = ChatHistoryTruncationReducer( - target_count=reducer_msg_count, threshold_count=reducer_threshold - ) - return ChatCompletionAgent( - name=self.TRANSLATOR_NAME, - instructions=self.TRANSLATOR_INSTRUCTIONS, - kernel=_create_kernel_with_chat_completion("truncate_agent"), - history_reducer=truncation_reducer, - ), truncation_reducer - - def create_summarizing_agent( - self, reducer_msg_count: int, reducer_threshold: int - ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]: - """ - Creates a ChatCompletionAgent with a summarization-based history reducer. - - Parameters: - - reducer_msg_count: Target number of messages to retain after summarization. - - reducer_threshold: Threshold number of messages to trigger summarization. - - Returns: - - A configured ChatCompletionAgent instance with summarization enabled. - """ - kernel = _create_kernel_with_chat_completion("summarize_agent") - - summarization_reducer = ChatHistorySummarizationReducer( - service=kernel.get_service(service_id="summarize_agent"), - target_count=reducer_msg_count, - threshold_count=reducer_threshold, + name=self.AGENT_NAME, + instructions=self.AGENT_INSTRUCTIONS, + kernel=_create_kernel_with_chat_completion(service_id=service_id), ) - return ChatCompletionAgent( - name=self.TRANSLATOR_NAME, - instructions=self.TRANSLATOR_INSTRUCTIONS, - kernel=kernel, - history_reducer=summarization_reducer, - ), summarization_reducer - - async def invoke_agent(self, agent: ChatCompletionAgent, chat_history: ChatHistory, message_count: int): - """ - Demonstrates agent invocation with direct history management and reduction. 
+ async def invoke_agent( + self, agent: ChatCompletionAgent, chat_history_reducer: ChatHistoryReducer, message_count: int + ): + """Demonstrates agent invocation with direct history management and reduction. - Parameters: - - agent: The ChatCompletionAgent to invoke. - - message_count: The number of messages to simulate in the conversation. + Args: + agent: The ChatCompletionAgent to invoke. + chat_history_reducer: The chat history to use for the conversation. + message_count: The number of messages to simulate in the conversation. """ - index = 1 - while index <= message_count: + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) + for index in range(1, message_count + 1, 2): # Provide user input - user_message = ChatMessageContent(role=AuthorRole.USER, content=str(index)) - chat_history.messages.append(user_message) + chat_history_reducer.add_user_message(str(index)) print(f"# User: '{index}'") - # Attempt history reduction if a reducer is present - is_reduced = False - if agent.history_reducer is not None: - reduced = await agent.history_reducer.reduce() - if reduced is not None: - chat_history.messages.clear() - chat_history.messages.extend(reduced) - is_reduced = True - print("@ (History was reduced!)") + # Try history reduction + if is_reduced := await chat_history_reducer.reduce(): + print(f"@ History reduced to {len(chat_history_reducer.messages)} messages.") # Invoke the agent and display its response - async for response in agent.invoke(chat_history): - chat_history.messages.append(response) - print(f"# {response.role} - {response.name}: '{response.content}'") + async for response in agent.invoke(chat_history_reducer): + chat_history_reducer.add_message(response) + print(f"# Agent - {response.name}: '{response.content}'") - # The index is incremented by 2 because the agent 
is told to: - # "Add one to the latest user number and spell it in Spanish without explanation." - # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) - index += 2 - print(f"@ Message Count: {len(chat_history.messages)}\n") + print(f"@ Message Count: {len(chat_history_reducer.messages)}\n") - # If history was reduced, and the chat history is of type `ChatHistorySummarizationReducer`, + # If history was reduced, and the agent uses `ChatHistorySummarizationReducer`, # print summaries as it will contain the __summary__ metadata key. - if is_reduced and isinstance(chat_history, ChatHistorySummarizationReducer): - self._print_summaries_from_front(chat_history.messages) + if is_reduced and isinstance(chat_history_reducer, ChatHistorySummarizationReducer): + self._print_summaries(chat_history_reducer.messages) - async def invoke_chat(self, agent: ChatCompletionAgent, message_count: int): + async def invoke_chat( + self, agent: ChatCompletionAgent, chat_history_reducer: ChatHistoryReducer, message_count: int + ): """ Demonstrates agent invocation within a group chat. - Parameters: - - agent: The ChatCompletionAgent to invoke. - - message_count: The number of messages to simulate in the conversation. + Args: + agent: The ChatCompletionAgent to invoke. + chat_history_reducer: The chat history to use for the conversation. + message_count: The number of messages to simulate in the conversation. """ - chat = AgentGroupChat() # Initialize a new group chat - last_history_count = 0 + chat = AgentGroupChat(chat_history=chat_history_reducer) # Initialize a new group chat with the history reducer - index = 1 - while index <= message_count: + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. 
(in Spanish) + for index in range(1, message_count, 2): # Add user message to the chat - user_msg = ChatMessageContent(role=AuthorRole.USER, content=str(index)) - await chat.add_chat_message(user_msg) + await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=str(index))) print(f"# User: '{index}'") + # Try history reduction + if is_reduced := await chat.reduce_history(): + print(f"@ History reduced to {len(chat_history_reducer.messages)} messages.") + # Invoke the agent and display its response async for message in chat.invoke(agent): print(f"# {message.role} - {message.name or '*'}: '{message.content}'") - # The index is incremented by 2 because the agent is told to: - # "Add one to the latest user number and spell it in Spanish without explanation." - # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) - index += 2 - # Retrieve chat messages in descending order (newest first) msgs = [] async for m in chat.get_chat_messages(agent): msgs.append(m) - print(f"@ Message Count: {len(msgs)}\n") # Check for reduction in message count and print summaries - if len(msgs) < last_history_count: - self._print_summaries_from_back(msgs) - - last_history_count = len(msgs) + if is_reduced and isinstance(chat_history_reducer, ChatHistorySummarizationReducer): + self._print_summaries(msgs) - def _print_summaries_from_front(self, messages: list[ChatMessageContent]): + def _print_summaries(self, messages: list[ChatMessageContent]): """ Prints summaries from the front of the message list. - Parameters: - - messages: List of chat messages to process. - """ - summary_index = 0 - while summary_index < len(messages): - msg = messages[summary_index] - if msg.metadata and msg.metadata.get("__summary__"): - print(f"\tSummary: {msg.content}") - summary_index += 1 - else: - break - - def _print_summaries_from_back(self, messages: list[ChatMessageContent]): - """ - Prints summaries from the back of the message list. 
+ This assumes that the ChatHistorySummarizationReducer uses the default value for: + `use_single_summary` which is True, and there is therefor only one summary message. - Parameters: - - messages: List of chat messages to process. + Args: + messages: List of chat messages to process. """ - summary_index = len(messages) - 1 - while summary_index >= 0: - msg = messages[summary_index] + for msg in messages: if msg.metadata and msg.metadata.get("__summary__"): print(f"\tSummary: {msg.content}") - summary_index -= 1 - else: break @@ -230,51 +171,69 @@ async def main(): # Initialize the example class example = HistoryReducerExample() - # Demonstrate truncation-based reduction - trunc_agent, history_reducer = example.create_truncating_agent( - # reducer_msg_count: - # Purpose: Defines the target number of messages to retain after applying truncation or summarization. - # What it controls: This parameter determines how much of the most recent conversation history - # is preserved while discarding or summarizing older messages. - # Why change it?: - # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history - # to maintain context. - # - Larger values: Use when retaining more conversational context is critical for accurate responses - # or maintaining a richer dialogue. - reducer_msg_count=10, - # reducer_threshold: - # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds - # reducer_msg_count by a small margin. - # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) - # are not "orphaned" or lost during truncation or summarization. - # Why change it?: - # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older - # pairs of messages sooner. 
- # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, - # especially for sensitive interactions like API function calls or complex responses. - reducer_threshold=10, + # Demonstrate truncation-based reduction, there are two important settings to consider: + # reducer_msg_count: + # Purpose: Defines the target number of messages to retain after applying truncation or summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + # reducer_threshold: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # reducer_msg_count by a small margin. + # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) + # are not "orphaned" or lost during truncation or summarization. + # Why change it?: + # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older + # pairs of messages sooner. + # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, + # especially for sensitive interactions like API function calls or complex responses. 
+ reducer_msg_count = 10 + reducer_threshold = 10 + + truncation_reducer = ChatHistoryTruncationReducer(target_count=reducer_msg_count, threshold_count=reducer_threshold) + + kernel = _create_kernel_with_chat_completion(service_id="summary") + summarization_reducer = ChatHistorySummarizationReducer( + service=kernel.get_service("summary"), target_count=reducer_msg_count, threshold_count=reducer_threshold ) - # print("===TruncatedAgentReduction Demo===") - # await example.invoke_agent(trunc_agent, chat_history=history_reducer, message_count=50) - - # Demonstrate summarization-based reduction - sum_agent, history_reducer = example.create_summarizing_agent( - # Same configuration for summarization-based reduction - reducer_msg_count=10, # Target number of messages to retain - reducer_threshold=10, # Buffer to avoid premature reduction + + # Demonstrate truncation-based reduction for a single agent + print("===Single Agent Truncated Chat History Reduction Demo===") + await example.invoke_agent( + agent=example.create_chat_completion_agent("truncation_agent"), + chat_history_reducer=truncation_reducer, + message_count=50, + ) + + # # Demonstrate group chat with a truncation reducer + print("\n===Group Agent Chat Truncated Chat History Reduction Demo===") + truncation_reducer.clear() + await example.invoke_chat( + agent=example.create_chat_completion_agent(service_id="truncation_chat"), + chat_history_reducer=truncation_reducer, + message_count=50, + ) + + # Demonstrate summarization-based reduction for a single agent + print("\n===Single Agent Summarized Chat History Reduction Demo===") + await example.invoke_agent( + agent=example.create_chat_completion_agent(service_id="summary"), + chat_history_reducer=summarization_reducer, + message_count=50, + ) + + # Demonstrate group chat with a summarization reducer + print("\n===Group Agent Chat Summarized Chat History Reduction Demo===") + summarization_reducer.clear() + await example.invoke_chat( + 
agent=example.create_chat_completion_agent(service_id="summary"), + chat_history_reducer=summarization_reducer, + message_count=50, ) - print("\n===SummarizedAgentReduction Demo===") - await example.invoke_agent(sum_agent, chat_history=history_reducer, message_count=50) - - # Demonstrate group chat with truncation - print("\n===TruncatedChatReduction Demo===") - trunc_agent.history_reducer.messages.clear() - await example.invoke_chat(trunc_agent, message_count=50) - - # Demonstrate group chat with summarization - print("\n===SummarizedChatReduction Demo===") - sum_agent.history_reducer.messages.clear() - await example.invoke_chat(sum_agent, message_count=50) # Interaction between reducer_msg_count and reducer_threshold: diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py index 338c76519b0e..838d90ac18ab 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py @@ -27,6 +27,9 @@ # The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. # To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. +# Toggle this flag to view the chat history summary after a reduction was performed. 
+view_chat_history_summary_after_reduction = True + # You can select from the following chat completion services: # - Services.OPENAI # - Services.AZURE_OPENAI @@ -122,7 +125,8 @@ async def chat() -> bool: print("\n\nExiting chat...") return False - await summarization_reducer.reduce() + if is_reduced := await summarization_reducer.reduce(): + print(f"@ History reduced to {len(summarization_reducer.messages)} messages.") kernel_arguments = KernelArguments( settings=request_settings, @@ -136,6 +140,15 @@ async def chat() -> bool: summarization_reducer.add_user_message(user_input) summarization_reducer.add_message(answer.value[0]) + if view_chat_history_summary_after_reduction and is_reduced: + for msg in summarization_reducer.messages: + if msg.metadata and msg.metadata.get("__summary__"): + print("*" * 60) + print(f"Chat History Reduction Summary: {msg.content}") + print("*" * 60) + break + print("\n") + return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py index b5d0eae75d24..591bbec053b8 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py @@ -32,6 +32,9 @@ # The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. # To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. +# Toggle this flag to view the chat history summary after a reduction was performed. 
+view_chat_history_summary_after_reduction = True + # You can select from the following chat completion services: # - Services.OPENAI # - Services.AZURE_OPENAI @@ -136,7 +139,8 @@ async def chat() -> bool: print("\n\nExiting chat...") return False - await summarization_reducer.reduce() + if is_reduced := await summarization_reducer.reduce(): + print(f"@ History reduced to {len(summarization_reducer.messages)} messages.") kernel_arguments = KernelArguments( settings=request_settings, @@ -169,17 +173,26 @@ async def chat() -> bool: frc.append(item) for i, item in enumerate(fcc): - summarization_reducer.add_assistant_message_list([item]) + summarization_reducer.add_assistant_message([item]) processed_fccs.add(item.id) # Safely check if there's a matching FunctionResultContent if i < len(frc): assert fcc[i].id == frc[i].id # nosec - summarization_reducer.add_tool_message_list([frc[i]]) + summarization_reducer.add_tool_message([frc[i]]) processed_frcs.add(item.id) # Since this example is showing how to include FunctionCallContent and FunctionResultContent # in the summary, we need to add them to the chat history and also to the processed sets. 
+ if view_chat_history_summary_after_reduction and is_reduced: + for msg in summarization_reducer.messages: + if msg.metadata and msg.metadata.get("__summary__"): + print("*" * 60) + print(f"Chat History Reduction Summary: {msg.content}") + print("*" * 60) + break + print("\n") + return True diff --git a/python/samples/concepts/structured_outputs/json_structured_outputs.py b/python/samples/concepts/structured_outputs/json_structured_outputs.py index f6ea600cd56f..b1eacba11fd6 100644 --- a/python/samples/concepts/structured_outputs/json_structured_outputs.py +++ b/python/samples/concepts/structured_outputs/json_structured_outputs.py @@ -109,7 +109,7 @@ class Reasoning(KernelBaseModel): async def main(): - stream = True + stream = False if stream: answer = kernel.invoke_stream( chat_function, @@ -127,7 +127,8 @@ async def main(): chat_function, chat_history=history, ) - print(f"Mosscap:> {result}") + reasoned_result = Reasoning.model_validate_json(result.value[0].content) + print(f"Mosscap:> {reasoned_result}") history.add_assistant_message(str(result)) diff --git a/python/samples/getting_started_with_agents/step10_assistant_tool_file_search.py b/python/samples/getting_started_with_agents/step10_assistant_tool_file_search.py index 3ac413f92400..c2ff3afe8483 100644 --- a/python/samples/getting_started_with_agents/step10_assistant_tool_file_search.py +++ b/python/samples/getting_started_with_agents/step10_assistant_tool_file_search.py @@ -1,12 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. 
+ import asyncio import os -from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent -from semantic_kernel.agents.open_ai.open_ai_assistant_agent import OpenAIAssistantAgent -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.agents.open_ai import AzureAssistantAgent, OpenAIAssistantAgent +from semantic_kernel.contents import AuthorRole, ChatMessageContent ##################################################################### # The following sample demonstrates how to create an OpenAI # @@ -15,35 +14,21 @@ ##################################################################### -AGENT_NAME = "FileSearch" -AGENT_INSTRUCTIONS = "Find answers to the user's questions in the provided file." +# Create the instance of the Kernel +kernel = Kernel() # Note: you may toggle this to switch between AzureOpenAI and OpenAI -use_azure_openai = True - - -# A helper method to invoke the agent with the user input -async def invoke_agent(agent: OpenAIAssistantAgent, thread_id: str, input: str) -> None: - """Invoke the agent with the user input.""" - await agent.add_chat_message(thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=input)) - - print(f"# {AuthorRole.USER}: '{input}'") - - async for content in agent.invoke(thread_id=thread_id): - if content.role != AuthorRole.TOOL: - print(f"# {content.role}: {content.content}") +use_azure_openai = False async def main(): - # Create the instance of the Kernel - kernel = Kernel() + # Get the path to the employees.pdf file + pdf_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources", "employees.pdf") # Define a service_id for the sample service_id = "agent" - - # Get the path to the travelinfo.txt file - pdf_file_path = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources", "employees.pdf") - + AGENT_NAME = "FileSearch" + AGENT_INSTRUCTIONS = "Find answers to the user's questions in the provided file." # Create the agent configuration if use_azure_openai: agent = await AzureAssistantAgent.create( @@ -67,10 +52,23 @@ async def main(): # Define a thread and invoke the agent with the user input thread_id = await agent.create_thread() + user_inputs = { + "Who is the youngest employee?", + "Who works in sales?", + "I have a customer request, who can help me?", + } + try: - await invoke_agent(agent, thread_id=thread_id, input="Who is the youngest employee?") - await invoke_agent(agent, thread_id=thread_id, input="Who works in sales?") - await invoke_agent(agent, thread_id=thread_id, input="I have a customer request, who can help me?") + for user_input in user_inputs: + await agent.add_chat_message( + thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=user_input) + ) + + print(f"# User: '{user_input}'") + + async for content in agent.invoke(thread_id=thread_id): + if content.role != AuthorRole.TOOL: + print(f"# Agent: {content.content}") finally: [await agent.delete_file(file_id) for file_id in agent.file_search_file_ids] await agent.delete_thread(thread_id) diff --git a/python/samples/getting_started_with_agents/step1_agent.py b/python/samples/getting_started_with_agents/step1_agent.py index 28d19a45df1f..6dda5a4dbadd 100644 --- a/python/samples/getting_started_with_agents/step1_agent.py +++ b/python/samples/getting_started_with_agents/step1_agent.py @@ -1,13 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. 
import asyncio -from functools import reduce +from semantic_kernel import Kernel from semantic_kernel.agents import ChatCompletionAgent -from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion +from semantic_kernel.contents import ChatHistory ################################################################### # The following sample demonstrates how to create a simple, # @@ -15,52 +13,37 @@ # of a pirate and then ends with a parrot sound. # ################################################################### -# To toggle streaming or non-streaming mode, change the following boolean -streaming = True +# Create the instance of the Kernel +kernel = Kernel() -# Define the agent name and instructions -PARROT_NAME = "Parrot" -PARROT_INSTRUCTIONS = "Repeat the user message in the voice of a pirate and then end with a parrot sound." 
+# Add the OpenAIChatCompletion AI Service to the Kernel +kernel.add_service(OpenAIChatCompletion(service_id="agent")) - -async def invoke_agent(agent: ChatCompletionAgent, input: str, chat: ChatHistory): - """Invoke the agent with the user input.""" - chat.add_user_message(input) - - print(f"# {AuthorRole.USER}: '{input}'") - - if streaming: - contents = [] - content_name = "" - async for content in agent.invoke_stream(chat): - content_name = content.name - contents.append(content) - streaming_chat_message = reduce(lambda first, second: first + second, contents) - print(f"# {content.role} - {content_name or '*'}: '{streaming_chat_message}'") - chat.add_message(streaming_chat_message) - else: - async for content in agent.invoke(chat): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") - chat.add_message(content) +# Define the agent with name and instructions +AGENT_NAME = "Parrot" +AGENT_INSTRUCTIONS = "You are a helpful parrot that repeats the user message in a pirate voice." 
+agent = ChatCompletionAgent(service_id="agent", kernel=kernel, name=AGENT_NAME) async def main(): - # Create the instance of the Kernel - kernel = Kernel() - - # Add the OpenAIChatCompletion AI Service to the Kernel - kernel.add_service(AzureChatCompletion(service_id="agent")) - - # Create the agent - agent = ChatCompletionAgent(service_id="agent", kernel=kernel, name=PARROT_NAME, instructions=PARROT_INSTRUCTIONS) - # Define the chat history - chat = ChatHistory() - - # Respond to user input - await invoke_agent(agent, "Fortune favors the bold.", chat) - await invoke_agent(agent, "I came, I saw, I conquered.", chat) - await invoke_agent(agent, "Practice makes perfect.", chat) + chat_history = ChatHistory() + chat_history.add_developer_message(AGENT_INSTRUCTIONS) + + user_inputs = [ + "Fortune favors the bold.", + "I came, I saw, I conquered.", + "Practice makes perfect.", + ] + for user_input in user_inputs: + # Add the user input to the chat history + chat_history.add_user_message(user_input) + print(f"# User: '{user_input}'") + # Invoke the agent to get a response + async for content in agent.invoke(chat_history): + # Add the response to the chat history + chat_history.add_message(content) + print(f"# Agent - {content.name or '*'}: '{content.content}'") if __name__ == "__main__": diff --git a/python/samples/getting_started_with_agents/step2_plugins.py b/python/samples/getting_started_with_agents/step2_plugins.py index 919f8f25176e..4749a93bbcad 100644 --- a/python/samples/getting_started_with_agents/step2_plugins.py +++ b/python/samples/getting_started_with_agents/step2_plugins.py @@ -1,16 +1,20 @@ # Copyright (c) Microsoft. All rights reserved. 
import asyncio -from typing import Annotated +from typing import TYPE_CHECKING, Annotated +from semantic_kernel import Kernel from semantic_kernel.agents import ChatCompletionAgent -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai import FunctionChoiceBehavior from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.functions.kernel_arguments import KernelArguments -from semantic_kernel.functions.kernel_function_decorator import kernel_function -from semantic_kernel.kernel import Kernel +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.functions import KernelArguments, kernel_function + +if TYPE_CHECKING: + pass + ################################################################### # The following sample demonstrates how to create a simple, # @@ -18,13 +22,6 @@ # the Kernel. # ################################################################### -# This sample allows for a streaming response verus a non-streaming response -streaming = True - -# Define the agent name and instructions -HOST_NAME = "Host" -HOST_INSTRUCTIONS = "Answer questions about the menu." 
- # Define a sample plugin for the sample class MenuPlugin: @@ -45,58 +42,59 @@ def get_item_price( return "$9.99" -# A helper method to invoke the agent with the user input -async def invoke_agent(agent: ChatCompletionAgent, input: str, chat: ChatHistory) -> None: - """Invoke the agent with the user input.""" - chat.add_user_message(input) - - print(f"# {AuthorRole.USER}: '{input}'") +# Create the instance of the Kernel +kernel = Kernel() +kernel.add_plugin(MenuPlugin(), plugin_name="menu") - if streaming: - contents = [] - content_name = "" - async for content in agent.invoke_stream(chat): - content_name = content.name - contents.append(content) - message_content = "".join([content.content for content in contents]) - print(f"# {content.role} - {content_name or '*'}: '{message_content}'") - chat.add_assistant_message(message_content) - else: - async for content in agent.invoke(chat): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") - chat.add_message(content) +service_id = "agent" +kernel.add_service(AzureChatCompletion(service_id=service_id)) +settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) +# Configure the function choice behavior to auto invoke kernel functions +settings.function_choice_behavior = FunctionChoiceBehavior.Auto() -async def main(): - # Create the instance of the Kernel - kernel = Kernel() - - service_id = "agent" - kernel.add_service(AzureChatCompletion(service_id=service_id)) - - settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) - # Configure the function choice behavior to auto invoke kernel functions - settings.function_choice_behavior = FunctionChoiceBehavior.Auto() - - kernel.add_plugin(MenuPlugin(), plugin_name="menu") +# Define the agent name and instructions +AGENT_NAME = "Host" +AGENT_INSTRUCTIONS = "Answer questions about the menu." 
+# Create the agent +agent = ChatCompletionAgent( + service_id=service_id, + kernel=kernel, + name=AGENT_NAME, + instructions=AGENT_INSTRUCTIONS, + arguments=KernelArguments(settings=settings), +) - # Create the agent - agent = ChatCompletionAgent( - service_id="agent", - kernel=kernel, - name=HOST_NAME, - instructions=HOST_INSTRUCTIONS, - arguments=KernelArguments(settings=settings), - ) +async def main(): # Define the chat history - chat = ChatHistory() + chat_history = ChatHistory() # Respond to user input - await invoke_agent(agent, "Hello", chat) - await invoke_agent(agent, "What is the special soup?", chat) - await invoke_agent(agent, "What is the special drink?", chat) - await invoke_agent(agent, "Thank you", chat) + user_inputs = [ + "Hello", + "What is the special soup?", + "What does that cost?", + "Thank you", + ] + + for user_input in user_inputs: + # Add the user input to the chat history + chat_history.add_user_message(user_input) + print(f"# User: '{user_input}'") + + agent_name: str | None = None + print("# Assistant - ", end="") + async for content in agent.invoke_stream(chat_history): + if not agent_name: + agent_name = content.name + print(f"{agent_name}: '", end="") + if ( + not any(isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in content.items) + and content.content.strip() + ): + print(f"{content.content}", end="", flush=True) + print("'") if __name__ == "__main__": diff --git a/python/samples/getting_started_with_agents/step3_chat.py b/python/samples/getting_started_with_agents/step3_chat.py index e81c5d0c516c..17d1fdae59df 100644 --- a/python/samples/getting_started_with_agents/step3_chat.py +++ b/python/samples/getting_started_with_agents/step3_chat.py @@ -2,12 +2,11 @@ import asyncio +from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent -from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy -from 
semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel.agents.strategies import TerminationStrategy +from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion +from semantic_kernel.contents import AuthorRole, ChatMessageContent ################################################################### # The following sample demonstrates how to create a simple, # @@ -17,6 +16,12 @@ ################################################################### +def _create_kernel_with_chat_completion(service_id: str) -> Kernel: + kernel = Kernel() + kernel.add_service(AzureChatCompletion(service_id=service_id)) + return kernel + + class ApprovalTerminationStrategy(TerminationStrategy): """A strategy for determining when an agent should terminate.""" @@ -25,32 +30,14 @@ async def should_agent_terminate(self, agent, history): return "approved" in history[-1].content.lower() -REVIEWER_NAME = "ArtDirector" -REVIEWER_INSTRUCTIONS = """ -You are an art director who has opinions about copywriting born of a love for David Ogilvy. -The goal is to determine if the given copy is acceptable to print. -If so, state that it is approved. -If not, provide insight on how to refine suggested copy without example. -""" - -COPYWRITER_NAME = "CopyWriter" -COPYWRITER_INSTRUCTIONS = """ -You are a copywriter with ten years of experience and are known for brevity and a dry humor. -The goal is to refine and decide on the single best copy as an expert in the field. -Only provide a single proposal per response. -You're laser focused on the goal at hand. -Don't waste time with chit chat. -Consider suggestions when refining an idea. 
-""" - - -def _create_kernel_with_chat_completion(service_id: str) -> Kernel: - kernel = Kernel() - kernel.add_service(AzureChatCompletion(service_id=service_id)) - return kernel - - async def main(): + REVIEWER_NAME = "ArtDirector" + REVIEWER_INSTRUCTIONS = """ + You are an art director who has opinions about copywriting born of a love for David Ogilvy. + The goal is to determine if the given copy is acceptable to print. + If so, state that it is approved. + If not, provide insight on how to refine suggested copy without example. + """ agent_reviewer = ChatCompletionAgent( service_id="artdirector", kernel=_create_kernel_with_chat_completion("artdirector"), @@ -58,6 +45,15 @@ async def main(): instructions=REVIEWER_INSTRUCTIONS, ) + COPYWRITER_NAME = "CopyWriter" + COPYWRITER_INSTRUCTIONS = """ + You are a copywriter with ten years of experience and are known for brevity and a dry humor. + The goal is to refine and decide on the single best copy as an expert in the field. + Only provide a single proposal per response. + You're laser focused on the goal at hand. + Don't waste time with chit chat. + Consider suggestions when refining an idea. + """ agent_writer = ChatCompletionAgent( service_id="copywriter", kernel=_create_kernel_with_chat_completion("copywriter"), @@ -65,20 +61,26 @@ async def main(): instructions=COPYWRITER_INSTRUCTIONS, ) - chat = AgentGroupChat( - agents=[agent_writer, agent_reviewer], - termination_strategy=ApprovalTerminationStrategy(agents=[agent_reviewer], maximum_iterations=10), + group_chat = AgentGroupChat( + agents=[ + agent_writer, + agent_reviewer, + ], + termination_strategy=ApprovalTerminationStrategy( + agents=[agent_reviewer], + maximum_iterations=10, + ), ) input = "a slogan for a new line of electric cars." 
- await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) - print(f"# {AuthorRole.USER}: '{input}'") + await group_chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) + print(f"# User: '{input}'") - async for content in chat.invoke(): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") + async for content in group_chat.invoke(): + print(f"# Agent - {content.name or '*'}: '{content.content}'") - print(f"# IS COMPLETE: {chat.is_complete}") + print(f"# IS COMPLETE: {group_chat.is_complete}") if __name__ == "__main__": diff --git a/python/samples/getting_started_with_agents/step4_kernel_function_strategies.py b/python/samples/getting_started_with_agents/step4_kernel_function_strategies.py index 9ad6a9d361bf..47aa9a3ab356 100644 --- a/python/samples/getting_started_with_agents/step4_kernel_function_strategies.py +++ b/python/samples/getting_started_with_agents/step4_kernel_function_strategies.py @@ -2,16 +2,15 @@ import asyncio +from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent from semantic_kernel.agents.strategies import ( KernelFunctionSelectionStrategy, KernelFunctionTerminationStrategy, ) -from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt -from semantic_kernel.kernel import Kernel +from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion +from semantic_kernel.contents import AuthorRole, ChatMessageContent +from semantic_kernel.functions import KernelFunctionFromPrompt ################################################################### # The following sample demonstrates how to create a simple, # @@ -23,24 +22,6 @@ # in the conversation. 
# ################################################################### -REVIEWER_NAME = "ArtDirector" -REVIEWER_INSTRUCTIONS = """ -You are an art director who has opinions about copywriting born of a love for David Ogilvy. -The goal is to determine if the given copy is acceptable to print. -If so, state that it is approved. -If not, provide insight on how to refine suggested copy without example. -""" - -COPYWRITER_NAME = "CopyWriter" -COPYWRITER_INSTRUCTIONS = """ -You are a copywriter with ten years of experience and are known for brevity and a dry humor. -The goal is to refine and decide on the single best copy as an expert in the field. -Only provide a single proposal per response. -You're laser focused on the goal at hand. -Don't waste time with chit chat. -Consider suggestions when refining an idea. -""" - def _create_kernel_with_chat_completion(service_id: str) -> Kernel: kernel = Kernel() @@ -49,6 +30,13 @@ def _create_kernel_with_chat_completion(service_id: str) -> Kernel: async def main(): + REVIEWER_NAME = "ArtDirector" + REVIEWER_INSTRUCTIONS = """ + You are an art director who has opinions about copywriting born of a love for David Ogilvy. + The goal is to determine if the given copy is acceptable to print. + If so, state that it is approved. + If not, provide insight on how to refine suggested copy without example. + """ agent_reviewer = ChatCompletionAgent( service_id="artdirector", kernel=_create_kernel_with_chat_completion("artdirector"), @@ -56,6 +44,15 @@ async def main(): instructions=REVIEWER_INSTRUCTIONS, ) + COPYWRITER_NAME = "CopyWriter" + COPYWRITER_INSTRUCTIONS = """ + You are a copywriter with ten years of experience and are known for brevity and a dry humor. + The goal is to refine and decide on the single best copy as an expert in the field. + Only provide a single proposal per response. + You're laser focused on the goal at hand. + Don't waste time with chit chat. + Consider suggestions when refining an idea. 
+ """ agent_writer = ChatCompletionAgent( service_id="copywriter", kernel=_create_kernel_with_chat_completion("copywriter"), @@ -116,10 +113,10 @@ async def main(): input = "a slogan for a new line of electric cars." await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) - print(f"# {AuthorRole.USER}: '{input}'") + print(f"# User: '{input}'") async for content in chat.invoke(): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") + print(f"# Agent - {content.name or '*'}: '{content.content}'") print(f"# IS COMPLETE: {chat.is_complete}") diff --git a/python/samples/getting_started_with_agents/step5_json_result.py b/python/samples/getting_started_with_agents/step5_json_result.py index 10edc9f2198f..b57751825dad 100644 --- a/python/samples/getting_started_with_agents/step5_json_result.py +++ b/python/samples/getting_started_with_agents/step5_json_result.py @@ -2,15 +2,13 @@ import asyncio -from pydantic import ValidationError +from pydantic import BaseModel, ValidationError +from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent -from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy -from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel -from semantic_kernel.kernel_pydantic import KernelBaseModel +from semantic_kernel.agents.strategies import TerminationStrategy +from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion +from semantic_kernel.contents import AuthorRole, ChatMessageContent ################################################################### # The following sample demonstrates how to configure an Agent # @@ -21,85 +19,68 @@ 
################################################################### -SCORE_COMPLETED_THRESHOLD = 70 -TUTOR_NAME = "Tutor" -TUTOR_INSTRUCTIONS = """ -Think step-by-step and rate the user input on creativity and expressivness from 1-100. - -Respond in JSON format with the following JSON schema: - -{ - "score": "integer (1-100)", - "notes": "the reason for your score" -} -""" +def _create_kernel_with_chat_completion(service_id: str) -> Kernel: + kernel = Kernel() + kernel.add_service(OpenAIChatCompletion(service_id=service_id)) + return kernel -class InputScore(KernelBaseModel): +class InputScore(BaseModel): """A model for the input score.""" score: int notes: str -def translate_json(json_string: str) -> InputScore | None: - try: - if json_string is None: - return None - return InputScore.model_validate_json(json_string) - except ValidationError: - return None - - class ThresholdTerminationStrategy(TerminationStrategy): """A strategy for determining when an agent should terminate.""" + threshold: int = 70 + async def should_agent_terminate(self, agent, history): """Check if the agent should terminate.""" - last_message_content = history[-1].content or "" - result = translate_json(last_message_content) - return result.score >= SCORE_COMPLETED_THRESHOLD if result else False - + try: + result = InputScore.model_validate_json(history[-1].content or "") + return result.score >= self.threshold + except ValidationError: + return False -def _create_kernel_with_chat_completion(service_id: str) -> Kernel: - kernel = Kernel() - kernel.add_service(AzureChatCompletion(service_id=service_id)) - return kernel - - -async def invoke_agent(agent: ChatCompletionAgent, input: str, chat: AgentGroupChat): - """Invoke the agent with the user input.""" - await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) - print(f"# {AuthorRole.USER}: '{input}'") +async def main(): + kernel = _create_kernel_with_chat_completion(service_id="tutor") - async for content in 
chat.invoke_single_turn(agent): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") - print(f"# IS COMPLETE: {chat.is_complete}") + TUTOR_NAME = "Tutor" + TUTOR_INSTRUCTIONS = """Think step-by-step and rate the user input on creativity and expressiveness from 1-100 with some notes on how to improve.""" # noqa: E501 + settings = kernel.get_prompt_execution_settings_from_service_id(service_id="tutor") + settings.response_format = InputScore -async def main(): - service_id = "tutor" agent = ChatCompletionAgent( - service_id=service_id, - kernel=_create_kernel_with_chat_completion(service_id=service_id), + service_id="tutor", + kernel=kernel, name=TUTOR_NAME, instructions=TUTOR_INSTRUCTIONS, + execution_settings=settings, ) # Here a TerminationStrategy subclass is used that will terminate when # the response includes a score that is greater than or equal to 70. termination_strategy = ThresholdTerminationStrategy(maximum_iterations=10) - chat = AgentGroupChat(termination_strategy=termination_strategy) + group_chat = AgentGroupChat(termination_strategy=termination_strategy) - await invoke_agent(agent=agent, input="The sunset is very colorful.", chat=chat) - await invoke_agent(agent=agent, input="The sunset is setting over the mountains.", chat=chat) - await invoke_agent( - agent=agent, - input="The sunset is setting over the mountains and filled the sky with a deep red flame, setting the clouds ablaze.", # noqa: E501 - chat=chat, - ) + user_inputs = [ + "The sunset is very colorful.", + "The sunset is setting over the mountains.", + "The sunset is setting over the mountains and filled the sky with a deep red flame, setting the clouds ablaze.", + ] + for user_input in user_inputs: + await group_chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=user_input)) + print(f"# User: '{user_input}'") + + async for content in group_chat.invoke_single_turn(agent): + print(f"# Agent - {content.name or '*'}: '{content.content}'") + print(f"# 
IS COMPLETE: {group_chat.is_complete}") if __name__ == "__main__": diff --git a/python/samples/getting_started_with_agents/step6_logging.py b/python/samples/getting_started_with_agents/step6_logging.py index 197bcd72ab8e..834573e0f383 100644 --- a/python/samples/getting_started_with_agents/step6_logging.py +++ b/python/samples/getting_started_with_agents/step6_logging.py @@ -3,13 +3,11 @@ import asyncio import logging -from semantic_kernel.agents import AgentGroupChat -from semantic_kernel.agents.chat_completion.chat_completion_agent import ChatCompletionAgent -from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy -from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent +from semantic_kernel.agents.strategies import TerminationStrategy +from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion +from semantic_kernel.contents import AuthorRole, ChatMessageContent ################################################################### # The following sample demonstrates how to create a simple, # @@ -33,25 +31,6 @@ async def should_agent_terminate(self, agent, history): return "approved" in history[-1].content.lower() -REVIEWER_NAME = "ArtDirector" -REVIEWER_INSTRUCTIONS = """ -You are an art director who has opinions about copywriting born of a love for David Ogilvy. -The goal is to determine if the given copy is acceptable to print. -If so, state that it is approved. -If not, provide insight on how to refine suggested copy without example. 
-""" - -COPYWRITER_NAME = "CopyWriter" -COPYWRITER_INSTRUCTIONS = """ -You are a copywriter with ten years of experience and are known for brevity and a dry humor. -The goal is to refine and decide on the single best copy as an expert in the field. -Only provide a single proposal per response. -You're laser focused on the goal at hand. -Don't waste time with chit chat. -Consider suggestions when refining an idea. -""" - - def _create_kernel_with_chat_completion(service_id: str) -> Kernel: kernel = Kernel() kernel.add_service(AzureChatCompletion(service_id=service_id)) @@ -59,6 +38,13 @@ def _create_kernel_with_chat_completion(service_id: str) -> Kernel: async def main(): + REVIEWER_NAME = "ArtDirector" + REVIEWER_INSTRUCTIONS = """ + You are an art director who has opinions about copywriting born of a love for David Ogilvy. + The goal is to determine if the given copy is acceptable to print. + If so, state that it is approved. + If not, provide insight on how to refine suggested copy without example. + """ agent_reviewer = ChatCompletionAgent( service_id="artdirector", kernel=_create_kernel_with_chat_completion("artdirector"), @@ -66,6 +52,15 @@ async def main(): instructions=REVIEWER_INSTRUCTIONS, ) + COPYWRITER_NAME = "CopyWriter" + COPYWRITER_INSTRUCTIONS = """ + You are a copywriter with ten years of experience and are known for brevity and a dry humor. + The goal is to refine and decide on the single best copy as an expert in the field. + Only provide a single proposal per response. + You're laser focused on the goal at hand. + Don't waste time with chit chat. + Consider suggestions when refining an idea. 
+ """ agent_writer = ChatCompletionAgent( service_id="copywriter", kernel=_create_kernel_with_chat_completion("copywriter"), @@ -73,20 +68,20 @@ async def main(): instructions=COPYWRITER_INSTRUCTIONS, ) - chat = AgentGroupChat( + group_chat = AgentGroupChat( agents=[agent_writer, agent_reviewer], termination_strategy=ApprovalTerminationStrategy(agents=[agent_reviewer], maximum_iterations=10), ) input = "a slogan for a new line of electric cars." - await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) - print(f"# {AuthorRole.USER}: '{input}'") + await group_chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=input)) + print(f"# User: '{input}'") - async for content in chat.invoke(): - print(f"# {content.role} - {content.name or '*'}: '{content.content}'") + async for content in group_chat.invoke(): + print(f"# Agent - {content.name or '*'}: '{content.content}'") - print(f"# IS COMPLETE: {chat.is_complete}") + print(f"# IS COMPLETE: {group_chat.is_complete}") if __name__ == "__main__": diff --git a/python/samples/getting_started_with_agents/step7_assistant.py b/python/samples/getting_started_with_agents/step7_assistant.py index 67235c0dcf3c..4f003434022a 100644 --- a/python/samples/getting_started_with_agents/step7_assistant.py +++ b/python/samples/getting_started_with_agents/step7_assistant.py @@ -2,11 +2,10 @@ import asyncio from typing import Annotated +from semantic_kernel import Kernel from semantic_kernel.agents.open_ai import AzureAssistantAgent, OpenAIAssistantAgent -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.functions.kernel_function_decorator import kernel_function -from semantic_kernel.kernel import Kernel +from semantic_kernel.contents import AuthorRole, ChatMessageContent +from semantic_kernel.functions import kernel_function 
##################################################################### # The following sample demonstrates how to create an OpenAI # @@ -16,8 +15,6 @@ # conversation state, similar to a Semantic Kernel Chat History. # ##################################################################### -HOST_NAME = "Host" -HOST_INSTRUCTIONS = "Answer questions about the menu." # Note: you may toggle this to switch between AzureOpenAI and OpenAI use_azure_openai = False @@ -42,24 +39,16 @@ def get_item_price( return "$9.99" -# A helper method to invoke the agent with the user input -async def invoke_agent(agent: OpenAIAssistantAgent, thread_id: str, input: str) -> None: - """Invoke the agent with the user input.""" - await agent.add_chat_message(thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=input)) +# Create the instance of the Kernel +kernel = Kernel() - print(f"# {AuthorRole.USER}: '{input}'") - - async for content in agent.invoke(thread_id=thread_id): - if content.role != AuthorRole.TOOL: - print(f"# {content.role}: {content.content}") +# Add the sample plugin to the kernel +kernel.add_plugin(plugin=MenuPlugin(), plugin_name="menu") async def main(): - # Create the instance of the Kernel - kernel = Kernel() - - # Add the sample plugin to the kernel - kernel.add_plugin(plugin=MenuPlugin(), plugin_name="menu") + HOST_NAME = "Host" + HOST_INSTRUCTIONS = "Answer questions about the menu." 
# Create the OpenAI Assistant Agent service_id = "agent" @@ -74,11 +63,17 @@ async def main(): thread_id = await agent.create_thread() + user_inputs = ["Hello", "What is the special soup?", "What is the special drink?", "Thank you"] try: - await invoke_agent(agent, thread_id=thread_id, input="Hello") - await invoke_agent(agent, thread_id=thread_id, input="What is the special soup?") - await invoke_agent(agent, thread_id=thread_id, input="What is the special drink?") - await invoke_agent(agent, thread_id=thread_id, input="Thank you") + for user_input in user_inputs: + await agent.add_chat_message( + thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=user_input) + ) + print(f"# User: '{user_input}'") + async for content in agent.invoke(thread_id=thread_id): + if content.role != AuthorRole.TOOL: + print(f"# Agent: {content.content}") + finally: await agent.delete_thread(thread_id) await agent.delete() diff --git a/python/samples/getting_started_with_agents/step8_assistant_vision.py b/python/samples/getting_started_with_agents/step8_assistant_vision.py index ac7bf34d7e48..22cb0c305258 100644 --- a/python/samples/getting_started_with_agents/step8_assistant_vision.py +++ b/python/samples/getting_started_with_agents/step8_assistant_vision.py @@ -1,14 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. 
+ import asyncio import os -from semantic_kernel.agents.open_ai.open_ai_assistant_agent import OpenAIAssistantAgent -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.file_reference_content import FileReferenceContent -from semantic_kernel.contents.image_content import ImageContent -from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.agents.open_ai import OpenAIAssistantAgent +from semantic_kernel.contents import AuthorRole, ChatMessageContent, FileReferenceContent, ImageContent, TextContent ##################################################################### # The following sample demonstrates how to create an OpenAI # @@ -17,58 +14,20 @@ # and answer questions about them. # ##################################################################### -HOST_NAME = "Host" -HOST_INSTRUCTIONS = "Answer questions about the menu." 
- - -def create_message_with_image_url(input: str, url: str) -> ChatMessageContent: - return ChatMessageContent( - role=AuthorRole.USER, - items=[TextContent(text=input), ImageContent(uri=url)], - ) - - -def create_message_with_image_reference(input: str, file_id: str) -> ChatMessageContent: - return ChatMessageContent( - role=AuthorRole.USER, - items=[TextContent(text=input), FileReferenceContent(file_id=file_id)], - ) +# Create the instance of the Kernel +kernel = Kernel() +# Toggle streaming or non-streaming mode streaming = False -# A helper method to invoke the agent with the user input -async def invoke_agent(agent: OpenAIAssistantAgent, thread_id: str, message: ChatMessageContent) -> None: - """Invoke the agent with the user input.""" - await agent.add_chat_message(thread_id=thread_id, message=message) - - print(f"# {AuthorRole.USER}: '{message.items[0].text}'") - - if streaming: - first_chunk = True - async for content in agent.invoke_stream(thread_id=thread_id): - if content.role != AuthorRole.TOOL: - if first_chunk: - print(f"# {content.role}: ", end="", flush=True) - first_chunk = False - print(content.content, end="", flush=True) - print() - else: - async for content in agent.invoke(thread_id=thread_id): - if content.role != AuthorRole.TOOL: - print(f"# {content.role}: {content.content}") - - async def main(): - # Create the instance of the Kernel - kernel = Kernel() - - service_id = "agent" - # Create the Assistant Agent + AGENT_NAME = "Host" + AGENT_INSTRUCTIONS = "Answer questions about the menu." 
agent = await OpenAIAssistantAgent.create( - kernel=kernel, service_id=service_id, name=HOST_NAME, instructions=HOST_INSTRUCTIONS + kernel=kernel, service_id="agent", name=AGENT_NAME, instructions=AGENT_INSTRUCTIONS ) cat_image_file_path = os.path.join( @@ -83,28 +42,51 @@ async def main(): # Create a thread for the conversation thread_id = await agent.create_thread() + user_messages = [ + ChatMessageContent( + role=AuthorRole.USER, + items=[ + TextContent(text="Describe this image."), + ImageContent( + uri="https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/New_york_times_square-terabass.jpg/1200px-New_york_times_square-terabass.jpg" + ), + ], + ), + ChatMessageContent( + role=AuthorRole.USER, + items=[ + TextContent(text="What is the main color in this image?"), + ImageContent(uri="https://upload.wikimedia.org/wikipedia/commons/5/56/White_shark.jpg"), + ], + ), + ChatMessageContent( + role=AuthorRole.USER, + items=[ + TextContent(text="Is there an animal in this image?"), + FileReferenceContent(file_id=file_id), + ], + ), + ] try: - await invoke_agent( - agent, - thread_id=thread_id, - message=create_message_with_image_url( - "Describe this image.", - "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/New_york_times_square-terabass.jpg/1200px-New_york_times_square-terabass.jpg", - ), - ) - await invoke_agent( - agent, - thread_id=thread_id, - message=create_message_with_image_url( - "What is the main color in this image?", - "https://upload.wikimedia.org/wikipedia/commons/5/56/White_shark.jpg", - ), - ) - await invoke_agent( - agent, - thread_id=thread_id, - message=create_message_with_image_reference("Is there an animal in this image?", file_id), - ) + for message in user_messages: + await agent.add_chat_message(thread_id=thread_id, message=message) + + print(f"# User: '{message.items[0].text}'") + + if streaming: + first_chunk = True + async for content in agent.invoke_stream(thread_id=thread_id): + if content.role != AuthorRole.TOOL: + if 
first_chunk: + print("# Agent: ", end="", flush=True) + first_chunk = False + print(content.content, end="", flush=True) + print() + else: + async for content in agent.invoke(thread_id=thread_id): + if content.role != AuthorRole.TOOL: + print(f"# Agent: {content.content}") + finally: await agent.delete_file(file_id) await agent.delete_thread(thread_id) diff --git a/python/samples/getting_started_with_agents/step9_assistant_tool_code_interpreter.py b/python/samples/getting_started_with_agents/step9_assistant_tool_code_interpreter.py index 11c2deff8e7c..1b7440ff6748 100644 --- a/python/samples/getting_started_with_agents/step9_assistant_tool_code_interpreter.py +++ b/python/samples/getting_started_with_agents/step9_assistant_tool_code_interpreter.py @@ -1,11 +1,9 @@ # Copyright (c) Microsoft. All rights reserved. import asyncio -from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent -from semantic_kernel.agents.open_ai.open_ai_assistant_agent import OpenAIAssistantAgent -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.agents.open_ai import AzureAssistantAgent, OpenAIAssistantAgent +from semantic_kernel.contents import AuthorRole, ChatMessageContent ##################################################################### # The following sample demonstrates how to create an OpenAI # @@ -14,32 +12,18 @@ # Python code to print Fibonacci numbers. # ##################################################################### - -AGENT_NAME = "CodeRunner" -AGENT_INSTRUCTIONS = "Run the provided code file and return the result." 
+# Create the instance of the Kernel +kernel = Kernel() # Note: you may toggle this to switch between AzureOpenAI and OpenAI -use_azure_openai = True - - -# A helper method to invoke the agent with the user input -async def invoke_agent(agent: OpenAIAssistantAgent, thread_id: str, input: str) -> None: - """Invoke the agent with the user input.""" - await agent.add_chat_message(thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=input)) - - print(f"# {AuthorRole.USER}: '{input}'") - - async for content in agent.invoke(thread_id=thread_id): - if content.role != AuthorRole.TOOL: - print(f"# {content.role}: {content.content}") +use_azure_openai = False async def main(): - # Create the instance of the Kernel - kernel = Kernel() - # Define a service_id for the sample service_id = "agent" + AGENT_NAME = "CodeRunner" + AGENT_INSTRUCTIONS = "Run the provided code file and return the result." # Create the agent if use_azure_openai: @@ -61,12 +45,19 @@ async def main(): thread_id = await agent.create_thread() + user_input = "Use code to determine the values in the Fibonacci sequence that that are less then the value of 101?" 
+ print(f"# User: '{user_input}'") try: - await invoke_agent( - agent, + await agent.add_chat_message( thread_id=thread_id, - input="Use code to determine the values in the Fibonacci sequence that that are less then the value of 101?", # noqa: E501 + message=ChatMessageContent( + role=AuthorRole.USER, + content=user_input, + ), ) + async for content in agent.invoke(thread_id=thread_id): + if content.role != AuthorRole.TOOL: + print(f"# Agent: {content.content}") finally: await agent.delete_thread(thread_id) await agent.delete() diff --git a/python/semantic_kernel/agents/agent.py b/python/semantic_kernel/agents/agent.py index 7b923be138f3..8d569d696d33 100644 --- a/python/semantic_kernel/agents/agent.py +++ b/python/semantic_kernel/agents/agent.py @@ -3,12 +3,11 @@ import logging import uuid from collections.abc import Iterable -from typing import TYPE_CHECKING, ClassVar +from typing import ClassVar from pydantic import Field from semantic_kernel.agents.channels.agent_channel import AgentChannel -from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel from semantic_kernel.kernel_pydantic import KernelBaseModel @@ -19,10 +18,6 @@ from semantic_kernel.utils.naming import generate_random_ascii_name from semantic_kernel.utils.validation import AGENT_NAME_REGEX -if TYPE_CHECKING: - from semantic_kernel.contents.chat_history import ChatHistory - - logger: logging.Logger = logging.getLogger(__name__) @@ -40,7 +35,7 @@ class Agent(KernelBaseModel): description: The description of the agent (optional). id: The unique identifier of the agent (optional). If no id is provided, a new UUID will be generated. 
- instructions: The instructions for the agent (optional + instructions: The instructions for the agent (optional) """ id: str = Field(default_factory=lambda: str(uuid.uuid4())) @@ -49,25 +44,9 @@ class Agent(KernelBaseModel): instructions: str | None = None kernel: Kernel = Field(default_factory=Kernel) channel_type: ClassVar[type[AgentChannel] | None] = None - history_reducer: ChatHistoryReducer | None = None - arguments: KernelArguments = Field(default_factory=KernelArguments) + arguments: KernelArguments | None = None prompt_template: PromptTemplateBase | None = None - async def reduce_history(self, history: "ChatHistory") -> bool: - """Perform the reduction on the provided history, returning True if reduction occurred.""" - if self.history_reducer is None: - return False - - self.history_reducer.messages = history.messages - - reducer = await self.history_reducer.reduce() - if reducer is not None: - history.messages.clear() - history.messages.extend(reducer.messages) - return True - - return False - def get_channel_keys(self) -> Iterable[str]: """Get the channel keys. @@ -78,10 +57,6 @@ def get_channel_keys(self) -> Iterable[str]: raise NotImplementedError("Unable to get channel keys. Channel type not configured.") yield self.channel_type.__name__ - if self.history_reducer is not None: - yield self.history_reducer.__class__.__name__ - yield str(self.history_reducer.__hash__) - async def create_channel(self) -> AgentChannel: """Create a channel. @@ -110,7 +85,7 @@ async def format_instructions(self, kernel: Kernel, arguments: KernelArguments) ) return await self.prompt_template.render(kernel, arguments) - def merge_arguments(self, override_args: KernelArguments) -> KernelArguments: + def merge_arguments(self, override_args: KernelArguments | None) -> KernelArguments: """Merge the arguments with the override arguments. Args: @@ -119,17 +94,18 @@ def merge_arguments(self, override_args: KernelArguments) -> KernelArguments: Returns: The merged arguments. 
If both are None, return None. """ - # If the agent's arguments are not set, simply return whatever is passed in. if not self.arguments: + if not override_args: + return KernelArguments() return override_args - # If the override args are not set, keep the current arguments. if not override_args: return self.arguments # Both are not None, so merge with precedence for override_args. - merged_execution_settings = dict(self.arguments.execution_settings or {}) - merged_execution_settings.update(override_args.execution_settings or {}) + merged_execution_settings = self.arguments.execution_settings or {} + if override_args.execution_settings: + merged_execution_settings.update(override_args.execution_settings) merged_params = dict(self.arguments) merged_params.update(override_args) diff --git a/python/semantic_kernel/agents/channels/chat_history_channel.py b/python/semantic_kernel/agents/channels/chat_history_channel.py index 057c005b3d3d..563efeaef610 100644 --- a/python/semantic_kernel/agents/channels/chat_history_channel.py +++ b/python/semantic_kernel/agents/channels/chat_history_channel.py @@ -64,9 +64,6 @@ async def invoke( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) - # pre-process history reduction - await agent.reduce_history(self) - message_count = len(self.messages) mutated_history = set() message_queue: Deque[ChatMessageContent] = deque() @@ -122,9 +119,6 @@ async def invoke_stream( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) - # pre-process history reduction - await agent.reduce_history(self) - message_count = len(self.messages) async for response_message in agent.invoke_stream(self): diff --git a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py index 3de3e7ce3493..44498287669d 100644 --- a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py +++ 
b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py @@ -12,7 +12,6 @@ from semantic_kernel.const import DEFAULT_SERVICE_NAME from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions import KernelServiceNotFoundError @@ -48,7 +47,6 @@ def __init__( id: str | None = None, description: str | None = None, instructions: str | None = None, - history_reducer: ChatHistoryReducer | None = None, arguments: KernelArguments | None = None, prompt_template_config: PromptTemplateConfig | None = None, ) -> None: @@ -63,7 +61,6 @@ def __init__( a unique GUID will be generated. description: The description of the agent. (optional) instructions: The instructions for the agent. (optional) - history_reducer: The history reducer for the agent. (optional) arguments: The kernel arguments for the agent. (optional) Invoke method arguments take precedence over the arguments provided here. prompt_template_config: The prompt template configuration for the agent. 
(optional) @@ -81,8 +78,6 @@ def __init__( args["id"] = id if kernel is not None: args["kernel"] = kernel - if history_reducer is not None: - args["history_reducer"] = history_reducer if arguments is not None: args["arguments"] = arguments @@ -248,16 +243,11 @@ async def _setup_agent_chat_history( self, history: ChatHistory, kernel: "Kernel", arguments: KernelArguments ) -> ChatHistory: """Setup the agent chat history.""" - chat = [] - - instructions = await self.format_instructions(kernel, arguments) - - if instructions is not None: - chat.append(ChatMessageContent(role=AuthorRole.SYSTEM, content=instructions, name=self.name)) - - chat.extend(history.messages if history.messages else []) - - return ChatHistory(messages=chat) + return ( + ChatHistory(messages=history.messages) + if self.instructions is None + else ChatHistory(system_message=self.instructions, messages=history.messages) + ) async def _get_chat_completion_service_and_settings( self, kernel: "Kernel", arguments: KernelArguments diff --git a/python/semantic_kernel/agents/group_chat/agent_chat.py b/python/semantic_kernel/agents/group_chat/agent_chat.py index 294f695cbb1b..e6b4da0e2242 100644 --- a/python/semantic_kernel/agents/group_chat/agent_chat.py +++ b/python/semantic_kernel/agents/group_chat/agent_chat.py @@ -3,7 +3,7 @@ import asyncio import logging import threading -from collections.abc import AsyncGenerator, AsyncIterable +from collections.abc import AsyncIterable from pydantic import Field, PrivateAttr @@ -54,13 +54,13 @@ def invoke(self, agent: Agent | None = None, is_joining: bool = True) -> AsyncIt """Invoke the agent asynchronously.""" raise NotImplementedError("Subclasses should implement this method") - async def get_messages_in_descending_order(self): + async def get_messages_in_descending_order(self) -> AsyncIterable[ChatMessageContent]: """Get messages in descending order asynchronously.""" for index in range(len(self.history.messages) - 1, -1, -1): yield self.history.messages[index] 
await asyncio.sleep(0) # Yield control to the event loop - async def get_chat_messages(self, agent: "Agent | None" = None) -> AsyncGenerator[ChatMessageContent, None]: + async def get_chat_messages(self, agent: "Agent | None" = None) -> AsyncIterable[ChatMessageContent]: """Get chat messages asynchronously.""" self.set_activity_or_throw() diff --git a/python/semantic_kernel/agents/group_chat/agent_group_chat.py b/python/semantic_kernel/agents/group_chat/agent_group_chat.py index 38d0d73af0ab..c4f8acc8f41a 100644 --- a/python/semantic_kernel/agents/group_chat/agent_group_chat.py +++ b/python/semantic_kernel/agents/group_chat/agent_group_chat.py @@ -2,7 +2,8 @@ import logging from collections.abc import AsyncIterable -from typing import Any +from copy import deepcopy +from typing import TYPE_CHECKING, Any from pydantic import Field @@ -14,10 +15,14 @@ from semantic_kernel.agents.strategies.selection.selection_strategy import SelectionStrategy from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.agent_exceptions import AgentChatException from semantic_kernel.utils.experimental_decorator import experimental_class +if TYPE_CHECKING: + from semantic_kernel.contents.chat_history import ChatHistory + logger: logging.Logger = logging.getLogger(__name__) @@ -37,6 +42,7 @@ def __init__( agents: list[Agent] | None = None, termination_strategy: TerminationStrategy | None = None, selection_strategy: SelectionStrategy | None = None, + chat_history: "ChatHistory | None" = None, ) -> None: """Initialize a new instance of AgentGroupChat. @@ -44,6 +50,7 @@ def __init__( agents: The agents to add to the group chat. termination_strategy: The termination strategy to use. 
selection_strategy: The selection strategy + chat_history: The chat history. """ agent_ids = {agent.id for agent in agents} if agents else set() @@ -59,6 +66,8 @@ def __init__( args["termination_strategy"] = termination_strategy if selection_strategy is not None: args["selection_strategy"] = selection_strategy + if chat_history is not None: + args["history"] = chat_history super().__init__(**args) @@ -199,3 +208,17 @@ async def invoke_stream( if self.is_complete: break + + async def reduce_history(self) -> bool: + """Perform the reduction on the provided history, returning True if reduction occurred.""" + if not isinstance(self.history, ChatHistoryReducer): + return False + + reducer = await self.history.reduce() + if reducer is not None: + reduced_history = deepcopy(reducer.messages) + await self.reset() + await self.add_chat_messages(reduced_history) + return True + + return False diff --git a/python/semantic_kernel/agents/strategies/__init__.py b/python/semantic_kernel/agents/strategies/__init__.py index 836604a9f632..9a0307489103 100644 --- a/python/semantic_kernel/agents/strategies/__init__.py +++ b/python/semantic_kernel/agents/strategies/__init__.py @@ -9,6 +9,7 @@ from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import ( KernelFunctionTerminationStrategy, ) +from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy __all__ = [ "AggregatorTerminationStrategy", @@ -16,4 +17,5 @@ "KernelFunctionSelectionStrategy", "KernelFunctionTerminationStrategy", "SequentialSelectionStrategy", + "TerminationStrategy", ] diff --git a/python/semantic_kernel/contents/__init__.py b/python/semantic_kernel/contents/__init__.py index c326115ccd86..5d70a49c1f93 100644 --- a/python/semantic_kernel/contents/__init__.py +++ b/python/semantic_kernel/contents/__init__.py @@ -4,8 +4,10 @@ from semantic_kernel.contents.audio_content import AudioContent from semantic_kernel.contents.chat_history import 
ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.file_reference_content import FileReferenceContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.contents.image_content import ImageContent @@ -22,9 +24,11 @@ "AudioContent", "AuthorRole", "ChatHistory", + "ChatHistoryReducer", "ChatHistorySummarizationReducer", "ChatHistoryTruncationReducer", "ChatMessageContent", + "FileReferenceContent", "FinishReason", "FunctionCallContent", "FunctionResultContent", diff --git a/python/semantic_kernel/contents/chat_history.py b/python/semantic_kernel/contents/chat_history.py index 5013aae0e073..e38172db02fa 100644 --- a/python/semantic_kernel/contents/chat_history.py +++ b/python/semantic_kernel/contents/chat_history.py @@ -1,14 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. 
+import json import logging -from collections.abc import Generator +from collections.abc import Generator, Iterable from functools import singledispatchmethod from html import unescape -from typing import Any +from typing import Any, TypeVar from xml.etree.ElementTree import Element, tostring # nosec from defusedxml.ElementTree import XML, ParseError -from pydantic import field_validator +from pydantic import Field, field_validator, model_validator from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.const import CHAT_HISTORY_TAG, CHAT_MESSAGE_CONTENT_TAG @@ -19,56 +20,37 @@ logger = logging.getLogger(__name__) +_T = TypeVar("_T", bound="ChatHistory") + class ChatHistory(KernelBaseModel): """This class holds the history of chat messages from a chat conversation. - Note: the constructor takes a system_message parameter, which is not part - of the class definition. This is to allow the system_message to be passed in - as a keyword argument, but not be part of the class definition. + Note: the system_message is added to the messages as a ChatMessageContent instance with role=AuthorRole.SYSTEM, + but updating it will not update the messages list. - Attributes: - messages (List[ChatMessageContent]): The list of chat messages in the history. + Args: + messages: The messages to add to the chat history. + system_message: A system message to add to the chat history, optional. + if passed, it is added to the messages + as a ChatMessageContent instance with role=AuthorRole.SYSTEM + before any other messages. """ - messages: list[ChatMessageContent] - - def __init__(self, **data: Any): - """Initializes a new instance of the ChatHistory class. - - Optionally incorporating a message and/or a system message at the beginning of the chat history. - - This constructor allows for flexible initialization with chat messages and an optional messages or a - system message. 
If both 'messages' (a list of ChatMessageContent instances) and 'system_message' are - provided, the 'system_message' is prepended to the list of messages, ensuring it appears as the first - message in the history. If only 'system_message' is provided without any 'messages', the chat history is - initialized with the 'system_message' as its first item. If 'messages' are provided without a - 'system_message', the chat history is initialized with the provided messages as is. - - Note: The 'system_message' is not retained as part of the class's attributes; it's used during - initialization and then discarded. The rest of the keyword arguments are passed to the superclass - constructor and handled according to the Pydantic model's behavior. - - Args: - **data: Arbitrary keyword arguments. - The constructor looks for two optional keys: - - 'messages': Optional[List[ChatMessageContent]], a list of chat messages to include in the history. - - 'system_message' Optional[str]: An optional string representing a system-generated message to be - included at the start of the chat history. 
- - """ - system_message_content = data.pop("system_message", None) - - if system_message_content: - system_message = ChatMessageContent(role=AuthorRole.SYSTEM, content=system_message_content) + messages: list[ChatMessageContent] = Field(default_factory=list, kw_only=False) + system_message: str | None = Field(default=None, kw_only=False, repr=False) + @model_validator(mode="before") + @classmethod + def _parse_system_message(cls, data: Any) -> Any: + """Parse the system_message and add it to the messages.""" + if isinstance(data, dict) and (system_message := data.pop("system_message", None)): + msg = ChatMessageContent(role=AuthorRole.SYSTEM, content=system_message) if "messages" in data: - data["messages"] = [system_message] + data["messages"] + data["messages"] = [msg] + data["messages"] else: - data["messages"] = [system_message] - if "messages" not in data: - data["messages"] = [] - super().__init__(**data) + data["messages"] = [msg] + return data @field_validator("messages", mode="before") @classmethod @@ -85,76 +67,107 @@ def _validate_messages(cls, messages: list[ChatMessageContent]) -> list[ChatMess @singledispatchmethod def add_system_message(self, content: str | list[KernelContent], **kwargs) -> None: - """Add a system message to the chat history.""" + """Add a system message to the chat history. + + Args: + content: The content of the system message, can be a string or a + list of KernelContent instances that are turned into a single ChatMessageContent. + **kwargs: Additional keyword arguments. 
+ """ raise NotImplementedError @add_system_message.register - def add_system_message_str(self, content: str, **kwargs: Any) -> None: + def _(self, content: str, **kwargs: Any) -> None: """Add a system message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.SYSTEM, content=content, **kwargs)) @add_system_message.register(list) - def add_system_message_list(self, content: list[KernelContent], **kwargs: Any) -> None: + def _(self, content: list[KernelContent], **kwargs: Any) -> None: """Add a system message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.SYSTEM, items=content, **kwargs)) @singledispatchmethod def add_developer_message(self, content: str | list[KernelContent], **kwargs) -> None: - """Add a system message to the chat history.""" + """Add a system message to the chat history. + + Args: + content: The content of the developer message, can be a string or a + list of KernelContent instances that are turned into a single ChatMessageContent. + **kwargs: Additional keyword arguments. + """ raise NotImplementedError @add_developer_message.register - def add_developer_message_str(self, content: str, **kwargs: Any) -> None: + def _(self, content: str, **kwargs: Any) -> None: """Add a system message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.DEVELOPER, content=content, **kwargs)) @add_developer_message.register(list) - def add_developer_message_list(self, content: list[KernelContent], **kwargs: Any) -> None: + def _(self, content: list[KernelContent], **kwargs: Any) -> None: """Add a system message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.DEVELOPER, items=content, **kwargs)) @singledispatchmethod def add_user_message(self, content: str | list[KernelContent], **kwargs: Any) -> None: - """Add a user message to the chat history.""" + """Add a user message to the chat history. 
+ + Args: + content: The content of the user message, can be a string or a + list of KernelContent instances that are turned into a single ChatMessageContent. + **kwargs: Additional keyword arguments. + + """ raise NotImplementedError @add_user_message.register - def add_user_message_str(self, content: str, **kwargs: Any) -> None: + def _(self, content: str, **kwargs: Any) -> None: """Add a user message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.USER, content=content, **kwargs)) @add_user_message.register(list) - def add_user_message_list(self, content: list[KernelContent], **kwargs: Any) -> None: + def _(self, content: list[KernelContent], **kwargs: Any) -> None: """Add a user message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.USER, items=content, **kwargs)) @singledispatchmethod def add_assistant_message(self, content: str | list[KernelContent], **kwargs: Any) -> None: - """Add an assistant message to the chat history.""" + """Add an assistant message to the chat history. + + Args: + content: The content of the assistant message, can be a string or a + list of KernelContent instances that are turned into a single ChatMessageContent. + **kwargs: Additional keyword arguments. 
+ """ raise NotImplementedError @add_assistant_message.register - def add_assistant_message_str(self, content: str, **kwargs: Any) -> None: + def _(self, content: str, **kwargs: Any) -> None: """Add an assistant message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.ASSISTANT, content=content, **kwargs)) @add_assistant_message.register(list) - def add_assistant_message_list(self, content: list[KernelContent], **kwargs: Any) -> None: + def _(self, content: list[KernelContent], **kwargs: Any) -> None: """Add an assistant message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.ASSISTANT, items=content, **kwargs)) @singledispatchmethod def add_tool_message(self, content: str | list[KernelContent], **kwargs: Any) -> None: - """Add a tool message to the chat history.""" + """Add a tool message to the chat history. + + Args: + content: The content of the tool message, can be a string or a + list of KernelContent instances that are turned into a single ChatMessageContent. + **kwargs: Additional keyword arguments. 
+ """ raise NotImplementedError @add_tool_message.register - def add_tool_message_str(self, content: str, **kwargs: Any) -> None: + def _(self, content: str, **kwargs: Any) -> None: """Add a tool message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.TOOL, content=content, **kwargs)) @add_tool_message.register(list) - def add_tool_message_list(self, content: list[KernelContent], **kwargs: Any) -> None: + def _(self, content: list[KernelContent], **kwargs: Any) -> None: """Add a tool message to the chat history.""" self.add_message(message=self._prepare_for_add(role=AuthorRole.TOOL, items=content, **kwargs)) @@ -245,6 +258,31 @@ def __str__(self) -> str: chat_history_xml.append(message.to_element()) return tostring(chat_history_xml, encoding="unicode", short_empty_elements=True) + def clear(self) -> None: + """Clear the chat history.""" + self.messages.clear() + + def extend(self, messages: Iterable[ChatMessageContent]) -> None: + """Extend the chat history with a list of messages. + + Args: + messages: The messages to add to the history. + Can be a list of ChatMessageContent instances or a ChatHistory itself. + """ + self.messages.extend(messages) + + def replace(self, messages: Iterable[ChatMessageContent]) -> None: + """Replace the chat history with a list of messages. + + This calls clear() and then extend(messages=messages). + + Args: + messages: The messages to add to the history. + Can be a list of ChatMessageContent instances or a ChatHistory itself. 
+ """ + self.clear() + self.extend(messages=messages) + def to_prompt(self) -> str: """Return a string representation of the history.""" chat_history_xml = Element(CHAT_HISTORY_TAG) @@ -264,7 +302,7 @@ def __eq__(self, other: Any) -> bool: return self.messages == other.messages @classmethod - def from_rendered_prompt(cls, rendered_prompt: str) -> "ChatHistory": + def from_rendered_prompt(cls: type[_T], rendered_prompt: str) -> _T: """Create a ChatHistory instance from a rendered prompt. Args: @@ -305,12 +343,12 @@ def serialize(self) -> str: ValueError: If the ChatHistory instance cannot be serialized to JSON. """ try: - return self.model_dump_json(indent=2, exclude_none=True) + return self.model_dump_json(exclude_none=True, indent=2) except Exception as e: # pragma: no cover raise ContentSerializationError(f"Unable to serialize ChatHistory to JSON: {e}") from e @classmethod - def restore_chat_history(cls, chat_history_json: str) -> "ChatHistory": + def restore_chat_history(cls: type[_T], chat_history_json: str) -> _T: """Restores a ChatHistory instance from a JSON string. Args: @@ -325,7 +363,7 @@ def restore_chat_history(cls, chat_history_json: str) -> "ChatHistory": fails validation. 
""" try: - return ChatHistory.model_validate_json(chat_history_json) + return cls(**json.loads(chat_history_json)) except Exception as e: raise ContentInitializationError(f"Invalid JSON format: {e}") diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py index bc05c705ceda..49ae0fc066f1 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py @@ -2,6 +2,7 @@ import sys from abc import ABC, abstractmethod +from typing import Any if sys.version < "3.11": from typing_extensions import Self # pragma: no cover @@ -11,6 +12,8 @@ from pydantic import Field from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.exceptions.content_exceptions import ContentInitializationError from semantic_kernel.utils.experimental_decorator import experimental_class @@ -19,7 +22,11 @@ class ChatHistoryReducer(ChatHistory, ABC): """Defines a contract for reducing chat history.""" target_count: int = Field(..., gt=0, description="Target message count.") - threshold_count: int = Field(0, ge=0, description="Threshold count to avoid orphaning messages.") + threshold_count: int = Field(default=0, ge=0, description="Threshold count to avoid orphaning messages.") + auto_reduce: bool = Field( + default=False, + description="Whether to automatically reduce the chat history, this happens when using add_message_async.", + ) @abstractmethod async def reduce(self) -> Self | None: @@ -29,3 +36,28 @@ async def reduce(self) -> Self | None: A possibly shorter list of messages, or None if no change is needed. """ ... 
+ + async def add_message_async( + self, + message: ChatMessageContent | dict[str, Any], + encoding: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + """Add a message to the chat history. + + If auto_reduce is enabled, the history will be reduced after adding the message. + """ + if isinstance(message, ChatMessageContent): + self.messages.append(message) + if self.auto_reduce: + await self.reduce() + return + if "role" not in message: + raise ContentInitializationError(f"Dictionary must contain at least the role. Got: {message}") + if encoding: + message["encoding"] = encoding + if metadata: + message["metadata"] = metadata + self.messages.append(ChatMessageContent(**message)) + if self.auto_reduce: + await self.reduce() diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py index 6742c0b56816..6701e5f77e48 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py @@ -96,10 +96,11 @@ def locate_safe_reduction_index( message_index = total_count - target_count # Move backward to avoid cutting function calls / results + # also skip over developer/system messages while message_index >= offset_count: - if not any( - isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in history[message_index].items - ): + if history[message_index].role not in (AuthorRole.DEVELOPER, AuthorRole.SYSTEM): + break + if not contains_function_call_or_result(history[message_index]): break message_index -= 1 @@ -164,6 +165,11 @@ def extract_range( i += 1 continue + # skipping system/developer message + if msg.role in (AuthorRole.DEVELOPER, AuthorRole.SYSTEM): + i += 1 + continue + # If preserve_pairs is on, and there's a paired index, skip or include them both if preserve_pairs and idx in pair_map: paired_idx = 
pair_map[idx] diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py index 1feaf1a839ad..143e27b22326 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py @@ -2,20 +2,20 @@ import logging import sys -from typing import Any - -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.utils.experimental_decorator import experimental_class if sys.version < "3.11": from typing_extensions import Self # pragma: no cover else: from typing import Self # type: ignore # pragma: no cover +if sys.version < "3.12": + from typing_extensions import override # pragma: no cover +else: + from typing import override # type: ignore # pragma: no cover from pydantic import Field from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer @@ -27,6 +27,7 @@ locate_summarization_boundary, ) from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException +from semantic_kernel.utils.experimental_decorator import experimental_class logger = logging.getLogger(__name__) @@ -49,73 +50,39 @@ @experimental_class class ChatHistorySummarizationReducer(ChatHistoryReducer): - """A ChatHistory with logic to summarize older messages past a target count.""" + """A ChatHistory with logic to summarize older messages past a target count. 
+ + This class inherits from ChatHistoryReducer, which in turn inherits from ChatHistory. + It can be used anywhere a ChatHistory is expected, while adding summarization capability. + + Args: + target_count: The target message count. + threshold_count: The threshold count to avoid orphaning messages. + auto_reduce: Whether to automatically reduce the chat history, default is False. + service: The ChatCompletion service to use for summarization. + summarization_instructions: The summarization instructions, optional. + use_single_summary: Whether to use a single summary message, default is True. + fail_on_error: Raise error if summarization fails, default is True. + include_function_content_in_summary: Whether to include function calls/results in the summary, default is False. + execution_settings: The execution settings for the summarization prompt, optional. + + """ service: ChatCompletionClientBase summarization_instructions: str = Field( - default_factory=lambda: DEFAULT_SUMMARIZATION_PROMPT, + default=DEFAULT_SUMMARIZATION_PROMPT, description="The summarization instructions.", + kw_only=True, ) - use_single_summary: bool = Field(True, description="Whether to use a single summary message.") - fail_on_error: bool = Field(True, description="Raise error if summarization fails.") - service_id: str = Field( - default_factory=lambda: DEFAULT_SERVICE_NAME, description="The ID of the chat completion service." - ) + use_single_summary: bool = Field(default=True, description="Whether to use a single summary message.") + fail_on_error: bool = Field(default=True, description="Raise error if summarization fails.") include_function_content_in_summary: bool = Field( - False, description="Whether to include function calls/results in the summary." + default=False, description="Whether to include function calls/results in the summary." 
) execution_settings: PromptExecutionSettings | None = None - def __init__( - self, - service: ChatCompletionClientBase, - target_count: int, - service_id: str | None = None, - threshold_count: int | None = None, - summarization_instructions: str | None = None, - use_single_summary: bool | None = None, - fail_on_error: bool | None = None, - include_function_content_in_summary: bool | None = None, - execution_settings: PromptExecutionSettings | None = None, - **kwargs: Any, - ): - """Initialize the ChatHistorySummarizationReducer. - - Args: - service (ChatCompletionClientBase): The chat completion service. - target_count (int): The target number of messages to retain after applying summarization. - service_id (str | None): The ID of the chat completion service. - threshold_count (int | None): The threshold beyond target_count required to trigger reduction. - summarization_instructions (str | None): The summarization instructions. - use_single_summary (bool | None): Whether to use a single summary message. - fail_on_error (bool | None): Raise error if summarization fails. - include_function_content_in_summary (bool | None): Whether to include function calls/results in the summary. - execution_settings (PromptExecutionSettings | None): The prompt execution settings. - **kwargs (Any): Additional keyword arguments. 
- """ - args: dict[str, Any] = { - "service": service, - "target_count": target_count, - } - if service_id is not None: - args["service_id"] = service_id - if threshold_count is not None: - args["threshold_count"] = threshold_count - if summarization_instructions is not None: - args["summarization_instructions"] = summarization_instructions - if use_single_summary is not None: - args["use_single_summary"] = use_single_summary - if fail_on_error is not None: - args["fail_on_error"] = fail_on_error - if include_function_content_in_summary is not None: - args["include_function_content_in_summary"] = include_function_content_in_summary - if execution_settings is not None: - args["execution_settings"] = execution_settings - - super().__init__(**args, **kwargs) - + @override async def reduce(self) -> Self | None: - """Summarize the older messages past the target message count.""" history = self.messages if len(history) <= self.target_count + (self.threshold_count or 0): return None # No summarization needed @@ -187,19 +154,15 @@ async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageCon from semantic_kernel.contents.utils.author_role import AuthorRole chat_history = ChatHistory(messages=messages) - - role = ( - getattr(self.execution_settings, "instruction_role", AuthorRole.SYSTEM) - if self.execution_settings - else AuthorRole.SYSTEM + execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_from_settings( + PromptExecutionSettings() ) - - chat_history.add_message(ChatMessageContent(role=role, content=self.summarization_instructions)) - - execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_class()( - service_id=self.service_id + chat_history.add_message( + ChatMessageContent( + role=getattr(execution_settings, "instruction_role", AuthorRole.SYSTEM), + content=self.summarization_instructions, + ) ) - return await self.service.get_chat_message_content(chat_history=chat_history, 
settings=execution_settings) def __eq__(self, other: object) -> bool: diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py index 4faf28876748..81684054cf23 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py @@ -2,20 +2,23 @@ import logging import sys -from typing import Any - -from semantic_kernel.utils.experimental_decorator import experimental_class if sys.version < "3.11": from typing_extensions import Self # pragma: no cover else: from typing import Self # type: ignore # pragma: no cover +if sys.version < "3.12": + from typing_extensions import override # pragma: no cover +else: + from typing import override # type: ignore # pragma: no cover + from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( extract_range, locate_safe_reduction_index, ) +from semantic_kernel.utils.experimental_decorator import experimental_class logger = logging.getLogger(__name__) @@ -26,23 +29,15 @@ class ChatHistoryTruncationReducer(ChatHistoryReducer): Because this class inherits from ChatHistoryReducer (which in turn inherits from ChatHistory), it can also be used anywhere a ChatHistory is expected, while adding truncation capability. - """ - def __init__(self, target_count: int, threshold_count: int | None = None, **kwargs: Any): - """Initialize the truncation reducer.""" - args: dict[str, Any] = { - "target_count": target_count, - } - if threshold_count is not None: - args["threshold_count"] = threshold_count - super().__init__(**args, **kwargs) + Args: + target_count: The target message count. + threshold_count: The threshold count to avoid orphaning messages. 
+ auto_reduce: Whether to automatically reduce the chat history, default is False. + """ + @override async def reduce(self) -> Self | None: - """Truncate the chat history to the target message count, avoiding orphaned calls. - - Returns: - The truncated list of messages if truncation occurred, or None otherwise. - """ history = self.messages if len(history) <= self.target_count + (self.threshold_count or 0): # No need to reduce diff --git a/python/tests/unit/agents/test_agent.py b/python/tests/unit/agents/test_agent.py index c65d31de2eeb..edb2772a82d2 100644 --- a/python/tests/unit/agents/test_agent.py +++ b/python/tests/unit/agents/test_agent.py @@ -8,9 +8,6 @@ from semantic_kernel.agents import Agent from semantic_kernel.agents.channels.agent_channel import AgentChannel -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer -from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.functions.kernel_arguments import KernelArguments @@ -110,66 +107,17 @@ async def test_agent_hash(): assert hash(agent1) != hash(agent3) -async def test_reduce_history_no_reducer(): - agent = Agent() - history = MockChatHistory(messages=["msg1", "msg2"]) - - result = await agent.reduce_history(history) - - assert result is False, "reduce_history should return False if no history_reducer is set" - assert history.messages == ["msg1", "msg2"], "History should remain unchanged" - - -async def test_reduce_history_reducer_returns_none(): - agent = Agent() - agent.history_reducer = AsyncMock(spec=ChatHistoryReducer) - agent.history_reducer.reduce = AsyncMock(return_value=None) - - history = MockChatHistory(messages=["original1", "original2"]) - result = await agent.reduce_history(history) - - assert result is False, "reduce_history should return False if reducer returns None" - assert history.messages 
== ["original1", "original2"], "History should remain unchanged" - - -async def test_reduce_history_reducer_returns_messages(): - agent = Agent() - agent.history_reducer = ChatHistoryTruncationReducer(target_count=1) - history = MockChatHistory( - messages=[ - ChatMessageContent(role="user", content="original message"), - ChatMessageContent(role="assistant", content="assistant message"), - ] - ) - - result = await agent.reduce_history(history) - - assert result is True, "reduce_history should return True if new messages are returned" - assert history.messages is not None - - def test_get_channel_keys_no_channel_type(): agent = Agent() with pytest.raises(NotImplementedError): list(agent.get_channel_keys()) -def test_get_channel_keys_with_channel_and_reducer(): - agent = MockAgent() - reducer = ChatHistoryTruncationReducer(target_count=1) - agent.history_reducer = reducer - - keys = list(agent.get_channel_keys()) - assert len(keys) == 3, "Should return three keys: channel, reducer class name, and reducer hash" - assert keys[0] == "MockChannel" - assert keys[1] == "ChatHistoryTruncationReducer" - assert keys[2] == str(reducer.__hash__), "Should return the string of the reducer's __hash__" - - def test_merge_arguments_both_none(): agent = Agent() merged = agent.merge_arguments(None) - assert merged is None, "Should return None if both agent.arguments and override_args are None" + assert isinstance(merged, KernelArguments) + assert len(merged) == 0, "If both arguments are None, should return an empty KernelArguments object" def test_merge_arguments_agent_none_override_not_none(): diff --git a/python/tests/unit/contents/test_chat_history_summarization_reducer.py b/python/tests/unit/contents/test_chat_history_summarization_reducer.py index 35e13c969522..c61d044a9811 100644 --- a/python/tests/unit/contents/test_chat_history_summarization_reducer.py +++ b/python/tests/unit/contents/test_chat_history_summarization_reducer.py @@ -5,7 +5,7 @@ import pytest from 
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import SUMMARY_METADATA_KEY from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ( @@ -49,7 +49,6 @@ def test_summarization_reducer_init(mock_service): reducer = ChatHistorySummarizationReducer( service=mock_service, target_count=10, - service_id="my_service", threshold_count=5, summarization_instructions="Custom instructions", use_single_summary=False, @@ -58,7 +57,6 @@ def test_summarization_reducer_init(mock_service): assert reducer.service == mock_service assert reducer.target_count == 10 - assert reducer.service_id == "my_service" assert reducer.threshold_count == 5 assert reducer.summarization_instructions == "Custom instructions" assert reducer.use_single_summary is False @@ -72,7 +70,6 @@ def test_summarization_reducer_defaults(mock_service): assert reducer.summarization_instructions in reducer.summarization_instructions assert reducer.use_single_summary is True assert reducer.fail_on_error is True - assert reducer.service_id == DEFAULT_SERVICE_NAME def test_summarization_reducer_eq_and_hash(mock_service): @@ -115,6 +112,7 @@ async def test_summarization_reducer_reduce_needed(mock_service): # Mock that the service will return a single summary message summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="This is a summary.") mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() result = await reducer.reduce() assert result is not None, "We expect a shortened list with a new summary inserted." 
@@ -124,6 +122,33 @@ async def test_summarization_reducer_reduce_needed(mock_service): ) +async def test_summarization_reducer_reduce_needed_auto(mock_service): + # Mock that the service will return a single summary message + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="This is a summary.") + mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() + + messages = [ + # A summary message (as in the original test) + ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}), + # Enough additional messages so total is > 4 + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ] + + reducer = ChatHistorySummarizationReducer(auto_reduce=True, service=mock_service, target_count=3, threshold_count=1) + + for msg in messages: + await reducer.add_message_async(msg) + assert len(reducer.messages) <= 5, ( + "We should auto-reduce after each message, we have one summary, and then 4 other messages." 
+ ) + + async def test_summarization_reducer_reduce_no_messages_to_summarize(mock_service): # If we do use_single_summary=False, the older_range_start is insertion_point # In that scenario, if insertion_point == older_range_end => no messages to summarize => return None @@ -196,6 +221,7 @@ async def test_summarization_reducer_private_summarize(mock_service): summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Mock Summary") mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() actual_summary = await reducer._summarize(chat_messages) assert actual_summary is not None, "We should get a summary message back."