Skip to content

Commit

Permalink
Add anthropic llm tests (#519)
Browse files Browse the repository at this point in the history
* Tests for Anthropic LLMs: WIP

* Tests for Anthropic LLMs: WIP

* Add ANTHROPIC_API_KEY env to core-llm-test.yml

* Skip redis test for gemini and anthropic

* Update .github/workflows/core-llm-test.yml

---------

Co-authored-by: Davor Runje <[email protected]>
  • Loading branch information
rjambrecic and davorrunje authored Jan 16, 2025
1 parent 13e4e41 commit 6ffce58
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 4 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/core-llm-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
test:
strategy:
matrix:
llm: ["openai", "gemini"]
llm: ["openai", "gemini", "anthropic"]
python-version: ["3.9"]
os: [ubuntu-latest]
fail-fast: false
Expand Down Expand Up @@ -52,7 +52,7 @@ jobs:
uv pip install --system -e ".[test,redis,interop]"
python -c "import autogen"
- name: Install packages for ${{ matrix.llm }}
if: matrix.llm == 'gemini'
if: matrix.llm != 'openai'
run: |
docker --version
uv pip install --system -e ".[test,redis,interop,${{ matrix.llm }}]"
Expand All @@ -61,6 +61,7 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ markers = [
"all",
"openai",
"gemini",
"anthropic",
"redis",
"docker",
]
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-core-llm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0

# Default mark if none is provided
DEFAULT_MARK="openai or gemini"
DEFAULT_MARK="openai or gemini or anthropic"

# Initialize MARK as the default value
MARK="$DEFAULT_MARK"
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-skip-llm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
#
# SPDX-License-Identifier: Apache-2.0

bash scripts/test.sh -m "not (openai or gemini)" "$@"
bash scripts/test.sh -m "not (openai or gemini or anthropic)" "$@"
10 changes: 10 additions & 0 deletions test/agentchat/test_agent_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ def test_two_agents_logging_gemini(credentials_gemini_pro: Credentials, db_conne
_test_two_agents_logging(credentials_gemini_pro, db_connection, row_classes=["GeminiClient"])


@pytest.mark.anthropic
def test_two_agents_logging_anthropic(credentials_anthropic_claude_sonnet: Credentials, db_connection) -> None:
_test_two_agents_logging(credentials_anthropic_claude_sonnet, db_connection, row_classes=["AnthropicClient"])


@pytest.mark.openai
def test_two_agents_logging(credentials: Credentials, db_connection):
_test_two_agents_logging(credentials, db_connection)
Expand Down Expand Up @@ -255,6 +260,11 @@ def test_groupchat_logging_gemini(credentials_gemini_pro: Credentials, db_connec
_test_groupchat_logging(credentials_gemini_pro, credentials_gemini_pro, db_connection)


@pytest.mark.anthropic
def test_groupchat_logging_anthropic(credentials_anthropic_claude_sonnet: Credentials, db_connection):
_test_groupchat_logging(credentials_anthropic_claude_sonnet, credentials_anthropic_claude_sonnet, db_connection)


@pytest.mark.openai
def test_groupchat_logging(credentials_gpt_4o: Credentials, credentials_gpt_4o_mini: Credentials, db_connection):
_test_groupchat_logging(credentials_gpt_4o, credentials_gpt_4o_mini, db_connection)
5 changes: 5 additions & 0 deletions test/agentchat/test_assistant_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ def test_ai_user_proxy_agent_gemini(credentials_gemini_pro: Credentials) -> None
_test_ai_user_proxy_agent(credentials_gemini_pro)


@pytest.mark.anthropic
def test_ai_user_proxy_agent_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
_test_ai_user_proxy_agent(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
def test_ai_user_proxy_agent(credentials_gpt_4o_mini: Credentials) -> None:
_test_ai_user_proxy_agent(credentials_gpt_4o_mini)
Expand Down
12 changes: 12 additions & 0 deletions test/agentchat/test_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ async def test_async_groupchat_gemini(credentials_gemini_pro: Credentials):
await _test_async_groupchat(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_groupchat_anthropic(credentials_anthropic_claude_sonnet: Credentials):
await _test_async_groupchat(credentials_anthropic_claude_sonnet)


async def _test_stream(credentials: Credentials):
config_list = credentials.config_list
data = asyncio.Future()
Expand Down Expand Up @@ -176,3 +182,9 @@ async def test_stream(credentials_gpt_4o_mini: Credentials):
@pytest.mark.asyncio
async def test_stream_gemini(credentials_gemini_pro: Credentials):
await _test_stream(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_stream_anthropic(credentials_anthropic_claude_sonnet: Credentials):
await _test_stream(credentials_anthropic_claude_sonnet)
12 changes: 12 additions & 0 deletions test/agentchat/test_async_get_human_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ async def test_async_get_human_input_gemini(credentials_gemini_pro: Credentials)
await _test_async_get_human_input(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_get_human_input_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
await _test_async_get_human_input(credentials_anthropic_claude_sonnet)


async def _test_async_max_turn(credentials: Credentials):
config_list = credentials.config_list

Expand Down Expand Up @@ -90,3 +96,9 @@ async def test_async_max_turn(credentials_gpt_4o_mini: Credentials):
@pytest.mark.asyncio
async def test_async_max_turn_gemini(credentials_gemini_pro: Credentials):
await _test_async_max_turn(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_max_turn_anthropic(credentials_anthropic_claude_sonnet: Credentials):
await _test_async_max_turn(credentials_anthropic_claude_sonnet)
8 changes: 8 additions & 0 deletions test/agentchat/test_cache_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,20 @@ def test_redis_cache(credentials_gpt_4o_mini: Credentials):
_test_redis_cache(credentials_gpt_4o_mini)


@pytest.mark.skip(reason="Currently not working")
@pytest.mark.gemini
@pytest.mark.skipif(skip_tests or skip_redis_tests, reason="redis not installed OR openai not installed")
def test_redis_cache_gemini(credentials_gemini_pro: Credentials):
_test_redis_cache(credentials_gemini_pro)


@pytest.mark.skip(reason="Currently not working")
@pytest.mark.anthropic
@pytest.mark.skipif(skip_tests or skip_redis_tests, reason="redis not installed OR openai not installed")
def test_redis_cache_anthropic(credentials_anthropic_claude_sonnet: Credentials):
_test_redis_cache(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
@pytest.mark.skipif(skip_tests, reason="openai not installed")
def test_disk_cache(credentials_gpt_4o_mini: Credentials):
Expand Down
5 changes: 5 additions & 0 deletions test/agentchat/test_chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,11 @@ def test_chats_w_func_gemini(credentials_gemini_pro: Credentials, tasks_work_dir
_test_chats_w_func(credentials_gemini_pro, tasks_work_dir)


@pytest.mark.anthropic
def test_chats_w_func_anthropic(credentials_anthropic_claude_sonnet: Credentials, tasks_work_dir: str):
_test_chats_w_func(credentials_anthropic_claude_sonnet, tasks_work_dir)


@pytest.mark.openai
def test_udf_message_in_chats(credentials_gpt_4o_mini: Credentials, tasks_work_dir: str) -> None:
llm_config_40mini = credentials_gpt_4o_mini.llm_config
Expand Down
6 changes: 6 additions & 0 deletions test/agentchat/test_conversable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,12 @@ async def test_function_registration_e2e_async_gemini(credentials_gemini_pro: Cr
await _test_function_registration_e2e_async(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_function_registration_e2e_async_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
await _test_function_registration_e2e_async(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
def test_max_turn(credentials_gpt_4o_mini: Credentials) -> None:
# create an AssistantAgent instance named "assistant"
Expand Down
6 changes: 6 additions & 0 deletions test/agentchat/test_dependancy_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,9 @@ async def test_end2end(self, credentials_gpt_4o_mini, is_async: bool) -> None:
@pytest.mark.asyncio
async def test_end2end_gemini(self, credentials_gemini_pro, is_async: bool) -> None:
self._test_end2end(credentials_gemini_pro, is_async)

@pytest.mark.anthropic
@pytest.mark.parametrize("is_async", [False, True])
@pytest.mark.asyncio
async def test_end2end_anthropic(self, credentials_anthropic_claude_sonnet, is_async: bool) -> None:
self._test_end2end(credentials_anthropic_claude_sonnet, is_async)
10 changes: 10 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,16 @@ def credentials_gemini_pro() -> Credentials:
)


@pytest.fixture
def credentials_anthropic_claude_sonnet() -> Credentials:
return get_llm_credentials(
"ANTHROPIC_API_KEY",
model="claude-3-sonnet-20240229",
api_type="anthropic",
filter_dict={"tags": ["anthropic-claude-sonnet"]},
)


def get_mock_credentials(model: str, temperature: float = 0.6) -> Credentials:
llm_config = {
"config_list": [
Expand Down

0 comments on commit 6ffce58

Please sign in to comment.