Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add anthropic llm tests #519

Merged
merged 5 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/core-llm-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
test:
strategy:
matrix:
llm: ["openai", "gemini"]
llm: ["openai", "gemini", "anthropic"]
python-version: ["3.9"]
os: [ubuntu-latest]
fail-fast: false
Expand Down Expand Up @@ -52,7 +52,7 @@ jobs:
uv pip install --system -e ".[test,redis,interop]"
python -c "import autogen"
- name: Install packages for ${{ matrix.llm }}
if: matrix.llm == 'gemini'
if: matrix.llm != 'openai'
run: |
docker --version
uv pip install --system -e ".[test,redis,interop,${{ matrix.llm }}]"
Expand All @@ -61,6 +61,7 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ markers = [
"all",
"openai",
"gemini",
"anthropic",
"redis",
"docker",
]
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-core-llm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0

# Default mark if none is provided
DEFAULT_MARK="openai or gemini"
DEFAULT_MARK="openai or gemini or anthropic"

# Initialize MARK as the default value
MARK="$DEFAULT_MARK"
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-skip-llm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
#
# SPDX-License-Identifier: Apache-2.0

bash scripts/test.sh -m "not (openai or gemini)" "$@"
bash scripts/test.sh -m "not (openai or gemini or anthropic)" "$@"
10 changes: 10 additions & 0 deletions test/agentchat/test_agent_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ def test_two_agents_logging_gemini(credentials_gemini_pro: Credentials, db_conne
_test_two_agents_logging(credentials_gemini_pro, db_connection, row_classes=["GeminiClient"])


@pytest.mark.anthropic
def test_two_agents_logging_anthropic(credentials_anthropic_claude_sonnet: Credentials, db_connection) -> None:
    """Run the shared two-agent logging scenario with Anthropic Claude Sonnet credentials.

    Expects logged rows tagged with the "AnthropicClient" class.
    """
    _test_two_agents_logging(credentials_anthropic_claude_sonnet, db_connection, row_classes=["AnthropicClient"])


@pytest.mark.openai
def test_two_agents_logging(credentials: Credentials, db_connection) -> None:
    """Run the shared two-agent logging scenario with the default (OpenAI) credentials."""
    _test_two_agents_logging(credentials, db_connection)
Expand Down Expand Up @@ -255,6 +260,11 @@ def test_groupchat_logging_gemini(credentials_gemini_pro: Credentials, db_connec
_test_groupchat_logging(credentials_gemini_pro, credentials_gemini_pro, db_connection)


@pytest.mark.anthropic
def test_groupchat_logging_anthropic(credentials_anthropic_claude_sonnet: Credentials, db_connection) -> None:
    """Run the shared group-chat logging scenario with both agents on Anthropic Claude Sonnet."""
    _test_groupchat_logging(credentials_anthropic_claude_sonnet, credentials_anthropic_claude_sonnet, db_connection)


@pytest.mark.openai
def test_groupchat_logging(credentials_gpt_4o: Credentials, credentials_gpt_4o_mini: Credentials, db_connection) -> None:
    """Run the shared group-chat logging scenario with two different OpenAI model credentials."""
    _test_groupchat_logging(credentials_gpt_4o, credentials_gpt_4o_mini, db_connection)
5 changes: 5 additions & 0 deletions test/agentchat/test_assistant_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ def test_ai_user_proxy_agent_gemini(credentials_gemini_pro: Credentials) -> None
_test_ai_user_proxy_agent(credentials_gemini_pro)


@pytest.mark.anthropic
def test_ai_user_proxy_agent_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared AI user-proxy agent scenario with Anthropic Claude Sonnet credentials."""
    _test_ai_user_proxy_agent(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
def test_ai_user_proxy_agent(credentials_gpt_4o_mini: Credentials) -> None:
    """Run the shared AI user-proxy agent scenario with OpenAI GPT-4o-mini credentials."""
    _test_ai_user_proxy_agent(credentials_gpt_4o_mini)
Expand Down
12 changes: 12 additions & 0 deletions test/agentchat/test_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ async def test_async_groupchat_gemini(credentials_gemini_pro: Credentials):
await _test_async_groupchat(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_groupchat_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared async group-chat scenario with Anthropic Claude Sonnet credentials."""
    await _test_async_groupchat(credentials_anthropic_claude_sonnet)


async def _test_stream(credentials: Credentials):
config_list = credentials.config_list
data = asyncio.Future()
Expand Down Expand Up @@ -176,3 +182,9 @@ async def test_stream(credentials_gpt_4o_mini: Credentials):
@pytest.mark.asyncio
async def test_stream_gemini(credentials_gemini_pro: Credentials):
await _test_stream(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_stream_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared async streaming scenario with Anthropic Claude Sonnet credentials."""
    await _test_stream(credentials_anthropic_claude_sonnet)
12 changes: 12 additions & 0 deletions test/agentchat/test_async_get_human_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ async def test_async_get_human_input_gemini(credentials_gemini_pro: Credentials)
await _test_async_get_human_input(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_get_human_input_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared async human-input scenario with Anthropic Claude Sonnet credentials."""
    await _test_async_get_human_input(credentials_anthropic_claude_sonnet)


async def _test_async_max_turn(credentials: Credentials):
config_list = credentials.config_list

Expand Down Expand Up @@ -90,3 +96,9 @@ async def test_async_max_turn(credentials_gpt_4o_mini: Credentials):
@pytest.mark.asyncio
async def test_async_max_turn_gemini(credentials_gemini_pro: Credentials):
await _test_async_max_turn(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_async_max_turn_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared async max-turn scenario with Anthropic Claude Sonnet credentials."""
    await _test_async_max_turn(credentials_anthropic_claude_sonnet)
8 changes: 8 additions & 0 deletions test/agentchat/test_cache_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,20 @@ def test_redis_cache(credentials_gpt_4o_mini: Credentials):
_test_redis_cache(credentials_gpt_4o_mini)


# NOTE(review): explicitly skipped pending a fix — see the skip reason below.
@pytest.mark.skip(reason="Currently not working")
@pytest.mark.gemini
@pytest.mark.skipif(skip_tests or skip_redis_tests, reason="redis not installed OR openai not installed")
def test_redis_cache_gemini(credentials_gemini_pro: Credentials) -> None:
    """Run the shared Redis-cache scenario with Gemini Pro credentials."""
    _test_redis_cache(credentials_gemini_pro)


# NOTE(review): explicitly skipped pending a fix — see the skip reason below.
@pytest.mark.skip(reason="Currently not working")
@pytest.mark.anthropic
@pytest.mark.skipif(skip_tests or skip_redis_tests, reason="redis not installed OR openai not installed")
def test_redis_cache_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared Redis-cache scenario with Anthropic Claude Sonnet credentials."""
    _test_redis_cache(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
@pytest.mark.skipif(skip_tests, reason="openai not installed")
def test_disk_cache(credentials_gpt_4o_mini: Credentials):
Expand Down
5 changes: 5 additions & 0 deletions test/agentchat/test_chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,11 @@ def test_chats_w_func_gemini(credentials_gemini_pro: Credentials, tasks_work_dir
_test_chats_w_func(credentials_gemini_pro, tasks_work_dir)


@pytest.mark.anthropic
def test_chats_w_func_anthropic(credentials_anthropic_claude_sonnet: Credentials, tasks_work_dir: str) -> None:
    """Run the shared chats-with-functions scenario with Anthropic Claude Sonnet credentials."""
    _test_chats_w_func(credentials_anthropic_claude_sonnet, tasks_work_dir)


@pytest.mark.openai
def test_udf_message_in_chats(credentials_gpt_4o_mini: Credentials, tasks_work_dir: str) -> None:
llm_config_40mini = credentials_gpt_4o_mini.llm_config
Expand Down
6 changes: 6 additions & 0 deletions test/agentchat/test_conversable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,12 @@ async def test_function_registration_e2e_async_gemini(credentials_gemini_pro: Cr
await _test_function_registration_e2e_async(credentials_gemini_pro)


@pytest.mark.anthropic
@pytest.mark.asyncio
async def test_function_registration_e2e_async_anthropic(credentials_anthropic_claude_sonnet: Credentials) -> None:
    """Run the shared async function-registration end-to-end scenario with Anthropic Claude Sonnet."""
    await _test_function_registration_e2e_async(credentials_anthropic_claude_sonnet)


@pytest.mark.openai
def test_max_turn(credentials_gpt_4o_mini: Credentials) -> None:
# create an AssistantAgent instance named "assistant"
Expand Down
6 changes: 6 additions & 0 deletions test/agentchat/test_dependancy_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,9 @@ async def test_end2end(self, credentials_gpt_4o_mini, is_async: bool) -> None:
@pytest.mark.asyncio
async def test_end2end_gemini(self, credentials_gemini_pro, is_async: bool) -> None:
self._test_end2end(credentials_gemini_pro, is_async)

@pytest.mark.anthropic
@pytest.mark.parametrize("is_async", [False, True])
@pytest.mark.asyncio
async def test_end2end_anthropic(self, credentials_anthropic_claude_sonnet, is_async: bool) -> None:
    """Run the class's end-to-end scenario (sync and async variants) with Anthropic Claude Sonnet."""
    self._test_end2end(credentials_anthropic_claude_sonnet, is_async)
10 changes: 10 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,16 @@ def credentials_gemini_pro() -> Credentials:
)


@pytest.fixture
def credentials_anthropic_claude_sonnet() -> Credentials:
    """Fixture providing Credentials configured for Anthropic's Claude 3 Sonnet model.

    Resolved through get_llm_credentials using the ANTHROPIC_API_KEY secret name;
    presumably filters the config list to entries tagged "anthropic-claude-sonnet"
    — confirm against get_llm_credentials' implementation.
    """
    claude_options = {
        "model": "claude-3-sonnet-20240229",
        "api_type": "anthropic",
        "filter_dict": {"tags": ["anthropic-claude-sonnet"]},
    }
    return get_llm_credentials("ANTHROPIC_API_KEY", **claude_options)


def get_mock_credentials(model: str, temperature: float = 0.6) -> Credentials:
llm_config = {
"config_list": [
Expand Down
Loading