-
Notifications
You must be signed in to change notification settings - Fork 24
Support reasoning summary models in AzureOpenAIEvalClient #216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
bfab9a4
c6ae1c4
43977cf
9516f23
ce02ed2
8409f03
8d826a3
0ae2cbd
422884c
d98842e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,12 +2,14 @@ | |
|
|
||
| import asyncio | ||
| import os | ||
| import traceback | ||
| import warnings | ||
| from typing import Any, Literal | ||
|
|
||
| import torch | ||
| from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI | ||
| from openai.types.create_embedding_response import CreateEmbeddingResponse | ||
| from openai.types.shared_params import Reasoning, ReasoningEffort | ||
| from pydantic import BaseModel | ||
|
|
||
| from langcheck.metrics.eval_clients.eval_response import ( | ||
|
|
@@ -30,6 +32,10 @@ def __init__( | |
| openai_args: dict[str, str] | None = None, | ||
| *, | ||
| use_async: bool = False, | ||
| use_reasoning_summary: bool = False, | ||
|
taniokay marked this conversation as resolved.
|
||
| reasoning_effort: ReasoningEffort = "medium", | ||
| reasoning_summary: Literal["auto", "concise", "detailed"] | ||
| | None = "auto", | ||
| system_prompt: str | None = None, | ||
| extractor: Extractor | None = None, | ||
| ): | ||
|
|
@@ -44,6 +50,15 @@ def __init__( | |
| `client.chat.completions.create` function. | ||
| use_async: If True, the async client will be used. Defaults to | ||
| False. | ||
| use_reasoning_summary: Whether to use reasoning summary. | ||
| NOTE: Please make sure that the model and API version support | ||
| reasoning summary. | ||
| https://platform.openai.com/docs/models | ||
| https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning#api--feature-support | ||
| reasoning_effort: How many reasoning tokens to generate. | ||
| This is only used when `use_reasoning_summary` is True. | ||
| reasoning_summary: The level of detail of the summarizer. | ||
| This is only used when `use_reasoning_summary` is True. | ||
| system_prompt (Optional): The system prompt to use. If not provided, | ||
| no system prompt will be used. | ||
| extractor (Optional): The extractor to use. If not provided, the | ||
|
|
@@ -77,6 +92,13 @@ def __init__( | |
| self._openai_args = openai_args | ||
| self._system_prompt = system_prompt | ||
|
|
||
| self._reasoning_effort: ReasoningEffort = ( | ||
| reasoning_effort if use_reasoning_summary else None | ||
| ) | ||
| self._reasoning_summary: ( | ||
| Literal["auto", "concise", "detailed"] | None | ||
| ) = reasoning_summary if use_reasoning_summary else None | ||
|
|
||
| if extractor is None: | ||
| self._extractor = OpenAIExtractor( | ||
| openai_client=self._client, | ||
|
|
@@ -86,6 +108,41 @@ def __init__( | |
| else: | ||
| self._extractor = extractor | ||
|
|
||
| def _dispatch( | ||
| self, | ||
| messages: list[dict[str, str]], | ||
| seed: int | None = None, | ||
| config: dict[str, str] | None = None, | ||
| ) -> Any: | ||
| """Dispatch the API call to the OpenAI API.""" | ||
| if self._reasoning_summary is None: | ||
| return self._client.chat.completions.create( | ||
| messages=messages, # type: ignore | ||
| seed=seed, | ||
| **config, | ||
| ) | ||
| else: | ||
| # To use reasoning summary, we must use the Responses API | ||
| # instead of Chat Completions API. | ||
| # https://platform.openai.com/docs/guides/reasoning#reasoning-summaries | ||
|
|
||
| include = [] | ||
|
|
||
| reasoning: Reasoning = { | ||
| "effort": self._reasoning_effort, | ||
| "summary": self._reasoning_summary, | ||
| } | ||
|
|
||
| # seed and logprobs are not supported in responses API. | ||
| return self._client.responses.create( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah nice, I was wondering if we set this properly to avoid logging prompts |
||
| input=messages, # type: ignore | ||
| include=include, | ||
| store=False, | ||
| reasoning=reasoning, | ||
| truncation="auto", | ||
| **config, | ||
| ) | ||
|
|
||
| def _call_api( | ||
| self, | ||
| prompts: list[str], | ||
|
|
@@ -100,7 +157,11 @@ def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any: | |
| if model_input is None: | ||
| return None | ||
| try: | ||
| return self._client.chat.completions.create(**model_input) | ||
| return self._dispatch( | ||
| model_input["messages"], | ||
| model_input["seed"], | ||
| config=config, | ||
| ) | ||
|
taniokay marked this conversation as resolved.
|
||
| except Exception as e: | ||
| return e | ||
|
|
||
|
|
@@ -114,7 +175,6 @@ def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any: | |
| "messages": system_message | ||
| + [{"role": "user", "content": prompt}], | ||
| "seed": i, | ||
| **config, | ||
| } | ||
| for i, prompt in enumerate(prompts) | ||
| ] | ||
|
|
@@ -124,8 +184,10 @@ def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any: | |
| async def _call_async_api() -> list[Any]: | ||
| responses = await asyncio.gather( | ||
| *map( | ||
| lambda model_input: self._client.chat.completions.create( | ||
| **model_input | ||
| lambda model_input: self._dispatch( | ||
| model_input["messages"], | ||
| model_input["seed"], | ||
| config=config, | ||
| ), | ||
|
taniokay marked this conversation as resolved.
|
||
| model_inputs, | ||
| ), | ||
|
|
@@ -146,6 +208,7 @@ async def _call_async_api() -> list[Any]: | |
| for i, response in enumerate(responses): | ||
| if not isinstance(response, Exception): | ||
| continue | ||
| traceback.print_exception(response) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Logprobs Misrouted Causing Disabled Output and Errors. The |
||
| print( | ||
|
taniokay marked this conversation as resolved.
|
||
| "OpenAI failed to return an assessment corresponding to " | ||
| f"{i}th prompt: {response}" | ||
|
|
@@ -185,11 +248,39 @@ def get_text_responses( | |
| tqdm_description=tqdm_description, | ||
| system_prompt=self._system_prompt, | ||
| ) | ||
| response_texts = [ | ||
| response.choices[0].message.content if response else None | ||
| for response in responses | ||
| ] | ||
|
|
||
| response_texts = [] | ||
| for response in responses: | ||
| if not response: | ||
| response_texts.append(None) | ||
| continue | ||
| # Use the Responses API only when a reasoning summary is required. | ||
| # Otherwise, use the Chat Completions API. | ||
| if self._reasoning_summary is None: | ||
| content = response.choices[0].message.content | ||
| else: | ||
| content = None | ||
| summaries = [] | ||
|
|
||
| for output in response.output: | ||
| if hasattr(output, "summary"): | ||
| if output.summary == []: | ||
| print( | ||
| "Reasoning summary is empty. " | ||
| "This may happen even if model supports reasoning summary." | ||
| ) | ||
| continue | ||
|
|
||
| # Summary can be a list of summaries | ||
| summaries.extend([s.text for s in output.summary]) | ||
| elif hasattr(output, "content"): | ||
| content = output.content[0].text | ||
|
|
||
| if content is not None and summaries: | ||
| summaries_str = "\n\n".join(summaries) | ||
| content += f"\n\n**Reasoning Summary:**\n\n{summaries_str}" | ||
|
|
||
| response_texts.append(content) | ||
|
kennysong marked this conversation as resolved.
taniokay marked this conversation as resolved.
|
||
| # Token usage is not supported in OpenAIEvalClient | ||
| # If you need token usage, please use LiteLLMEvalClient instead. | ||
| return ResponsesWithMetadata(response_texts, None) | ||
|
|
@@ -425,6 +516,7 @@ def _call_api_with_exception_filter( | |
| "OpenAI failed to return an assessment corresponding to " | ||
| f"{i}th prompt: {response}" | ||
| ) | ||
| traceback.print_exception(response) | ||
| responses[i] = None | ||
|
|
||
| assessments = [ | ||
|
|
@@ -454,6 +546,10 @@ def __init__( | |
| openai_args: dict[str, str] | None = None, | ||
| *, | ||
| use_async: bool = False, | ||
| use_reasoning_summary: bool = False, | ||
| reasoning_effort: ReasoningEffort = "medium", | ||
| reasoning_summary: Literal["auto", "concise", "detailed"] | ||
|
taniokay marked this conversation as resolved.
|
||
| | None = "auto", | ||
| system_prompt: str | None = None, | ||
| extractor: Extractor | None = None, | ||
| ): | ||
|
|
@@ -473,6 +569,15 @@ def __init__( | |
| openai_args (Optional): dict of additional args to pass in to the | ||
| `client.chat.completions.create` function. | ||
| use_async (Optional): If True, the async client will be used. | ||
| use_reasoning_summary: Whether to use reasoning summary. | ||
| NOTE: Please make sure that the model and API version support | ||
| reasoning summary. | ||
| https://platform.openai.com/docs/models | ||
| https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning#api--feature-support | ||
| reasoning_effort: How many reasoning tokens to generate. | ||
| This is only used when `use_reasoning_summary` is True. | ||
| reasoning_summary: The level of detail of the summarizer. | ||
| This is only used when `use_reasoning_summary` is True. | ||
| system_prompt (Optional): The system prompt to use. If not provided, | ||
| no system prompt will be used. | ||
| extractor (Optional): The extractor to use. If not provided, the | ||
|
|
@@ -541,6 +646,13 @@ def __init__( | |
| self._openai_args = openai_args or {} | ||
| self._system_prompt = system_prompt | ||
|
|
||
| self._reasoning_effort: ReasoningEffort = ( | ||
| reasoning_effort if use_reasoning_summary else None | ||
| ) | ||
| self._reasoning_summary: ( | ||
| Literal["auto", "concise", "detailed"] | None | ||
| ) = reasoning_summary if use_reasoning_summary else None | ||
|
|
||
| if self._text_model_name is not None: | ||
| self._openai_args["model"] = self._text_model_name | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's also bump the version in this PR!
https://langcheck.readthedocs.io/en/latest/contributing.html#publishing