From 885d865475550d241b7fed7bf9db272bcccefc41 Mon Sep 17 00:00:00 2001
From: Meghana Mandava
Date: Sat, 2 Aug 2025 17:41:14 -0500
Subject: [PATCH 1/2] Fixes for issue #351: Updated concatenate handler

---
 extract_thinker/concatenation_handler.py | 34 ++++++++++++++++++
 extract_thinker/llm.py                   | 46 ++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/extract_thinker/concatenation_handler.py b/extract_thinker/concatenation_handler.py
index f8f51ae..f4b12cf 100644
--- a/extract_thinker/concatenation_handler.py
+++ b/extract_thinker/concatenation_handler.py
@@ -58,6 +58,40 @@ def handle(self, content: Any, response_model: type[BaseModel], vision: bool = F
                     raise ValueError(f"Maximum retries reached: {str(e)}")
                 retry_count += 1
                 messages = self._build_continuation_messages(messages, response)
+
+    def handle1(self, content: Any, response_model: type[BaseModel], vision: bool = False, extra_content: Optional[str] = None) -> Any:
+        self.json_parts = []
+        messages = self._build_messages(content, vision, response_model)
+
+        if extra_content:
+            self._add_extra_content(messages, extra_content)
+
+        retry_count = 0
+        max_retries = 3
+        while True:
+            try:
+                response_obj = self.llm.raw_completion_complete(messages)
+                response = response_obj.message.content
+                finish_reason = response_obj.finish_reason
+
+                self.json_parts.append(response)
+
+                if self._is_finish(response_obj): # ← ✅ check if response is "finished"
+                    result = self._process_json_parts(response_model)
+                    return result
+
+                # If not finished, continue building messages and retry
+                retry_count += 1
+                if retry_count >= max_retries:
+                    raise ValueError("Maximum retries reached with incomplete response")
+                messages = self._build_continuation_messages(messages, response)
+
+            except ValueError as e:
+                if retry_count >= max_retries:
+                    raise ValueError(f"Maximum retries reached: {str(e)}")
+                retry_count += 1
+                messages = self._build_continuation_messages(messages, response)
 
+    
     def _process_json_parts(self, response_model: type[BaseModel]) -> Any:
         """Process collected JSON parts into a complete response."""
diff --git a/extract_thinker/llm.py b/extract_thinker/llm.py
index 95c9971..828916a 100644
--- a/extract_thinker/llm.py
+++ b/extract_thinker/llm.py
@@ -322,6 +322,52 @@ def raw_completion(self, messages: List[Dict[str, str]]) -> str:
             raw_response = litellm.completion(**params)
 
         return raw_response.choices[0].message.content
+
+    def raw_completion_complete(self, messages: List[Dict[str, str]]) -> Any:
+        """Make a raw completion request and return the full first choice (message plus finish_reason)."""
+        if self.backend == LLMEngine.PYDANTIC_AI:
+            # Combine messages into a single prompt
+            combined_prompt = " ".join([m["content"] for m in messages])
+            try:
+                result = asyncio.run(
+                    self.agent.run(
+                        combined_prompt,
+                        result_type=str
+                    )
+                )
+                return result.data
+            except Exception as e:
+                raise ValueError(f"Failed to extract from source: {str(e)}")
+
+        max_tokens = self.DEFAULT_OUTPUT_TOKENS
+        if self.token_limit is not None:
+            max_tokens = self.token_limit
+        elif self.is_thinking:
+            max_tokens = self.thinking_token_limit
+
+        params = {
+            "model": self.model,
+            "messages": messages,
+            "max_completion_tokens": max_tokens,
+        }
+
+        if self.is_thinking:
+            if litellm.supports_reasoning(self.model):
+                # Add thinking parameter for supported models
+                thinking_param = {
+                    "type": "enabled",
+                    "budget_tokens": self.thinking_budget
+                }
+                params["thinking"] = thinking_param
+            else:
+                print(f"Warning: Model {self.model} doesn't support thinking parameter, proceeding without it.")
+
+        if self.router:
+            raw_response = self.router.completion(**params)
+        else:
+            raw_response = litellm.completion(**params)
+
+        return raw_response.choices[0]
 
     def set_timeout(self, timeout_ms: int) -> None:
         """Set the timeout value for LLM requests in milliseconds."""

From 0354b4d05da39d09c84003a4ce2040b6ad5aee39 Mon Sep 17 00:00:00 2001
From: Meghana Mandava
Date: Sat, 2 Aug 2025 18:03:07 -0500
Subject: [PATCH 2/2] Fixes for issue #351: Updated concatenate handle function

---
 extract_thinker/concatenation_handler.py | 38 ++----------------------
 1 file changed, 3 insertions(+), 35 deletions(-)

diff --git a/extract_thinker/concatenation_handler.py b/extract_thinker/concatenation_handler.py
index f4b12cf..4d74fb8 100644
--- a/extract_thinker/concatenation_handler.py
+++ b/extract_thinker/concatenation_handler.py
@@ -27,39 +27,8 @@ def _is_valid_json_continuation(self, response: str) -> bool:
 
         return has_json_markers
 
-    def handle(self, content: Any, response_model: type[BaseModel], vision: bool = False, extra_content: Optional[str] = None) -> Any:
-        self.json_parts = []
-        messages = self._build_messages(content, vision, response_model)
-
-        if extra_content:
-            self._add_extra_content(messages, extra_content)
-
-        retry_count = 0
-        max_retries = 3
-        while True:
-            try:
-                response = self.llm.raw_completion(messages)
-
-                # Validate if it's a proper JSON continuation
-                if not self._is_valid_json_continuation(response):
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        raise ValueError("Maximum retries reached with invalid JSON continuations")
-                    continue
-
-                self.json_parts.append(response)
-
-                # Try to process and validate the JSON
-                result = self._process_json_parts(response_model)
-                return result
-
-            except ValueError as e:
-                if retry_count >= max_retries:
-                    raise ValueError(f"Maximum retries reached: {str(e)}")
-                retry_count += 1
-                messages = self._build_continuation_messages(messages, response)
 
-    def handle1(self, content: Any, response_model: type[BaseModel], vision: bool = False, extra_content: Optional[str] = None) -> Any:
+    def handle(self, content: Any, response_model: type[BaseModel], vision: bool = False, extra_content: Optional[str] = None) -> Any:
         self.json_parts = []
         messages = self._build_messages(content, vision, response_model)
 
@@ -76,11 +45,10 @@ def handle1(self, content: Any, response_model: type[BaseModel], vision: bool =
                 response_obj = self.llm.raw_completion_complete(messages)
                 response = response_obj.message.content
                 finish_reason = response_obj.finish_reason
 
                 self.json_parts.append(response)
 
-                if self._is_finish(response_obj): # ← ✅ check if response is "finished"
+                if self._is_finish(response_obj):
                     result = self._process_json_parts(response_model)
                     return result
 
-                # If not finished, continue building messages and retry
                 retry_count += 1
                 if retry_count >= max_retries:
                     raise ValueError("Maximum retries reached with incomplete response")
                 messages = self._build_continuation_messages(messages, response)
@@ -92,7 +60,7 @@ def handle1(self, content: Any, response_model: type[BaseModel], vision: bool =
                 retry_count += 1
                 messages = self._build_continuation_messages(messages, response)
 
-    
+
     def _process_json_parts(self, response_model: type[BaseModel]) -> Any:
         """Process collected JSON parts into a complete response."""
        if not self.json_parts:
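
Note on _is_finish: the reworked handle gates on self._is_finish(response_obj), but neither commit defines that helper, so it either already exists on ConcatenationHandler or still needs to be added. A minimal sketch of what it could look like, assuming the litellm convention that a choice's finish_reason is "stop" for a natural stop and "length" for a token-limit truncation (the helper below is an illustration, not code from this PR):

    # Hypothetical helper -- not part of either commit in this PR.
    # Assumes response_obj is the litellm-style choice returned by
    # raw_completion_complete(), i.e. raw_response.choices[0].
    def _is_finish(self, response_obj) -> bool:
        """Return True when the model stopped on its own, False when truncated."""
        finish_reason = getattr(response_obj, "finish_reason", None)
        # "length" means the output hit max_completion_tokens and another
        # continuation round is needed; only "stop" counts as complete.
        return finish_reason == "stop"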
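
Returning raw_response.choices[0] from raw_completion_complete, rather than the bare text that raw_completion returns, is what makes that check possible: the handler needs two fields from each round, roughly as in this sketch (llm here stands for an extract_thinker LLM configured with a litellm backend; names follow the PR):

    # Sketch of how handle() consumes the new return shape.
    choice = llm.raw_completion_complete(messages)
    text = choice.message.content                 # JSON fragment for this round
    truncated = choice.finish_reason == "length"  # continue only if cut off

One caveat: the PYDANTIC_AI branch of raw_completion_complete still returns a plain string (result.data), so response_obj.message.content and the finish check would fail on that backend; that path would need a small wrapper exposing the same message.content and finish_reason attributes.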