Description
Hi,
When running the following cell:
faqs = doc2cache.invoke({"doc": sample_doc})
it fails with the following output:
16:17:24 openai._base_client INFO Retrying request to /completions in 0.433632 seconds
16:17:25 openai._base_client INFO Retrying request to /completions in 0.830750 seconds
RemoteProtocolError Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:101, in map_httpcore_exceptions()
100 try:
--> 101 yield
102 except Exception as exc:
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:250, in HTTPTransport.handle_request(self, request)
249 with map_httpcore_exceptions():
--> 250 resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:256, in ConnectionPool.handle_request(self, request)
255 self._close_connections(closing)
--> 256 raise exc from None
258 # Return the response. Note that in this case we still have to manage
259 # the point at which the response is closed.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:236, in ConnectionPool.handle_request(self, request)
234 try:
235 # Send the request on the assigned connection.
--> 236 response = connection.handle_request(
237 pool_request.request
238 )
239 except ConnectionNotAvailable:
240 # In some cases a connection may initially be available to
241 # handle a request, but then become unavailable.
242 #
243 # In this case we clear the connection and try again.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection.py:103, in HTTPConnection.handle_request(self, request)
101 raise exc
--> 103 return self._connection.handle_request(request)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:136, in HTTP11Connection.handle_request(self, request)
135 self._response_closed()
--> 136 raise exc
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:106, in HTTP11Connection.handle_request(self, request)
97 with Trace(
98 "receive_response_headers", logger, request, kwargs
99 ) as trace:
100 (
101 http_version,
102 status,
103 reason_phrase,
104 headers,
105 trailing_data,
--> 106 ) = self._receive_response_headers(**kwargs)
107 trace.return_value = (
108 http_version,
109 status,
110 reason_phrase,
111 headers,
112 )
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
176 while True:
--> 177 event = self._receive_event(timeout=timeout)
178 if isinstance(event, h11.Response):
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:231, in HTTP11Connection._receive_event(self, timeout)
230 msg = "Server disconnected without sending a response."
--> 231 raise RemoteProtocolError(msg)
233 self._h11_state.receive_data(data)
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
RemoteProtocolError Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:989, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
988 try:
--> 989 response = self._client.send(
990 request,
991 stream=stream or self._should_stream_response_body(request=request),
992 **kwargs,
993 )
994 except httpx.TimeoutException as err:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
915 request,
916 auth=auth,
917 follow_redirects=follow_redirects,
918 history=[],
919 )
920 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
941 while True:
--> 942 response = self._send_handling_redirects(
943 request,
944 follow_redirects=follow_redirects,
945 history=history,
946 )
947 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
977 hook(request)
--> 979 response = self._send_single_request(request)
980 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:1014, in Client._send_single_request(self, request)
1013 with request_context(request=request):
-> 1014 response = transport.handle_request(request)
1016 assert isinstance(response.stream, SyncByteStream)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:249, in HTTPTransport.handle_request(self, request)
237 req = httpcore.Request(
238 method=request.method,
239 url=httpcore.URL(
(...) 247 extensions=request.extensions,
248 )
--> 249 with map_httpcore_exceptions():
250 resp = self._pool.handle_request(req)
File C:\Program Files\Python313\Lib\contextlib.py:162, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
161 try:
--> 162 self.gen.throw(value)
163 except StopIteration as exc:
164 # Suppress StopIteration unless it's the same exception that
165 # was passed to throw(). This prevents a StopIteration
166 # raised inside the "with" statement from being suppressed.
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:118, in map_httpcore_exceptions()
117 message = str(exc)
--> 118 raise mapped_exc(message) from exc
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
APIConnectionError Traceback (most recent call last)
Cell In[11], line 1
----> 1 faqs = doc2cache.invoke({"doc": sample_doc})
File ~\litellm-qa-env\Lib\site-packages\langchain_core\runnables\base.py:3034, in RunnableSequence.invoke(self, input, config, **kwargs)
3032 input = context.run(step.invoke, input, config, **kwargs)
3033 else:
-> 3034 input = context.run(step.invoke, input, config)
3035 # finish the root run
3036 except BaseException as e:
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:387, in BaseLLM.invoke(self, input, config, stop, **kwargs)
376 @override
377 def invoke(
378 self,
(...) 383 **kwargs: Any,
384 ) -> str:
385 config = ensure_config(config)
386 return (
--> 387 self.generate_prompt(
388 [self._convert_input(input)],
389 stop=stop,
390 callbacks=config.get("callbacks"),
391 tags=config.get("tags"),
392 metadata=config.get("metadata"),
393 run_name=config.get("run_name"),
394 run_id=config.pop("run_id", None),
395 **kwargs,
396 )
397 .generations[0][0]
398 .text
399 )
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:764, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
755 @override
756 def generate_prompt(
757 self,
(...) 761 **kwargs: Any,
762 ) -> LLMResult:
763 prompt_strings = [p.to_string() for p in prompts]
--> 764 return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:971, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
956 if (self.cache is None and get_llm_cache() is None) or self.cache is False:
957 run_managers = [
958 callback_manager.on_llm_start(
959 self._serialized,
(...) 969 )
970 ]
--> 971 return self._generate_helper(
972 prompts,
973 stop,
974 run_managers,
975 new_arg_supported=bool(new_arg_supported),
976 **kwargs,
977 )
978 if len(missing_prompts) > 0:
979 run_managers = [
980 callback_managers[idx].on_llm_start(
981 self._serialized,
(...) 988 for idx in missing_prompt_idxs
989 ]
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:790, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
779 def _generate_helper(
780 self,
781 prompts: list[str],
(...) 786 **kwargs: Any,
787 ) -> LLMResult:
788 try:
789 output = (
--> 790 self._generate(
791 prompts,
792 stop=stop,
793 # TODO: support multiple run managers
794 run_manager=run_managers[0] if run_managers else None,
795 **kwargs,
796 )
797 if new_arg_supported
798 else self._generate(prompts, stop=stop)
799 )
800 except BaseException as e:
801 for run_manager in run_managers:
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:463, in BaseOpenAI._generate(self, prompts, stop, run_manager, **kwargs)
451 choices.append(
452 {
453 "text": generation.text,
(...) 460 }
461 )
462 else:
--> 463 response = completion_with_retry(
464 self, prompt=_prompts, run_manager=run_manager, **params
465 )
466 if not isinstance(response, dict):
467 # V1 client returns the response in an PyDantic object instead of
468 # dict. For the transition period, we deep convert it to dict.
469 response = response.dict()
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:121, in completion_with_retry(llm, run_manager, **kwargs)
119 """Use tenacity to retry the completion call."""
120 if is_openai_v1():
--> 121 return llm.client.create(**kwargs)
123 retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
125 @retry_decorator
126 def _completion_with_retry(**kwargs: Any) -> Any:
File ~\litellm-qa-env\Lib\site-packages\openai\_utils\_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
277 msg = f"Missing required argument: {quote(missing[0])}"
278 raise TypeError(msg)
--> 279 return func(*args, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\openai\resources\completions.py:545, in Completions.create(self, model, prompt, best_of, echo, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, seed, stop, stream, stream_options, suffix, temperature, top_p, user, extra_headers, extra_query, extra_body, timeout)
516 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
517 def create(
518 self,
(...) 543 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
544 ) -> Completion | Stream[Completion]:
--> 545 return self._post(
546 "/completions",
547 body=maybe_transform(
548 {
549 "model": model,
550 "prompt": prompt,
551 "best_of": best_of,
552 "echo": echo,
553 "frequency_penalty": frequency_penalty,
554 "logit_bias": logit_bias,
555 "logprobs": logprobs,
556 "max_tokens": max_tokens,
557 "n": n,
558 "presence_penalty": presence_penalty,
559 "seed": seed,
560 "stop": stop,
561 "stream": stream,
562 "stream_options": stream_options,
563 "suffix": suffix,
564 "temperature": temperature,
565 "top_p": top_p,
566 "user": user,
567 },
568 completion_create_params.CompletionCreateParamsStreaming
569 if stream
570 else completion_create_params.CompletionCreateParamsNonStreaming,
571 ),
572 options=make_request_options(
573 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
574 ),
575 cast_to=Completion,
576 stream=stream or False,
577 stream_cls=Stream[Completion],
578 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1276, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1262 def post(
1263 self,
1264 path: str,
(...) 1271 stream_cls: type[_StreamT] | None = None,
1272 ) -> ResponseT | _StreamT:
1273 opts = FinalRequestOptions.construct(
1274 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1275 )
-> 1276 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:949, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
946 else:
947 retries_taken = 0
--> 949 return self._request(
950 cast_to=cast_to,
951 options=options,
952 stream=stream,
953 stream_cls=stream_cls,
954 retries_taken=retries_taken,
955 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013 return self._retry_request(
1014 input_options,
1015 cast_to,
1016 retries_taken=retries_taken,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092 options=options,
1093 cast_to=cast_to,
1094 retries_taken=retries_taken + 1,
1095 stream=stream,
1096 stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013 return self._retry_request(
1014 input_options,
1015 cast_to,
1016 retries_taken=retries_taken,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092 options=options,
1093 cast_to=cast_to,
1094 retries_taken=retries_taken + 1,
1095 stream=stream,
1096 stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1013 return self._retry_request(
1014 input_options,
1015 cast_to,
(...) 1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
-> 1023 raise APIConnectionError(request=request) from err
1025 log.debug(
1026 'HTTP Response: %s %s "%i %s" %s',
1027 request.method,
(...) 1031 response.headers,
1032 )
1033 log.debug("request_id: %s", response.headers.get("x-request-id"))
APIConnectionError: Connection error.
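For context, here is a minimal sketch of how a doc2cache-style chain can be wired when the OpenAI completions client is routed through a LiteLLM proxy. The proxy URL, model name, API key, prompt text, and sample document below are illustrative assumptions only (the notebook cells that define doc2cache and sample_doc are not shown above); the point of the sketch is that the underlying RemoteProtocolError ("Server disconnected without sending a response") is raised by httpcore when whatever is listening at the configured openai_api_base drops the connection, so the base URL is the first thing worth checking.

# Minimal sketch, assuming doc2cache is a LangChain prompt | legacy-completions-LLM
# chain pointed at a local LiteLLM proxy. Every concrete value here (URL, model,
# key, prompt, sample_doc) is a placeholder assumption, not taken from the report.
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import OpenAI

prompt = PromptTemplate.from_template(
    "Generate FAQ-style question/answer pairs for this document:\n\n{doc}"
)

llm = OpenAI(
    model="gpt-3.5-turbo-instruct",              # assumed completions-capable model
    openai_api_base="http://localhost:4000/v1",  # assumed LiteLLM proxy endpoint
    openai_api_key="sk-anything",                # placeholder key for the proxy
    max_retries=2,                               # would produce the two retry log lines seen above
)

doc2cache = prompt | llm  # RunnableSequence, matching the traceback

sample_doc = "Example document text."  # placeholder document
faqs = doc2cache.invoke({"doc": sample_doc})
print(faqs)

If a plain HTTP request to the same openai_api_base also hangs or disconnects, the problem is with the endpoint/proxy rather than with the chain itself.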