
redis-ai-resources/python-recipes/semantic-cache/01_doc2cache_llama3_1.ipynb ERROR #2 #87

@amscosta

Description

Hi

When running the following cell:

faqs = doc2cache.invoke({"doc": sample_doc})

it fails with the following output:
16:17:24 openai._base_client INFO Retrying request to /completions in 0.433632 seconds
16:17:25 openai._base_client INFO Retrying request to /completions in 0.830750 seconds


RemoteProtocolError Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:101, in map_httpcore_exceptions()
100 try:
--> 101 yield
102 except Exception as exc:

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:250, in HTTPTransport.handle_request(self, request)
249 with map_httpcore_exceptions():
--> 250 resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:256, in ConnectionPool.handle_request(self, request)
255 self._close_connections(closing)
--> 256 raise exc from None
258 # Return the response. Note that in this case we still have to manage
259 # the point at which the response is closed.

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:236, in ConnectionPool.handle_request(self, request)
234 try:
235 # Send the request on the assigned connection.
--> 236 response = connection.handle_request(
237 pool_request.request
238 )
239 except ConnectionNotAvailable:
240 # In some cases a connection may initially be available to
241 # handle a request, but then become unavailable.
242 #
243 # In this case we clear the connection and try again.

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection.py:103, in HTTPConnection.handle_request(self, request)
101 raise exc
--> 103 return self._connection.handle_request(request)

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:136, in HTTP11Connection.handle_request(self, request)
135 self._response_closed()
--> 136 raise exc

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:106, in HTTP11Connection.handle_request(self, request)
97 with Trace(
98 "receive_response_headers", logger, request, kwargs
99 ) as trace:
100 (
101 http_version,
102 status,
103 reason_phrase,
104 headers,
105 trailing_data,
--> 106 ) = self._receive_response_headers(**kwargs)
107 trace.return_value = (
108 http_version,
109 status,
110 reason_phrase,
111 headers,
112 )

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
176 while True:
--> 177 event = self._receive_event(timeout=timeout)
178 if isinstance(event, h11.Response):

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:231, in HTTP11Connection._receive_event(self, timeout)
230 msg = "Server disconnected without sending a response."
--> 231 raise RemoteProtocolError(msg)
233 self._h11_state.receive_data(data)

RemoteProtocolError: Server disconnected without sending a response.

The above exception was the direct cause of the following exception:

RemoteProtocolError Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:989, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
988 try:
--> 989 response = self._client.send(
990 request,
991 stream=stream or self._should_stream_response_body(request=request),
992 **kwargs,
993 )
994 except httpx.TimeoutException as err:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
915 request,
916 auth=auth,
917 follow_redirects=follow_redirects,
918 history=[],
919 )
920 try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
941 while True:
--> 942 response = self._send_handling_redirects(
943 request,
944 follow_redirects=follow_redirects,
945 history=history,
946 )
947 try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
977 hook(request)
--> 979 response = self._send_single_request(request)
980 try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:1014, in Client._send_single_request(self, request)
1013 with request_context(request=request):
--> 1014 response = transport.handle_request(request)
1016 assert isinstance(response.stream, SyncByteStream)

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:249, in HTTPTransport.handle_request(self, request)
237 req = httpcore.Request(
238 method=request.method,
239 url=httpcore.URL(
(...) 247 extensions=request.extensions,
248 )
--> 249 with map_httpcore_exceptions():
250 resp = self._pool.handle_request(req)

File C:\Program Files\Python313\Lib\contextlib.py:162, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
161 try:
--> 162 self.gen.throw(value)
163 except StopIteration as exc:
164 # Suppress StopIteration unless it's the same exception that
165 # was passed to throw(). This prevents a StopIteration
166 # raised inside the "with" statement from being suppressed.

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:118, in map_httpcore_exceptions()
117 message = str(exc)
--> 118 raise mapped_exc(message) from exc

RemoteProtocolError: Server disconnected without sending a response.

The above exception was the direct cause of the following exception:

APIConnectionError Traceback (most recent call last)
Cell In[11], line 1
----> 1 faqs = doc2cache.invoke({"doc": sample_doc})

File ~\litellm-qa-env\Lib\site-packages\langchain_core\runnables\base.py:3034, in RunnableSequence.invoke(self, input, config, **kwargs)
3032 input = context.run(step.invoke, input, config, **kwargs)
3033 else:
-> 3034 input = context.run(step.invoke, input, config)
3035 # finish the root run
3036 except BaseException as e:

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:387, in BaseLLM.invoke(self, input, config, stop, **kwargs)
376 @override
377 def invoke(
378 self,
(...) 383 **kwargs: Any,
384 ) -> str:
385 config = ensure_config(config)
386 return (
--> 387 self.generate_prompt(
388 [self._convert_input(input)],
389 stop=stop,
390 callbacks=config.get("callbacks"),
391 tags=config.get("tags"),
392 metadata=config.get("metadata"),
393 run_name=config.get("run_name"),
394 run_id=config.pop("run_id", None),
395 **kwargs,
396 )
397 .generations[0][0]
398 .text
399 )

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:764, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
755 @override
756 def generate_prompt(
757 self,
(...) 761 **kwargs: Any,
762 ) -> LLMResult:
763 prompt_strings = [p.to_string() for p in prompts]
--> 764 return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:971, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
956 if (self.cache is None and get_llm_cache() is None) or self.cache is False:
957 run_managers = [
958 callback_manager.on_llm_start(
959 self._serialized,
(...) 969 )
970 ]
--> 971 return self._generate_helper(
972 prompts,
973 stop,
974 run_managers,
975 new_arg_supported=bool(new_arg_supported),
976 **kwargs,
977 )
978 if len(missing_prompts) > 0:
979 run_managers = [
980 callback_managers[idx].on_llm_start(
981 self._serialized,
(...) 988 for idx in missing_prompt_idxs
989 ]

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:790, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
779 def _generate_helper(
780 self,
781 prompts: list[str],
(...) 786 **kwargs: Any,
787 ) -> LLMResult:
788 try:
789 output = (
--> 790 self._generate(
791 prompts,
792 stop=stop,
793 # TODO: support multiple run managers
794 run_manager=run_managers[0] if run_managers else None,
795 **kwargs,
796 )
797 if new_arg_supported
798 else self._generate(prompts, stop=stop)
799 )
800 except BaseException as e:
801 for run_manager in run_managers:

File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:463, in BaseOpenAI._generate(self, prompts, stop, run_manager, **kwargs)
451 choices.append(
452 {
453 "text": generation.text,
(...) 460 }
461 )
462 else:
--> 463 response = completion_with_retry(
464 self, prompt=_prompts, run_manager=run_manager, **params
465 )
466 if not isinstance(response, dict):
467 # V1 client returns the response in an PyDantic object instead of
468 # dict. For the transition period, we deep convert it to dict.
469 response = response.dict()

File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:121, in completion_with_retry(llm, run_manager, **kwargs)
119 """Use tenacity to retry the completion call."""
120 if is_openai_v1():
--> 121 return llm.client.create(**kwargs)
123 retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
125 @retry_decorator
126 def _completion_with_retry(**kwargs: Any) -> Any:

File ~\litellm-qa-env\Lib\site-packages\openai\_utils\_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
277 msg = f"Missing required argument: {quote(missing[0])}"
278 raise TypeError(msg)
--> 279 return func(*args, **kwargs)

File ~\litellm-qa-env\Lib\site-packages\openai\resources\completions.py:545, in Completions.create(self, model, prompt, best_of, echo, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, seed, stop, stream, stream_options, suffix, temperature, top_p, user, extra_headers, extra_query, extra_body, timeout)
516 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
517 def create(
518 self,
(...) 543 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
544 ) -> Completion | Stream[Completion]:
--> 545 return self._post(
546 "/completions",
547 body=maybe_transform(
548 {
549 "model": model,
550 "prompt": prompt,
551 "best_of": best_of,
552 "echo": echo,
553 "frequency_penalty": frequency_penalty,
554 "logit_bias": logit_bias,
555 "logprobs": logprobs,
556 "max_tokens": max_tokens,
557 "n": n,
558 "presence_penalty": presence_penalty,
559 "seed": seed,
560 "stop": stop,
561 "stream": stream,
562 "stream_options": stream_options,
563 "suffix": suffix,
564 "temperature": temperature,
565 "top_p": top_p,
566 "user": user,
567 },
568 completion_create_params.CompletionCreateParamsStreaming
569 if stream
570 else completion_create_params.CompletionCreateParamsNonStreaming,
571 ),
572 options=make_request_options(
573 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
574 ),
575 cast_to=Completion,
576 stream=stream or False,
577 stream_cls=Stream[Completion],
578 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1276, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1262 def post(
1263 self,
1264 path: str,
(...) 1271 stream_cls: type[_StreamT] | None = None,
1272 ) -> ResponseT | _StreamT:
1273 opts = FinalRequestOptions.construct(
1274 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1275 )
-> 1276 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:949, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
946 else:
947 retries_taken = 0
--> 949 return self._request(
950 cast_to=cast_to,
951 options=options,
952 stream=stream,
953 stream_cls=stream_cls,
954 retries_taken=retries_taken,
955 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013 return self._retry_request(
1014 input_options,
1015 cast_to,
1016 retries_taken=retries_taken,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092 options=options,
1093 cast_to=cast_to,
1094 retries_taken=retries_taken + 1,
1095 stream=stream,
1096 stream_cls=stream_cls,
1097 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013 return self._retry_request(
1014 input_options,
1015 cast_to,
1016 retries_taken=retries_taken,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092 options=options,
1093 cast_to=cast_to,
1094 retries_taken=retries_taken + 1,
1095 stream=stream,
1096 stream_cls=stream_cls,
1097 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1013 return self._retry_request(
1014 input_options,
1015 cast_to,
(...) 1019 response_headers=None,
1020 )
1022 log.debug("Raising connection error")
-> 1023 raise APIConnectionError(request=request) from err
1025 log.debug(
1026 'HTTP Response: %s %s "%i %s" %s',
1027 request.method,
(...) 1031 response.headers,
1032 )
1033 log.debug("request_id: %s", response.headers.get("x-request-id"))

APIConnectionError: Connection error.
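
Presumably the server behind the configured /completions endpoint is dropping the connection before it sends a response, so after two retries the openai client gives up and raises APIConnectionError. A minimal standalone check like the sketch below should raise the same error if that is the case (this is only a sketch: the base URL, API key, and model name are placeholders for whatever OpenAI-compatible endpoint the notebook is configured against, not values taken from the notebook):

import os
import openai

# Placeholder values: point these at the same OpenAI-compatible endpoint the
# notebook uses (e.g. a locally served Llama 3.1 model).
client = openai.OpenAI(
    base_url=os.environ.get("OPENAI_API_BASE", "http://localhost:8000/v1"),
    api_key=os.environ.get("OPENAI_API_KEY", "placeholder-key"),
)

try:
    # Tiny completion request against the same /completions route shown in the traceback.
    resp = client.completions.create(model="llama-3.1-8b-instruct", prompt="ping", max_tokens=1)
    print("endpoint reachable, model:", resp.model)
except openai.APIConnectionError as exc:
    # Same failure mode as doc2cache.invoke(): the server is unreachable or
    # closes the connection before sending a response.
    print("endpoint unreachable:", exc)

If this standalone call also fails, the problem is the endpoint itself (server not running, wrong host/port, or a proxy closing the connection) rather than the doc2cache chain.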
