Skip to content

Generate fails with ReadTimeout error #23

@jwm4

Description

@jwm4

When I try to generate a large number of questions (e.g., 2000) with docling-sdg, I always wind up with a timeout error such as the one shown below. I am using the default URL and model configuration (US South watsonx.ai). I would guess that the underlying problem is in watsonx.ai, but it would be good to make docling-sdg more robust to these failures and/or provide a way to save checkpoints and resume from a checkpoint when this happens.

ReadTimeout                               Traceback (most recent call last)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:101, in map_httpcore_exceptions()
    100 try:
--> 101     yield
    102 except Exception as exc:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:250, in HTTPTransport.handle_request(self, request)
    249 with map_httpcore_exceptions():
--> 250     resp = self._pool.handle_request(req)
    252 assert isinstance(resp.stream, typing.Iterable)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:256, in ConnectionPool.handle_request(self, request)
    255     self._close_connections(closing)
--> 256     raise exc from None
    258 # Return the response. Note that in this case we still have to manage
    259 # the point at which the response is closed.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:236, in ConnectionPool.handle_request(self, request)
    234 try:
    235     # Send the request on the assigned connection.
--> 236     response = connection.handle_request(
    237         pool_request.request
    238     )
    239 except ConnectionNotAvailable:
    240     # In some cases a connection may initially be available to
    241     # handle a request, but then become unavailable.
    242     #
    243     # In this case we clear the connection and try again.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection.py:103, in HTTPConnection.handle_request(self, request)
    101     raise exc
--> 103 return self._connection.handle_request(request)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:136, in HTTP11Connection.handle_request(self, request)
    135         self._response_closed()
--> 136 raise exc

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:106, in HTTP11Connection.handle_request(self, request)
     97 with Trace(
     98     "receive_response_headers", logger, request, kwargs
     99 ) as trace:
    100     (
    101         http_version,
    102         status,
    103         reason_phrase,
    104         headers,
    105         trailing_data,
--> 106     ) = self._receive_response_headers(**kwargs)
    107     trace.return_value = (
    108         http_version,
    109         status,
    110         reason_phrase,
    111         headers,
    112     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
    176 while True:
--> 177     event = self._receive_event(timeout=timeout)
    178     if isinstance(event, h11.Response):

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:217, in HTTP11Connection._receive_event(self, timeout)
    216 if event is h11.NEED_DATA:
--> 217     data = self._network_stream.read(
    218         self.READ_NUM_BYTES, timeout=timeout
    219     )
    221     # If we feed this case through h11 we'll raise an exception like:
    222     #
    223     #     httpcore.RemoteProtocolError: can't handle event type
   (...)    227     # perspective. Instead we handle this case distinctly and treat
    228     # it as a ConnectError.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_backends/sync.py:126, in SyncStream.read(self, max_bytes, timeout)
    125 exc_map: ExceptionMapping = {socket.timeout: ReadTimeout, OSError: ReadError}
--> 126 with map_exceptions(exc_map):
    127     self._sock.settimeout(timeout)

File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    157 try:
--> 158     self.gen.throw(typ, value, traceback)
    159 except StopIteration as exc:
    160     # Suppress StopIteration *unless* it's the same exception that
    161     # was passed to throw().  This prevents a StopIteration
    162     # raised inside the "with" statement from being suppressed.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_exceptions.py:14, in map_exceptions(map)
     13     if isinstance(exc, from_exc):
---> 14         raise to_exc(exc) from exc
     15 raise

ReadTimeout: The read operation timed out

The above exception was the direct cause of the following exception:

ReadTimeout                               Traceback (most recent call last)
Cell In[4], line 9
      1 options = GenerateOptions(
      2     project_id=WATSONX_PROJECT_ID,
      3     api_key=WATSONX_APIKEY,
      4     url=WATSONX_URL,
      5     max_qac=NUM_QUESTIONS_TO_GENERATE
      6 )
      8 generator = Generator(generate_options=options)
----> 9 results = generator.generate_from_sample(Path("docling_sdg_sample.jsonl"))
     10 print(results)

File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:38, in update_wrapper_attributes.<locals>.wrapper_function(*args, **kwargs)
     36 @functools.wraps(wrapped)
     37 def wrapper_function(*args, **kwargs):
---> 38     return wrapper(*args, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:111, in ValidateCallWrapper.__call__(self, *args, **kwargs)
    110 def __call__(self, *args: Any, **kwargs: Any) -> Any:
--> 111     res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, kwargs))
    112     if self.__return_pydantic_validator__:
    113         return self.__return_pydantic_validator__(res)

File ~/git/docling-sdg/docling_sdg/qa/generate.py:110, in Generator.generate_from_sample(self, source)
    106 start_time = time.time()
    108 passages: Iterator[QaChunk] = retrieve_stored_passages(in_file=source)
--> 110 result = self.generate_from_chunks(passages)
    111 end_time = time.time()
    112 result.time_taken = end_time - start_time

File ~/git/docling-sdg/docling_sdg/qa/generate.py:142, in Generator.generate_from_chunks(self, stored_chunks)
    139     continue
    141 # Generate question
--> 142 question, question_prompt = self.generate_from_prompt(
    143     key_dict={"context_str": chunk.text},
    144     question_types=self.qac_types,
    145     prompt_type=PromptTypes.QUESTION,
    146 )
    147 if question is None or question_prompt is None:
    148     continue

File ~/git/docling-sdg/docling_sdg/qa/generate.py:97, in Generator.generate_from_prompt(self, key_dict, question_types, prompt_type)
     93 prompt_template = PromptTemplate(template=template)
     94 prompt = format_string(prompt_template.template, **key_dict).strip()
     96 return (
---> 97     self.agent.ask(question=prompt, max_tokens=self.options.max_new_tokens)
     98     .replace("\n", " ")
     99     .strip(),
    100     prompt.strip(),
    101 )

File ~/git/docling-sdg/docling_sdg/qa/utils.py:139, in ChatAgent.ask(self, question, max_tokens)
    138 def ask(self, question: str, max_tokens: int) -> str:
--> 139     response = self.llm.chat([ChatMessage(content=question)], max_tokens=max_tokens)
    140     answer = str(response)
    141     return answer

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/instrumentation/dispatcher.py:322, in Dispatcher.span.<locals>.wrapper(func, instance, args, kwargs)
    319             _logger.debug(f"Failed to reset active_span_id: {e}")
    321 try:
--> 322     result = func(*args, **kwargs)
    323     if isinstance(result, asyncio.Future):
    324         # If the result is a Future, wrap it
    325         new_future = asyncio.ensure_future(result)

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/llms/callbacks.py:173, in llm_chat_callback.<locals>.wrap.<locals>.wrapped_llm_chat(_self, messages, **kwargs)
    164 event_id = callback_manager.on_event_start(
    165     CBEventType.LLM,
    166     payload={
   (...)    170     },
    171 )
    172 try:
--> 173     f_return_val = f(_self, messages, **kwargs)
    174 except BaseException as e:
    175     callback_manager.on_event_end(
    176         CBEventType.LLM,
    177         payload={EventPayload.EXCEPTION: e},
    178         event_id=event_id,
    179     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:456, in WatsonxLLM.chat(self, messages, **kwargs)
    453 else:
    454     chat_fn = self._chat
--> 456 return chat_fn(messages, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:433, in WatsonxLLM._chat(self, messages, **kwargs)
    430 message_dicts = [to_watsonx_message_dict(message) for message in messages]
    432 params, generation_kwargs = self._split_chat_generation_params(kwargs)
--> 433 response = self._model.chat(
    434     messages=message_dicts,
    435     params=params,
    436     tools=generation_kwargs.get("tools"),
    437     tool_choice=generation_kwargs.get("tool_choice"),
    438     tool_choice_option=generation_kwargs.get("tool_choice_option"),
    439 )
    441 wx_message = response["choices"][0]["message"]
    442 message = from_watsonx_message(wx_message)

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/model_inference.py:312, in ModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
    307 if context and self.model_id:
    308     raise WMLClientError(
    309         "The `context` parameter is only supported for inferring a chat prompt deployment."
    310     )
--> 312 return self._inference.chat(
    313     messages=messages,
    314     params=params,
    315     tools=tools,
    316     tool_choice=tool_choice,
    317     tool_choice_option=tool_choice_option,
    318     context=context,
    319 )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/fm_model_inference.py:148, in FMModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
    134 def chat(
    135     self,
    136     messages: list[dict],
   (...)    141     context: str | None = None,
    142 ) -> dict:
    144     text_chat_url = (
    145         self._client.service_instance._href_definitions.get_fm_chat_href("chat")
    146     )
--> 148     return self._send_chat_payload(
    149         messages=messages,
    150         params=params,
    151         generate_url=text_chat_url,
    152         tools=tools,
    153         tool_choice=tool_choice,
    154         tool_choice_option=tool_choice_option,
    155     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:351, in BaseModelInference._send_chat_payload(self, messages, params, generate_url, tools, tool_choice, tool_choice_option)
    336 payload = self._prepare_chat_payload(
    337     messages,
    338     params=params,
   (...)    341     tool_choice_option=tool_choice_option,
    342 )
    344 post_params: dict[str, Any] = dict(
    345     url=generate_url,
    346     json=payload,
    347     params=self._client._params(skip_for_create=True, skip_userfs=True),
    348     headers=self._client._get_headers(),
    349 )
--> 351 response_scoring = self._post(self._http_client, **post_params)
    353 return self._handle_response(
    354     200,
    355     "chat",
    356     response_scoring,
    357     _field_to_hide="choices",
    358 )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:711, in _with_retry.<locals>.decorator.<locals>.wrapper(self, *args, **kwargs)
    709 if response is not None:
    710     response.close()
--> 711 response = function(self, *args, **kwargs)
    713 if (
    714     response.status_code in wx_retry_status_codes
    715 ) and attempt != wx_max_retries:
    716     rate_limit_remaining = int(
    717         response.headers.get(
    718             "x-requests-limit-remaining",
    719             self.rate_limiter.capacity,
    720         )
    721     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:1126, in BaseModelInference._post(self, http_client, *args, **kwargs)
   1122 @requests._with_retry()
   1123 def _post(
   1124     self, http_client: Any, *args: Any, **kwargs: Any
   1125 ) -> httpx.Response | _requests.Response:
-> 1126     return http_client.post(*args, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:632, in HTTPXClient.post(self, url, content, json, headers, params, **kwargs)
    629     if headers is not None and headers.get("Content-Type") is not None:
    630         headers["Content-Type"] = "application/json"
--> 632 response = super().post(
    633     url=url,
    634     content=content,
    635     headers=headers,
    636     params=params,
    637     **kwargs,
    638 )
    639 return response

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1144, in Client.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1123 def post(
   1124     self,
   1125     url: URL | str,
   (...)   1137     extensions: RequestExtensions | None = None,
   1138 ) -> Response:
   1139     """
   1140     Send a `POST` request.
   1141 
   1142     **Parameters**: See `httpx.request`.
   1143     """
-> 1144     return self.request(
   1145         "POST",
   1146         url,
   1147         content=content,
   1148         data=data,
   1149         files=files,
   1150         json=json,
   1151         params=params,
   1152         headers=headers,
   1153         cookies=cookies,
   1154         auth=auth,
   1155         follow_redirects=follow_redirects,
   1156         timeout=timeout,
   1157         extensions=extensions,
   1158     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:825, in Client.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
    810     warnings.warn(message, DeprecationWarning, stacklevel=2)
    812 request = self.build_request(
    813     method=method,
    814     url=url,
   (...)    823     extensions=extensions,
    824 )
--> 825 return self.send(request, auth=auth, follow_redirects=follow_redirects)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
    910 self._set_timeout(request)
    912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
    915     request,
    916     auth=auth,
    917     follow_redirects=follow_redirects,
    918     history=[],
    919 )
    920 try:
    921     if not stream:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
    939 request = next(auth_flow)
    941 while True:
--> 942     response = self._send_handling_redirects(
    943         request,
    944         follow_redirects=follow_redirects,
    945         history=history,
    946     )
    947     try:
    948         try:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
    976 for hook in self._event_hooks["request"]:
    977     hook(request)
--> 979 response = self._send_single_request(request)
    980 try:
    981     for hook in self._event_hooks["response"]:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1014, in Client._send_single_request(self, request)
   1009     raise RuntimeError(
   1010         "Attempted to send an async request with a sync Client instance."
   1011     )
   1013 with request_context(request=request):
-> 1014     response = transport.handle_request(request)
   1016 assert isinstance(response.stream, SyncByteStream)
   1018 response.request = request

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:249, in HTTPTransport.handle_request(self, request)
    235 import httpcore
    237 req = httpcore.Request(
    238     method=request.method,
    239     url=httpcore.URL(
   (...)    247     extensions=request.extensions,
    248 )
--> 249 with map_httpcore_exceptions():
    250     resp = self._pool.handle_request(req)
    252 assert isinstance(resp.stream, typing.Iterable)

File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    156     value = typ()
    157 try:
--> 158     self.gen.throw(typ, value, traceback)
    159 except StopIteration as exc:
    160     # Suppress StopIteration *unless* it's the same exception that
    161     # was passed to throw().  This prevents a StopIteration
    162     # raised inside the "with" statement from being suppressed.
    163     return exc is not value

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:118, in map_httpcore_exceptions()
    115     raise
    117 message = str(exc)
--> 118 raise mapped_exc(message) from exc

ReadTimeout: The read operation timed out

Metadata

Metadata

Assignees

Labels

enhancement (New feature or request), question (Further information is requested)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions