Skip to content

Generate fails with ReadTimeout error #23

@jwm4

Description

@jwm4

When I try to generate a large number of questions (e.g., 2000) with docling-sdg, I always wind up with a timeout error such as the one shown below. I am using the default URL and model configuration (US South watsonx.ai). I would guess that the underlying problem is in watsonx.ai, but it would be good to make docling-sdg more robust to these failures and/or provide a way to save checkpoints and resume from a checkpoint when this happens.

ReadTimeout                               Traceback (most recent call last)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:101, in map_httpcore_exceptions()
    100 try:
--> 101     yield
    102 except Exception as exc:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:250, in HTTPTransport.handle_request(self, request)
    249 with map_httpcore_exceptions():
--> 250     resp = self._pool.handle_request(req)
    252 assert isinstance(resp.stream, typing.Iterable)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:256, in ConnectionPool.handle_request(self, request)
    255     self._close_connections(closing)
--> 256     raise exc from None
    258 # Return the response. Note that in this case we still have to manage
    259 # the point at which the response is closed.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:236, in ConnectionPool.handle_request(self, request)
    234 try:
    235     # Send the request on the assigned connection.
--> 236     response = connection.handle_request(
    237         pool_request.request
    238     )
    239 except ConnectionNotAvailable:
    240     # In some cases a connection may initially be available to
    241     # handle a request, but then become unavailable.
    242     #
    243     # In this case we clear the connection and try again.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection.py:103, in HTTPConnection.handle_request(self, request)
    101     raise exc
--> 103 return self._connection.handle_request(request)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:136, in HTTP11Connection.handle_request(self, request)
    135         self._response_closed()
--> 136 raise exc

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:106, in HTTP11Connection.handle_request(self, request)
     97 with Trace(
     98     "receive_response_headers", logger, request, kwargs
     99 ) as trace:
    100     (
    101         http_version,
    102         status,
    103         reason_phrase,
    104         headers,
    105         trailing_data,
--> 106     ) = self._receive_response_headers(**kwargs)
    107     trace.return_value = (
    108         http_version,
    109         status,
    110         reason_phrase,
    111         headers,
    112     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
    176 while True:
--> 177     event = self._receive_event(timeout=timeout)
    178     if isinstance(event, h11.Response):

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:217, in HTTP11Connection._receive_event(self, timeout)
    216 if event is h11.NEED_DATA:
--> 217     data = self._network_stream.read(
    218         self.READ_NUM_BYTES, timeout=timeout
    219     )
    221     # If we feed this case through h11 we'll raise an exception like:
    222     #
    223     #     httpcore.RemoteProtocolError: can't handle event type
   (...)    227     # perspective. Instead we handle this case distinctly and treat
    228     # it as a ConnectError.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_backends/sync.py:126, in SyncStream.read(self, max_bytes, timeout)
    125 exc_map: ExceptionMapping = {socket.timeout: ReadTimeout, OSError: ReadError}
--> 126 with map_exceptions(exc_map):
    127     self._sock.settimeout(timeout)

File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    157 try:
--> 158     self.gen.throw(typ, value, traceback)
    159 except StopIteration as exc:
    160     # Suppress StopIteration *unless* it's the same exception that
    161     # was passed to throw().  This prevents a StopIteration
    162     # raised inside the "with" statement from being suppressed.

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_exceptions.py:14, in map_exceptions(map)
     13     if isinstance(exc, from_exc):
---> 14         raise to_exc(exc) from exc
     15 raise

ReadTimeout: The read operation timed out

The above exception was the direct cause of the following exception:

ReadTimeout                               Traceback (most recent call last)
Cell In[4], line 9
      1 options = GenerateOptions(
      2     project_id=WATSONX_PROJECT_ID,
      3     api_key=WATSONX_APIKEY,
      4     url=WATSONX_URL,
      5     max_qac=NUM_QUESTIONS_TO_GENERATE
      6 )
      8 generator = Generator(generate_options=options)
----> 9 results = generator.generate_from_sample(Path("docling_sdg_sample.jsonl"))
     10 print(results)

File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:38, in update_wrapper_attributes.<locals>.wrapper_function(*args, **kwargs)
     36 @functools.wraps(wrapped)
     37 def wrapper_function(*args, **kwargs):
---> 38     return wrapper(*args, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:111, in ValidateCallWrapper.__call__(self, *args, **kwargs)
    110 def __call__(self, *args: Any, **kwargs: Any) -> Any:
--> 111     res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, kwargs))
    112     if self.__return_pydantic_validator__:
    113         return self.__return_pydantic_validator__(res)

File ~/git/docling-sdg/docling_sdg/qa/generate.py:110, in Generator.generate_from_sample(self, source)
    106 start_time = time.time()
    108 passages: Iterator[QaChunk] = retrieve_stored_passages(in_file=source)
--> 110 result = self.generate_from_chunks(passages)
    111 end_time = time.time()
    112 result.time_taken = end_time - start_time

File ~/git/docling-sdg/docling_sdg/qa/generate.py:142, in Generator.generate_from_chunks(self, stored_chunks)
    139     continue
    141 # Generate question
--> 142 question, question_prompt = self.generate_from_prompt(
    143     key_dict={"context_str": chunk.text},
    144     question_types=self.qac_types,
    145     prompt_type=PromptTypes.QUESTION,
    146 )
    147 if question is None or question_prompt is None:
    148     continue

File ~/git/docling-sdg/docling_sdg/qa/generate.py:97, in Generator.generate_from_prompt(self, key_dict, question_types, prompt_type)
     93 prompt_template = PromptTemplate(template=template)
     94 prompt = format_string(prompt_template.template, **key_dict).strip()
     96 return (
---> 97     self.agent.ask(question=prompt, max_tokens=self.options.max_new_tokens)
     98     .replace("\n", " ")
     99     .strip(),
    100     prompt.strip(),
    101 )

File ~/git/docling-sdg/docling_sdg/qa/utils.py:139, in ChatAgent.ask(self, question, max_tokens)
    138 def ask(self, question: str, max_tokens: int) -> str:
--> 139     response = self.llm.chat([ChatMessage(content=question)], max_tokens=max_tokens)
    140     answer = str(response)
    141     return answer

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/instrumentation/dispatcher.py:322, in Dispatcher.span.<locals>.wrapper(func, instance, args, kwargs)
    319             _logger.debug(f"Failed to reset active_span_id: {e}")
    321 try:
--> 322     result = func(*args, **kwargs)
    323     if isinstance(result, asyncio.Future):
    324         # If the result is a Future, wrap it
    325         new_future = asyncio.ensure_future(result)

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/llms/callbacks.py:173, in llm_chat_callback.<locals>.wrap.<locals>.wrapped_llm_chat(_self, messages, **kwargs)
    164 event_id = callback_manager.on_event_start(
    165     CBEventType.LLM,
    166     payload={
   (...)    170     },
    171 )
    172 try:
--> 173     f_return_val = f(_self, messages, **kwargs)
    174 except BaseException as e:
    175     callback_manager.on_event_end(
    176         CBEventType.LLM,
    177         payload={EventPayload.EXCEPTION: e},
    178         event_id=event_id,
    179     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:456, in WatsonxLLM.chat(self, messages, **kwargs)
    453 else:
    454     chat_fn = self._chat
--> 456 return chat_fn(messages, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:433, in WatsonxLLM._chat(self, messages, **kwargs)
    430 message_dicts = [to_watsonx_message_dict(message) for message in messages]
    432 params, generation_kwargs = self._split_chat_generation_params(kwargs)
--> 433 response = self._model.chat(
    434     messages=message_dicts,
    435     params=params,
    436     tools=generation_kwargs.get("tools"),
    437     tool_choice=generation_kwargs.get("tool_choice"),
    438     tool_choice_option=generation_kwargs.get("tool_choice_option"),
    439 )
    441 wx_message = response["choices"][0]["message"]
    442 message = from_watsonx_message(wx_message)

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/model_inference.py:312, in ModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
    307 if context and self.model_id:
    308     raise WMLClientError(
    309         "The `context` parameter is only supported for inferring a chat prompt deployment."
    310     )
--> 312 return self._inference.chat(
    313     messages=messages,
    314     params=params,
    315     tools=tools,
    316     tool_choice=tool_choice,
    317     tool_choice_option=tool_choice_option,
    318     context=context,
    319 )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/fm_model_inference.py:148, in FMModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
    134 def chat(
    135     self,
    136     messages: list[dict],
   (...)    141     context: str | None = None,
    142 ) -> dict:
    144     text_chat_url = (
    145         self._client.service_instance._href_definitions.get_fm_chat_href("chat")
    146     )
--> 148     return self._send_chat_payload(
    149         messages=messages,
    150         params=params,
    151         generate_url=text_chat_url,
    152         tools=tools,
    153         tool_choice=tool_choice,
    154         tool_choice_option=tool_choice_option,
    155     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:351, in BaseModelInference._send_chat_payload(self, messages, params, generate_url, tools, tool_choice, tool_choice_option)
    336 payload = self._prepare_chat_payload(
    337     messages,
    338     params=params,
   (...)    341     tool_choice_option=tool_choice_option,
    342 )
    344 post_params: dict[str, Any] = dict(
    345     url=generate_url,
    346     json=payload,
    347     params=self._client._params(skip_for_create=True, skip_userfs=True),
    348     headers=self._client._get_headers(),
    349 )
--> 351 response_scoring = self._post(self._http_client, **post_params)
    353 return self._handle_response(
    354     200,
    355     "chat",
    356     response_scoring,
    357     _field_to_hide="choices",
    358 )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:711, in _with_retry.<locals>.decorator.<locals>.wrapper(self, *args, **kwargs)
    709 if response is not None:
    710     response.close()
--> 711 response = function(self, *args, **kwargs)
    713 if (
    714     response.status_code in wx_retry_status_codes
    715 ) and attempt != wx_max_retries:
    716     rate_limit_remaining = int(
    717         response.headers.get(
    718             "x-requests-limit-remaining",
    719             self.rate_limiter.capacity,
    720         )
    721     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:1126, in BaseModelInference._post(self, http_client, *args, **kwargs)
   1122 @requests._with_retry()
   1123 def _post(
   1124     self, http_client: Any, *args: Any, **kwargs: Any
   1125 ) -> httpx.Response | _requests.Response:
-> 1126     return http_client.post(*args, **kwargs)

File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:632, in HTTPXClient.post(self, url, content, json, headers, params, **kwargs)
    629     if headers is not None and headers.get("Content-Type") is not None:
    630         headers["Content-Type"] = "application/json"
--> 632 response = super().post(
    633     url=url,
    634     content=content,
    635     headers=headers,
    636     params=params,
    637     **kwargs,
    638 )
    639 return response

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1144, in Client.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1123 def post(
   1124     self,
   1125     url: URL | str,
   (...)   1137     extensions: RequestExtensions | None = None,
   1138 ) -> Response:
   1139     """
   1140     Send a `POST` request.
   1141 
   1142     **Parameters**: See `httpx.request`.
   1143     """
-> 1144     return self.request(
   1145         "POST",
   1146         url,
   1147         content=content,
   1148         data=data,
   1149         files=files,
   1150         json=json,
   1151         params=params,
   1152         headers=headers,
   1153         cookies=cookies,
   1154         auth=auth,
   1155         follow_redirects=follow_redirects,
   1156         timeout=timeout,
   1157         extensions=extensions,
   1158     )

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:825, in Client.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
    810     warnings.warn(message, DeprecationWarning, stacklevel=2)
    812 request = self.build_request(
    813     method=method,
    814     url=url,
   (...)    823     extensions=extensions,
    824 )
--> 825 return self.send(request, auth=auth, follow_redirects=follow_redirects)

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
    910 self._set_timeout(request)
    912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
    915     request,
    916     auth=auth,
    917     follow_redirects=follow_redirects,
    918     history=[],
    919 )
    920 try:
    921     if not stream:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
    939 request = next(auth_flow)
    941 while True:
--> 942     response = self._send_handling_redirects(
    943         request,
    944         follow_redirects=follow_redirects,
    945         history=history,
    946     )
    947     try:
    948         try:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
    976 for hook in self._event_hooks["request"]:
    977     hook(request)
--> 979 response = self._send_single_request(request)
    980 try:
    981     for hook in self._event_hooks["response"]:

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1014, in Client._send_single_request(self, request)
   1009     raise RuntimeError(
   1010         "Attempted to send an async request with a sync Client instance."
   1011     )
   1013 with request_context(request=request):
-> 1014     response = transport.handle_request(request)
   1016 assert isinstance(response.stream, SyncByteStream)
   1018 response.request = request

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:249, in HTTPTransport.handle_request(self, request)
    235 import httpcore
    237 req = httpcore.Request(
    238     method=request.method,
    239     url=httpcore.URL(
   (...)    247     extensions=request.extensions,
    248 )
--> 249 with map_httpcore_exceptions():
    250     resp = self._pool.handle_request(req)
    252 assert isinstance(resp.stream, typing.Iterable)

File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    156     value = typ()
    157 try:
--> 158     self.gen.throw(typ, value, traceback)
    159 except StopIteration as exc:
    160     # Suppress StopIteration *unless* it's the same exception that
    161     # was passed to throw().  This prevents a StopIteration
    162     # raised inside the "with" statement from being suppressed.
    163     return exc is not value

File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:118, in map_httpcore_exceptions()
    115     raise
    117 message = str(exc)
--> 118 raise mapped_exc(message) from exc

ReadTimeout: The read operation timed out

Metadata

Metadata

Assignees

Labels

enhancement (New feature or request), question (Further information is requested)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions