-
Notifications
You must be signed in to change notification settings - Fork 16
Open
Labels
enhancement (New feature or request), question (Further information is requested)
Description
When I try to generate a large number of questions (e.g., 2000) with docling-sdg, I always wind up with a timeout error such as the one shown below. I am using the default URL and model configuration (US South watsonx.ai). I would guess that the underlying problem is in watsonx.ai, but it would be good to make docling-sdg more robust to these failures and/or provide a way to save checkpoints and resume from a checkpoint when this happens.
ReadTimeout Traceback (most recent call last)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:101, in map_httpcore_exceptions()
100 try:
--> 101 yield
102 except Exception as exc:
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:250, in HTTPTransport.handle_request(self, request)
249 with map_httpcore_exceptions():
--> 250 resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:256, in ConnectionPool.handle_request(self, request)
255 self._close_connections(closing)
--> 256 raise exc from None
258 # Return the response. Note that in this case we still have to manage
259 # the point at which the response is closed.
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:236, in ConnectionPool.handle_request(self, request)
234 try:
235 # Send the request on the assigned connection.
--> 236 response = connection.handle_request(
237 pool_request.request
238 )
239 except ConnectionNotAvailable:
240 # In some cases a connection may initially be available to
241 # handle a request, but then become unavailable.
242 #
243 # In this case we clear the connection and try again.
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/connection.py:103, in HTTPConnection.handle_request(self, request)
101 raise exc
--> 103 return self._connection.handle_request(request)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:136, in HTTP11Connection.handle_request(self, request)
135 self._response_closed()
--> 136 raise exc
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:106, in HTTP11Connection.handle_request(self, request)
97 with Trace(
98 "receive_response_headers", logger, request, kwargs
99 ) as trace:
100 (
101 http_version,
102 status,
103 reason_phrase,
104 headers,
105 trailing_data,
--> 106 ) = self._receive_response_headers(**kwargs)
107 trace.return_value = (
108 http_version,
109 status,
110 reason_phrase,
111 headers,
112 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
176 while True:
--> 177 event = self._receive_event(timeout=timeout)
178 if isinstance(event, h11.Response):
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_sync/http11.py:217, in HTTP11Connection._receive_event(self, timeout)
216 if event is h11.NEED_DATA:
--> 217 data = self._network_stream.read(
218 self.READ_NUM_BYTES, timeout=timeout
219 )
221 # If we feed this case through h11 we'll raise an exception like:
222 #
223 # httpcore.RemoteProtocolError: can't handle event type
(...) 227 # perspective. Instead we handle this case distinctly and treat
228 # it as a ConnectError.
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_backends/sync.py:126, in SyncStream.read(self, max_bytes, timeout)
125 exc_map: ExceptionMapping = {socket.timeout: ReadTimeout, OSError: ReadError}
--> 126 with map_exceptions(exc_map):
127 self._sock.settimeout(timeout)
File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
157 try:
--> 158 self.gen.throw(typ, value, traceback)
159 except StopIteration as exc:
160 # Suppress StopIteration *unless* it's the same exception that
161 # was passed to throw(). This prevents a StopIteration
162 # raised inside the "with" statement from being suppressed.
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpcore/_exceptions.py:14, in map_exceptions(map)
13 if isinstance(exc, from_exc):
---> 14 raise to_exc(exc) from exc
15 raise
ReadTimeout: The read operation timed out
The above exception was the direct cause of the following exception:
ReadTimeout Traceback (most recent call last)
Cell In[4], line 9
1 options = GenerateOptions(
2 project_id=WATSONX_PROJECT_ID,
3 api_key=WATSONX_APIKEY,
4 url=WATSONX_URL,
5 max_qac=NUM_QUESTIONS_TO_GENERATE
6 )
8 generator = Generator(generate_options=options)
----> 9 results = generator.generate_from_sample(Path("docling_sdg_sample.jsonl"))
10 print(results)
File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:38, in update_wrapper_attributes.<locals>.wrapper_function(*args, **kwargs)
36 @functools.wraps(wrapped)
37 def wrapper_function(*args, **kwargs):
---> 38 return wrapper(*args, **kwargs)
File ~/pocs/poc-venv/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py:111, in ValidateCallWrapper.__call__(self, *args, **kwargs)
110 def __call__(self, *args: Any, **kwargs: Any) -> Any:
--> 111 res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, kwargs))
112 if self.__return_pydantic_validator__:
113 return self.__return_pydantic_validator__(res)
File ~/git/docling-sdg/docling_sdg/qa/generate.py:110, in Generator.generate_from_sample(self, source)
106 start_time = time.time()
108 passages: Iterator[QaChunk] = retrieve_stored_passages(in_file=source)
--> 110 result = self.generate_from_chunks(passages)
111 end_time = time.time()
112 result.time_taken = end_time - start_time
File ~/git/docling-sdg/docling_sdg/qa/generate.py:142, in Generator.generate_from_chunks(self, stored_chunks)
139 continue
141 # Generate question
--> 142 question, question_prompt = self.generate_from_prompt(
143 key_dict={"context_str": chunk.text},
144 question_types=self.qac_types,
145 prompt_type=PromptTypes.QUESTION,
146 )
147 if question is None or question_prompt is None:
148 continue
File ~/git/docling-sdg/docling_sdg/qa/generate.py:97, in Generator.generate_from_prompt(self, key_dict, question_types, prompt_type)
93 prompt_template = PromptTemplate(template=template)
94 prompt = format_string(prompt_template.template, **key_dict).strip()
96 return (
---> 97 self.agent.ask(question=prompt, max_tokens=self.options.max_new_tokens)
98 .replace("\n", " ")
99 .strip(),
100 prompt.strip(),
101 )
File ~/git/docling-sdg/docling_sdg/qa/utils.py:139, in ChatAgent.ask(self, question, max_tokens)
138 def ask(self, question: str, max_tokens: int) -> str:
--> 139 response = self.llm.chat([ChatMessage(content=question)], max_tokens=max_tokens)
140 answer = str(response)
141 return answer
File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/instrumentation/dispatcher.py:322, in Dispatcher.span.<locals>.wrapper(func, instance, args, kwargs)
319 _logger.debug(f"Failed to reset active_span_id: {e}")
321 try:
--> 322 result = func(*args, **kwargs)
323 if isinstance(result, asyncio.Future):
324 # If the result is a Future, wrap it
325 new_future = asyncio.ensure_future(result)
File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/core/llms/callbacks.py:173, in llm_chat_callback.<locals>.wrap.<locals>.wrapped_llm_chat(_self, messages, **kwargs)
164 event_id = callback_manager.on_event_start(
165 CBEventType.LLM,
166 payload={
(...) 170 },
171 )
172 try:
--> 173 f_return_val = f(_self, messages, **kwargs)
174 except BaseException as e:
175 callback_manager.on_event_end(
176 CBEventType.LLM,
177 payload={EventPayload.EXCEPTION: e},
178 event_id=event_id,
179 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:456, in WatsonxLLM.chat(self, messages, **kwargs)
453 else:
454 chat_fn = self._chat
--> 456 return chat_fn(messages, **kwargs)
File ~/pocs/poc-venv/lib/python3.11/site-packages/llama_index/llms/ibm/base.py:433, in WatsonxLLM._chat(self, messages, **kwargs)
430 message_dicts = [to_watsonx_message_dict(message) for message in messages]
432 params, generation_kwargs = self._split_chat_generation_params(kwargs)
--> 433 response = self._model.chat(
434 messages=message_dicts,
435 params=params,
436 tools=generation_kwargs.get("tools"),
437 tool_choice=generation_kwargs.get("tool_choice"),
438 tool_choice_option=generation_kwargs.get("tool_choice_option"),
439 )
441 wx_message = response["choices"][0]["message"]
442 message = from_watsonx_message(wx_message)
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/model_inference.py:312, in ModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
307 if context and self.model_id:
308 raise WMLClientError(
309 "The `context` parameter is only supported for inferring a chat prompt deployment."
310 )
--> 312 return self._inference.chat(
313 messages=messages,
314 params=params,
315 tools=tools,
316 tool_choice=tool_choice,
317 tool_choice_option=tool_choice_option,
318 context=context,
319 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/fm_model_inference.py:148, in FMModelInference.chat(self, messages, params, tools, tool_choice, tool_choice_option, context)
134 def chat(
135 self,
136 messages: list[dict],
(...) 141 context: str | None = None,
142 ) -> dict:
144 text_chat_url = (
145 self._client.service_instance._href_definitions.get_fm_chat_href("chat")
146 )
--> 148 return self._send_chat_payload(
149 messages=messages,
150 params=params,
151 generate_url=text_chat_url,
152 tools=tools,
153 tool_choice=tool_choice,
154 tool_choice_option=tool_choice_option,
155 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:351, in BaseModelInference._send_chat_payload(self, messages, params, generate_url, tools, tool_choice, tool_choice_option)
336 payload = self._prepare_chat_payload(
337 messages,
338 params=params,
(...) 341 tool_choice_option=tool_choice_option,
342 )
344 post_params: dict[str, Any] = dict(
345 url=generate_url,
346 json=payload,
347 params=self._client._params(skip_for_create=True, skip_userfs=True),
348 headers=self._client._get_headers(),
349 )
--> 351 response_scoring = self._post(self._http_client, **post_params)
353 return self._handle_response(
354 200,
355 "chat",
356 response_scoring,
357 _field_to_hide="choices",
358 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:711, in _with_retry.<locals>.decorator.<locals>.wrapper(self, *args, **kwargs)
709 if response is not None:
710 response.close()
--> 711 response = function(self, *args, **kwargs)
713 if (
714 response.status_code in wx_retry_status_codes
715 ) and attempt != wx_max_retries:
716 rate_limit_remaining = int(
717 response.headers.get(
718 "x-requests-limit-remaining",
719 self.rate_limiter.capacity,
720 )
721 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/foundation_models/inference/base_model_inference.py:1126, in BaseModelInference._post(self, http_client, *args, **kwargs)
1122 @requests._with_retry()
1123 def _post(
1124 self, http_client: Any, *args: Any, **kwargs: Any
1125 ) -> httpx.Response | _requests.Response:
-> 1126 return http_client.post(*args, **kwargs)
File ~/pocs/poc-venv/lib/python3.11/site-packages/ibm_watsonx_ai/_wrappers/requests.py:632, in HTTPXClient.post(self, url, content, json, headers, params, **kwargs)
629 if headers is not None and headers.get("Content-Type") is not None:
630 headers["Content-Type"] = "application/json"
--> 632 response = super().post(
633 url=url,
634 content=content,
635 headers=headers,
636 params=params,
637 **kwargs,
638 )
639 return response
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1144, in Client.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
1123 def post(
1124 self,
1125 url: URL | str,
(...) 1137 extensions: RequestExtensions | None = None,
1138 ) -> Response:
1139 """
1140 Send a `POST` request.
1141
1142 **Parameters**: See `httpx.request`.
1143 """
-> 1144 return self.request(
1145 "POST",
1146 url,
1147 content=content,
1148 data=data,
1149 files=files,
1150 json=json,
1151 params=params,
1152 headers=headers,
1153 cookies=cookies,
1154 auth=auth,
1155 follow_redirects=follow_redirects,
1156 timeout=timeout,
1157 extensions=extensions,
1158 )
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:825, in Client.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
810 warnings.warn(message, DeprecationWarning, stacklevel=2)
812 request = self.build_request(
813 method=method,
814 url=url,
(...) 823 extensions=extensions,
824 )
--> 825 return self.send(request, auth=auth, follow_redirects=follow_redirects)
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
910 self._set_timeout(request)
912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
915 request,
916 auth=auth,
917 follow_redirects=follow_redirects,
918 history=[],
919 )
920 try:
921 if not stream:
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
939 request = next(auth_flow)
941 while True:
--> 942 response = self._send_handling_redirects(
943 request,
944 follow_redirects=follow_redirects,
945 history=history,
946 )
947 try:
948 try:
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
976 for hook in self._event_hooks["request"]:
977 hook(request)
--> 979 response = self._send_single_request(request)
980 try:
981 for hook in self._event_hooks["response"]:
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_client.py:1014, in Client._send_single_request(self, request)
1009 raise RuntimeError(
1010 "Attempted to send an async request with a sync Client instance."
1011 )
1013 with request_context(request=request):
-> 1014 response = transport.handle_request(request)
1016 assert isinstance(response.stream, SyncByteStream)
1018 response.request = request
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:249, in HTTPTransport.handle_request(self, request)
235 import httpcore
237 req = httpcore.Request(
238 method=request.method,
239 url=httpcore.URL(
(...) 247 extensions=request.extensions,
248 )
--> 249 with map_httpcore_exceptions():
250 resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)
File ~/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
156 value = typ()
157 try:
--> 158 self.gen.throw(typ, value, traceback)
159 except StopIteration as exc:
160 # Suppress StopIteration *unless* it's the same exception that
161 # was passed to throw(). This prevents a StopIteration
162 # raised inside the "with" statement from being suppressed.
163 return exc is not value
File ~/pocs/poc-venv/lib/python3.11/site-packages/httpx/_transports/default.py:118, in map_httpcore_exceptions()
115 raise
117 message = str(exc)
--> 118 raise mapped_exc(message) from exc
ReadTimeout: The read operation timed out
Metadata
Assignees
Labels
enhancement (New feature or request), question (Further information is requested)