From c2c572238082d1fc9ddbeb122ba30020d9d92b8a Mon Sep 17 00:00:00 2001 From: hzt <3061613175@qq.com> Date: Fri, 20 Feb 2026 13:56:20 +0800 Subject: [PATCH] fix: prevent crash when reloading conversations with Base64-encoded files (#2784) Multiple issues caused conversations with file attachments to crash on reload: 1. Element.from_dict() called str() on bytes content, producing repr like "b'\x89PNG...'" instead of preserving the raw bytes. 2. ChainlitDataLayer.get_element() converted None url/objectKey/mime to the string "None" via str(None). 3. The frontend resume_thread handler did not resolve element URLs from chainlitKey, unlike the element and set_sidebar_elements handlers. 4. Storage client failures during get_thread crashed the entire resume flow. Added try/except in ChainlitDataLayer and DynamoDB data layers. 5. The socket connection_successful handler had no error handling around thread resume. Added try/except with error messages to the frontend. 6. The HTTP get_thread endpoint had no error handling. Added try/except. --- backend/chainlit/data/chainlit_data_layer.py | 19 ++- backend/chainlit/data/dynamodb.py | 13 +- backend/chainlit/element.py | 8 +- backend/chainlit/server.py | 9 +- backend/chainlit/socket.py | 29 +++- .../tests/data/test_chainlit_data_layer.py | 142 ++++++++++++++++++ libs/react-client/src/useChatSession.ts | 14 +- 7 files changed, 217 insertions(+), 17 deletions(-) create mode 100644 backend/tests/data/test_chainlit_data_layer.py diff --git a/backend/chainlit/data/chainlit_data_layer.py b/backend/chainlit/data/chainlit_data_layer.py index d2fb2d9f62..99afd77493 100644 --- a/backend/chainlit/data/chainlit_data_layer.py +++ b/backend/chainlit/data/chainlit_data_layer.py @@ -277,10 +277,10 @@ async def get_element( id=str(row["id"]), threadId=str(row["threadId"]), type=metadata.get("type", "file"), - url=str(row["url"]), + url=row.get("url"), name=str(row["name"]), - mime=str(row["mime"]), - objectKey=str(row["objectKey"]), + mime=str(row["mime"]) if row.get("mime") else None, + objectKey=row.get("objectKey"), forId=str(row["stepId"]), chainlitKey=row.get("chainlitKey"), display=row["display"], @@ -555,9 +555,16 @@ async def get_thread(self, thread_id: str) -> Optional[ThreadDict]: if self.storage_client is not None: for elem in elements_results: if not elem["url"] and elem["objectKey"]: - elem["url"] = await self.storage_client.get_read_url( - object_key=elem["objectKey"], - ) + try: + elem["url"] = await self.storage_client.get_read_url( + object_key=elem["objectKey"], + ) + except Exception as e: + logger.warning( + "Failed to get read URL for element '%s': %s", + elem.get("id", "unknown"), + e, + ) return ThreadDict( id=str(thread["id"]), diff --git a/backend/chainlit/data/dynamodb.py b/backend/chainlit/data/dynamodb.py index 7a87404b7f..f85c1d0bde 100644 --- a/backend/chainlit/data/dynamodb.py +++ b/backend/chainlit/data/dynamodb.py @@ -545,9 +545,16 @@ async def get_thread(self, thread_id: str) -> "Optional[ThreadDict]": elif item["SK"].startswith("ELEMENT"): if self.storage_provider is not None: - item["url"] = await self.storage_provider.get_read_url( - object_key=item["objectKey"], - ) + try: + item["url"] = await self.storage_provider.get_read_url( + object_key=item["objectKey"], + ) + except Exception as e: + _logger.warning( + "Failed to get read URL for element '%s': %s", + item.get("id", "unknown"), + e, + ) elements.append(item) elif item["SK"].startswith("STEP"): diff --git a/backend/chainlit/element.py b/backend/chainlit/element.py index 23a8b41a4a..f2c78c6dda 100644 --- a/backend/chainlit/element.py +++ b/backend/chainlit/element.py @@ -144,7 +144,13 @@ def from_dict(cls, e_dict: ElementDict): type = e_dict.get("type", "file") path = str(e_dict.get("path")) if e_dict.get("path") else None url = str(e_dict.get("url")) if e_dict.get("url") else None - content = str(e_dict.get("content")) if e_dict.get("content") else None + content = None + raw_content = e_dict.get("content") + if raw_content is not None: + if isinstance(raw_content, bytes): + content = raw_content + else: + content = str(raw_content) object_key = e_dict.get("objectKey") chainlit_key = e_dict.get("chainlitKey") display = e_dict.get("display", "inline") diff --git a/backend/chainlit/server.py b/backend/chainlit/server.py index 6accf1ccc4..5faeaff34c 100644 --- a/backend/chainlit/server.py +++ b/backend/chainlit/server.py @@ -975,7 +975,14 @@ async def get_thread( await is_thread_author(current_user.identifier, thread_id) - res = await data_layer.get_thread(thread_id) + try: + res = await data_layer.get_thread(thread_id) + except Exception as e: + logger.error(f"Failed to get thread {thread_id}: {e!s}") + raise HTTPException( + status_code=500, + detail="Failed to load conversation history", + ) return JSONResponse(content=res) diff --git a/backend/chainlit/socket.py b/backend/chainlit/socket.py index dd969e091b..29cbdadbf0 100644 --- a/backend/chainlit/socket.py +++ b/backend/chainlit/socket.py @@ -194,7 +194,15 @@ async def connection_successful(sid): return if context.session.thread_id_to_resume and config.code.on_chat_resume: - thread = await resume_thread(context.session) + try: + thread = await resume_thread(context.session) + except Exception as e: + logger.error(f"Failed to resume thread: {e!s}") + await context.emitter.send_resume_thread_error( + "Failed to load conversation history." + ) + thread = None + if thread: context.session.has_first_interaction = True await context.emitter.emit( @@ -204,10 +212,21 @@ async def connection_successful(sid): await config.code.on_chat_resume(thread) for step in thread.get("steps", []): - if "message" in step["type"]: - chat_context.add(Message.from_dict(step)) - - await context.emitter.resume_thread(thread) + try: + if "message" in step["type"]: + chat_context.add(Message.from_dict(step)) + except Exception as e: + logger.warning( + f"Failed to restore step {step.get('id')}: {e!s}" + ) + + try: + await context.emitter.resume_thread(thread) + except Exception as e: + logger.error(f"Failed to emit resume_thread: {e!s}") + await context.emitter.send_resume_thread_error( + "Failed to load conversation history." + ) return else: await context.emitter.send_resume_thread_error("Thread not found.") diff --git a/backend/tests/data/test_chainlit_data_layer.py b/backend/tests/data/test_chainlit_data_layer.py new file mode 100644 index 0000000000..48aaf1147d --- /dev/null +++ b/backend/tests/data/test_chainlit_data_layer.py @@ -0,0 +1,142 @@ +import json + +import pytest + +from chainlit.data.chainlit_data_layer import ChainlitDataLayer + + +class TestConvertElementRowToDict: + """Test suite for ChainlitDataLayer._convert_element_row_to_dict.""" + + def _make_layer(self): + return ChainlitDataLayer(database_url="postgresql://fake", storage_client=None) + + def _make_row(self, **overrides): + row = { + "id": "elem-1", + "threadId": "thread-1", + "stepId": "step-1", + "metadata": json.dumps({"type": "file"}), + "url": None, + "name": "test_file.txt", + "mime": "text/plain", + "objectKey": None, + "chainlitKey": None, + "display": "inline", + "size": None, + "language": None, + "page": None, + "autoPlay": None, + "playerConfig": None, + "props": "{}", + } + row.update(overrides) + return row + + def test_convert_element_row_with_none_url(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict(self._make_row(url=None)) + assert result["url"] is None + + def test_convert_element_row_with_none_object_key(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict(self._make_row(objectKey=None)) + assert result["objectKey"] is None + + def test_convert_element_row_with_valid_url(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict( + self._make_row(url="https://storage.example.com/file.txt") + ) + assert result["url"] == "https://storage.example.com/file.txt" + + def test_convert_element_row_preserves_chainlit_key(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict( + self._make_row(chainlitKey="file-abc-123") + ) + assert result["chainlitKey"] == "file-abc-123" + + def test_convert_element_row_type_from_metadata(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict( + self._make_row(metadata=json.dumps({"type": "image"})) + ) + assert result["type"] == "image" + + def test_convert_element_row_default_type(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict( + self._make_row(metadata=json.dumps({})) + ) + assert result["type"] == "file" + + def test_convert_element_row_full_data(self): + layer = self._make_layer() + result = layer._convert_element_row_to_dict( + self._make_row( + url="https://storage.example.com/file.txt", + objectKey="threads/thread-1/files/elem-1", + chainlitKey="file-abc-123", + display="side", + size="large", + language="python", + page=3, + mime="application/pdf", + props=json.dumps({"custom": "value"}), + ) + ) + assert result["id"] == "elem-1" + assert result["url"] == "https://storage.example.com/file.txt" + assert result["objectKey"] == "threads/thread-1/files/elem-1" + assert result["chainlitKey"] == "file-abc-123" + assert result["props"] == {"custom": "value"} + + +class TestGetElementNoneHandling: + """Test that get_element does not convert None values to 'None' strings.""" + + def _make_layer(self): + return ChainlitDataLayer(database_url="postgresql://fake", storage_client=None) + + @pytest.mark.asyncio + async def test_get_element_returns_none_url_not_string(self): + from unittest.mock import AsyncMock + + layer = self._make_layer() + layer.execute_query = AsyncMock( + return_value=[ + { + "id": "elem-1", + "threadId": "thread-1", + "stepId": "step-1", + "metadata": json.dumps({"type": "file"}), + "url": None, + "name": "test.txt", + "mime": None, + "objectKey": None, + "chainlitKey": "ck-1", + "display": "inline", + "size": None, + "language": None, + "page": None, + "autoPlay": None, + "playerConfig": None, + "props": "{}", + } + ] + ) + result = await layer.get_element("thread-1", "elem-1") + assert result is not None + assert result["url"] is None + assert result["objectKey"] is None + assert result["mime"] is None + + @pytest.mark.asyncio + async def test_get_element_not_found(self): + from unittest.mock import AsyncMock + + layer = self._make_layer() + layer.execute_query = AsyncMock(return_value=[]) + result = await layer.get_element("thread-1", "nonexistent") + assert result is None diff --git a/libs/react-client/src/useChatSession.ts b/libs/react-client/src/useChatSession.ts index 4be25c9141..83f55f1741 100644 --- a/libs/react-client/src/useChatSession.ts +++ b/libs/react-client/src/useChatSession.ts @@ -268,7 +268,19 @@ const useChatSession = () => { setChatSettingsValue(thread.metadata?.chat_settings); } setMessages(messages); - const elements = thread.elements || []; + const elements = (thread.elements || []).filter( + (e): e is IElement => e != null + ); + // Resolve element URLs from chainlitKey when url is missing, + // matching the behavior of the 'element' socket event handler. + elements.forEach((element) => { + if (!element.url && element.chainlitKey) { + element.url = client.getElementUrl( + element.chainlitKey, + sessionId + ); + } + }); setTasklists( (elements as ITasklistElement[]).filter((e) => e.type === 'tasklist') );