diff --git a/.gitignore b/.gitignore index 05726a80..7f6cd374 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ build .promptx/ .spec-workflow/ .cursorindexingignore +.cursor/ diff --git a/reader/browser/BrowserWord.cpp b/reader/browser/BrowserWord.cpp index a26cb11c..ab92ebfc 100644 --- a/reader/browser/BrowserWord.cpp +++ b/reader/browser/BrowserWord.cpp @@ -44,10 +44,7 @@ void BrowserWord::setSelectable(bool enable) QRectF BrowserWord::boundingRect() const { - // qCDebug(appLog) << "BrowserWord::boundingRect() - Calculating bounding rectangle"; - QRectF rect = QRectF(m_word.boundingBox.x() * m_scaleFactor - 1, m_word.boundingBox.y() * m_scaleFactor - 1, m_word.boundingBox.width() * m_scaleFactor + 2, m_word.boundingBox.height() * m_scaleFactor + 2); - // qCDebug(appLog) << "BrowserWord::boundingRect() - Bounding rectangle:" << rect; - return rect; + return QRectF(m_word.boundingBox.x() * m_scaleFactor - 1, m_word.boundingBox.y() * m_scaleFactor - 1, m_word.boundingBox.width() * m_scaleFactor + 2, m_word.boundingBox.height() * m_scaleFactor + 2); } QRectF BrowserWord::boundingBox() const diff --git a/reader/document/XpsDocumentAdapter.cpp b/reader/document/XpsDocumentAdapter.cpp index 9dc838e8..18ad5220 100644 --- a/reader/document/XpsDocumentAdapter.cpp +++ b/reader/document/XpsDocumentAdapter.cpp @@ -5,6 +5,7 @@ #include "XpsDocumentAdapter.h" +#include "XpsTextExtractor.h" #include "ddlog.h" #include @@ -14,6 +15,8 @@ #include #include +#include + #ifdef signals #pragma push_macro("signals") #undef signals @@ -795,8 +798,63 @@ Link XpsPageAdapter::getLinkAtPoint(const QPointF &point) QString XpsPageAdapter::text(const QRectF &rect) const { - Q_UNUSED(rect) - return QString(); + if (!m_document || rect.isEmpty()) { + return QString(); + } + + // 直接调用文本提取器,避免通过非const的words()方法 + QString filePath = m_document->filePath(); + if (filePath.isEmpty()) { + qCWarning(appLog) << "XpsPageAdapter::text() - Empty file path"; + return QString(); + } + + QList allWords = XpsTextExtractor::extractWords(filePath, m_pageIndex); + if (allWords.isEmpty()) { + return QString(); + } + + // 筛选在矩形区域内的words并按位置排序 + QList selectedWords; + for (const Word &word : allWords) { + if (rect.intersects(word.boundingBox)) { + selectedWords.append(word); + } + } + + if (selectedWords.isEmpty()) { + return QString(); + } + + // 按位置排序(从上到下,从左到右) + std::sort(selectedWords.begin(), selectedWords.end(), [](const Word &a, const Word &b) { + const double yThreshold = 5.0; // Y坐标容差 + if (qAbs(a.boundingBox.y() - b.boundingBox.y()) > yThreshold) { + return a.boundingBox.y() < b.boundingBox.y(); + } + return a.boundingBox.x() < b.boundingBox.x(); + }); + + // 拼接文本,处理换行 + QStringList textParts; + qreal lastY = selectedWords.first().boundingBox.y(); + const double yThreshold = 5.0; + + for (const Word &word : selectedWords) { + // 检测换行 + if (qAbs(word.boundingBox.y() - lastY) > yThreshold && !textParts.isEmpty()) { + // 如果Y坐标变化较大,可能是新行,添加换行符(可选) + // 这里先不添加,让调用者处理 + } + textParts.append(word.text); + lastY = word.boundingBox.y(); + } + + QString result = textParts.join(QStringLiteral(" ")); + + qCDebug(appLog) << "XpsPageAdapter::text() - Extracted text for rect" << rect << ":" << result; + + return result.simplified(); } QVector XpsPageAdapter::search(const QString &text, bool matchCase, bool wholeWords) const @@ -814,7 +872,20 @@ QList XpsPageAdapter::annotations() const QList XpsPageAdapter::words() { - return {}; + if (!m_document) { + qCWarning(appLog) << "XpsPageAdapter::words() - Invalid document"; + return {}; + } + + // 获取文档文件路径 + QString filePath = m_document->filePath(); + if (filePath.isEmpty()) { + qCWarning(appLog) << "XpsPageAdapter::words() - Empty file path"; + return {}; + } + + // 使用文本提取器提取文本 + return XpsTextExtractor::extractWords(filePath, m_pageIndex); } } // namespace deepin_reader diff --git a/reader/document/XpsDocumentAdapter.h b/reader/document/XpsDocumentAdapter.h index d55ecec9..afc7eb16 100644 --- a/reader/document/XpsDocumentAdapter.h +++ b/reader/document/XpsDocumentAdapter.h @@ -40,6 +40,8 @@ class XpsDocumentAdapter : public Document QImage renderPage(int pageIndex, int width, int height, const QRect &slice) const; QSizeF pageSize(int pageIndex) const; + QString filePath() const { return m_filePath; } + private: class Handle; diff --git a/reader/document/XpsTextExtractor.cpp b/reader/document/XpsTextExtractor.cpp new file mode 100644 index 00000000..2af729b5 --- /dev/null +++ b/reader/document/XpsTextExtractor.cpp @@ -0,0 +1,505 @@ +// Copyright (C) 2019 ~ 2025 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2025 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "XpsTextExtractor.h" + +#include "ddlog.h" + +#include +#include +#include +#include + +#include + +#ifdef signals +#pragma push_macro("signals") +#undef signals +#endif +#ifdef slots +#pragma push_macro("slots") +#undef slots +#endif + +extern "C" { +#include +#include +} + +#ifdef slots +#pragma pop_macro("slots") +#endif +#ifdef signals +#pragma pop_macro("signals") +#endif + +namespace deepin_reader { + +QList XpsTextExtractor::extractWords(const QString &filePath, int pageIndex) +{ + QList textRuns = extractTextRuns(filePath, pageIndex); + if (textRuns.isEmpty()) { + return {}; + } + + // 将TextRun转换为Word列表 + // 为了支持字符级选择,需要将每个Glyph的文本拆分成单个字符 + QList allWords; + for (const TextRun &run : textRuns) { + if (!run.glyphs.isEmpty()) { + // 使用glyph信息,但将文本拆分成单个字符 + for (const GlyphInfo &glyph : run.glyphs) { + if (glyph.text.isEmpty() || glyph.boundingBox.isEmpty()) { + continue; + } + + // 计算每个字符的边界框 + qreal currentX = glyph.position.x(); + qreal baseY = glyph.position.y(); + // 改进字符高度计算:考虑字体的ascent和descent + // 通常ascent约占字体大小的70-80%,descent约占20-30% + // 使用更合理的估算:ascent = fontSize * 0.75, descent = fontSize * 0.25 + qreal charAscent = glyph.fontSize * 0.75; + qreal charDescent = glyph.fontSize * 0.25; + qreal charHeight = charAscent + charDescent; + + for (int i = 0; i < glyph.text.length(); ++i) { + QChar ch = glyph.text.at(i); + // 优先使用Indices中的精确宽度,否则使用估算 + double charWidth; + if (i < glyph.charWidths.size() && glyph.charWidths[i] > 0) { + charWidth = glyph.charWidths[i]; + } else { + charWidth = estimateCharWidth(ch, glyph.fontSize); + } + + // 创建单个字符的边界框 + // OriginY是基线位置,所以字符顶部 = baseY - charAscent + QRectF charBaseRect(currentX, baseY - charAscent, charWidth, charHeight); + + // 应用变换矩阵 + QPolygonF charBasePolygon; + charBasePolygon << charBaseRect.topLeft() << charBaseRect.topRight() + << charBaseRect.bottomRight() << charBaseRect.bottomLeft(); + QPolygonF transformedPolygon = glyph.transform.map(charBasePolygon); + QRectF charBoundingBox = transformedPolygon.boundingRect(); + + // 创建Word对象 + Word word; + word.text = QString(ch); + word.boundingBox = charBoundingBox; + allWords.append(word); + + // 移动到下一个字符位置 + currentX += charWidth; + } + } + } else if (!run.text.isEmpty() && !run.boundingBox.isEmpty()) { + // 如果没有glyph信息,将TextRun的文本拆分成单个字符 + // 估算每个字符的宽度 + qreal avgCharWidth = run.boundingBox.width() / run.text.length(); + qreal currentX = run.boundingBox.x(); + + for (int i = 0; i < run.text.length(); ++i) { + QChar ch = run.text.at(i); + Word word; + word.text = QString(ch); + word.boundingBox = QRectF(currentX, run.boundingBox.y(), avgCharWidth, run.boundingBox.height()); + allWords.append(word); + currentX += avgCharWidth; + } + } + } + + if (allWords.isEmpty()) { + return {}; + } + + // 按位置排序(从上到下,从左到右) + std::sort(allWords.begin(), allWords.end(), [](const Word &a, const Word &b) { + // 首先按Y坐标排序(从上到下) + const double yThreshold = 5.0; // Y坐标容差 + if (qAbs(a.boundingBox.y() - b.boundingBox.y()) > yThreshold) { + return a.boundingBox.y() < b.boundingBox.y(); + } + // Y坐标相近时,按X坐标排序(从左到右) + return a.boundingBox.x() < b.boundingBox.x(); + }); + + // 对于字符级选择,保持每个字符的精确边界框,不需要统一行高 + // 直接返回排序后的字符列表 + return allWords; +} + +QList XpsTextExtractor::extractTextRuns(const QString &filePath, int pageIndex) +{ + QByteArray xmlData = readFixedPageFromZip(filePath, pageIndex); + if (xmlData.isEmpty()) { + return {}; + } + + return parseFixedPage(xmlData, pageIndex); +} + +QByteArray XpsTextExtractor::readFixedPageFromZip(const QString &filePath, int pageIndex) +{ + QString fixedPagePath = findFixedPagePath(filePath, pageIndex); + if (fixedPagePath.isEmpty()) { + return QByteArray(); + } + + struct archive *a = archive_read_new(); + struct archive_entry *entry = nullptr; + + if (!a) { + return QByteArray(); + } + + archive_read_support_format_zip(a); + archive_read_support_format_all(a); + + QByteArray utf8Path = QFile::encodeName(filePath); + int r = archive_read_open_filename(a, utf8Path.constData(), 10240); + if (r != ARCHIVE_OK) { + archive_read_free(a); + return QByteArray(); + } + + QByteArray result; + bool found = false; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + QString entryPath = QString::fromUtf8(archive_entry_pathname(entry)); + if (entryPath == fixedPagePath) { + + la_int64_t size = archive_entry_size(entry); + if (size > 0 && size < 100 * 1024 * 1024) { // 限制100MB + result.resize(static_cast(size)); + la_ssize_t readSize = archive_read_data(a, result.data(), size); + if (readSize == size) { + found = true; + } else { + // 读取失败,清理并返回 + archive_read_free(a); + return QByteArray(); + } + } + break; + } + } + + archive_read_free(a); + + if (!found) { + return QByteArray(); + } + + return result; +} + +QString XpsTextExtractor::findFixedPagePath(const QString &filePath, int pageIndex) +{ + Q_UNUSED(filePath) // 当前实现使用标准路径,未来可以解析[Content_Types].xml + + if (pageIndex < 0) { + return QString(); + } + + // XPS标准格式: Documents/1/Pages/{pageNumber}.fpage + // pageIndex从0开始,但XPS页面编号通常从1开始 + int pageNumber = pageIndex + 1; + return QStringLiteral("Documents/1/Pages/%1.fpage").arg(pageNumber); +} + +QList XpsTextExtractor::parseFixedPage(const QByteArray &xmlData, int pageIndex) +{ + Q_UNUSED(pageIndex); + QList textRuns; + QXmlStreamReader xml(xmlData); + QList transformStack; + transformStack.append(QTransform()); // 初始单位矩阵 + + // 用于跟踪当前是否在RenderTransform子元素内部 + int renderTransformDepth = 0; + + while (!xml.atEnd() && !xml.hasError()) { + QXmlStreamReader::TokenType token = xml.readNext(); + + if (token == QXmlStreamReader::StartElement) { + QString elementName = xml.name().toString(); + + if (elementName == QLatin1String("FixedPage")) { + // 检查RenderTransform属性 + QString transformStr = xml.attributes().value(QLatin1String("RenderTransform")).toString(); + if (!transformStr.isEmpty()) { + QTransform transform = parseTransformMatrix(transformStr); + transformStack.last() = transformStack.last() * transform; + } + } else if (elementName == QLatin1String("Canvas") || elementName == QLatin1String("Path")) { + // 处理嵌套变换 + QString transformStr = xml.attributes().value(QLatin1String("RenderTransform")).toString(); + if (!transformStr.isEmpty()) { + // 属性形式的RenderTransform + QTransform transform = parseTransformMatrix(transformStr); + transformStack.append(transformStack.last() * transform); + } else { + // 没有属性形式的RenderTransform,可能需要查找子元素形式 + transformStack.append(transformStack.last()); + } + } else if (elementName == QLatin1String("Canvas.RenderTransform") || + elementName == QLatin1String("Path.RenderTransform")) { + // 进入RenderTransform子元素 + renderTransformDepth++; + } else if (elementName == QLatin1String("MatrixTransform")) { + // 检查是否在Canvas.RenderTransform或Path.RenderTransform内部 + // 如果是,解析Matrix属性并应用到当前transformStack + if (renderTransformDepth > 0 && !transformStack.isEmpty()) { + QString matrixStr = xml.attributes().value(QLatin1String("Matrix")).toString(); + if (!matrixStr.isEmpty()) { + QTransform transform = parseTransformMatrix(matrixStr); + transformStack.last() = transformStack.last() * transform; + } + } + } else if (elementName == QLatin1String("Glyphs")) { + QTransform parentTransform = transformStack.isEmpty() ? QTransform() : transformStack.last(); + GlyphInfo glyph = parseGlyphs(xml, parentTransform); + if (!glyph.text.isEmpty()) { + TextRun run; + run.text = glyph.text; + run.boundingBox = glyph.boundingBox; + run.glyphs.append(glyph); + textRuns.append(run); + } + } + } else if (token == QXmlStreamReader::EndElement) { + QString elementName = xml.name().toString(); + if (elementName == QLatin1String("Canvas") || elementName == QLatin1String("Path")) { + if (transformStack.size() > 1) { + transformStack.removeLast(); + } + } else if (elementName == QLatin1String("Canvas.RenderTransform") || + elementName == QLatin1String("Path.RenderTransform")) { + // RenderTransform子元素结束 + if (renderTransformDepth > 0) { + renderTransformDepth--; + } + } + } + } + + if (xml.hasError()) { + return {}; + } + + return textRuns; +} + +XpsTextExtractor::GlyphInfo XpsTextExtractor::parseGlyphs(QXmlStreamReader &xml, const QTransform &parentTransform) +{ + GlyphInfo glyph; + glyph.transform = parentTransform; + glyph.fontSize = 12.0; // 默认字体大小 + + QXmlStreamAttributes attrs = xml.attributes(); + + // 读取UnicodeString(文本内容) + QString unicodeString = attrs.value(QLatin1String("UnicodeString")).toString(); + if (unicodeString.isEmpty()) { + // 跳过空文本的Glyphs + return glyph; + } + glyph.text = unicodeString; + + // 读取位置 + bool okX = false, okY = false; + double originX = attrs.value(QLatin1String("OriginX")).toDouble(&okX); + double originY = attrs.value(QLatin1String("OriginY")).toDouble(&okY); + if (!okX || !okY) { + return glyph; + } + glyph.position = QPointF(originX, originY); + + // 读取字体大小 + bool okSize = false; + double fontSize = attrs.value(QLatin1String("FontRenderingEmSize")).toDouble(&okSize); + if (!okSize || fontSize <= 0) { + fontSize = 12.0; // 默认值 + } + glyph.fontSize = fontSize; + + // 读取字体URI + glyph.fontUri = attrs.value(QLatin1String("FontUri")).toString(); + + // 读取Indices属性(字符级位置信息) + QString indicesStr = attrs.value(QLatin1String("Indices")).toString(); + if (!indicesStr.isEmpty()) { + // 解析Indices获取精确的字符宽度 + glyph.charWidths = parseIndices(indicesStr, unicodeString.length(), fontSize); + } else { + glyph.charWidths.clear(); + } + + // 读取RenderTransform(如果有) + QString transformStr = attrs.value(QLatin1String("RenderTransform")).toString(); + if (!transformStr.isEmpty()) { + QTransform localTransform = parseTransformMatrix(transformStr); + glyph.transform = parentTransform * localTransform; + } + + // 计算边界框 + glyph.boundingBox = calculateGlyphBoundingBox(glyph); + + return glyph; +} + +QRectF XpsTextExtractor::calculateGlyphBoundingBox(const GlyphInfo &glyph) +{ + if (glyph.text.isEmpty()) { + return QRectF(); + } + + // 估算文本宽度 + double totalWidth = 0.0; + // 优先使用Indices中的精确宽度 + if (!glyph.charWidths.isEmpty() && glyph.charWidths.size() == glyph.text.length()) { + for (int i = 0; i < glyph.charWidths.size(); ++i) { + totalWidth += glyph.charWidths[i]; + } + } else { + // 如果没有Indices,使用估算 + for (int i = 0; i < glyph.text.length(); ++i) { + totalWidth += estimateCharWidth(glyph.text.at(i), glyph.fontSize); + } + } + + // 改进文本高度计算:考虑字体的ascent和descent + // 通常ascent约占字体大小的70-80%,descent约占20-30% + double charAscent = glyph.fontSize * 0.75; + double charDescent = glyph.fontSize * 0.25; + double height = charAscent + charDescent; + + // 创建基础边界框(在文本位置) + // 注意:XPS坐标系中,OriginY是文本基线位置,所以字符顶部 = OriginY - charAscent + QRectF baseRect(glyph.position.x(), glyph.position.y() - charAscent, totalWidth, height); + + // 应用变换矩阵 + QPolygonF basePolygon; + basePolygon << baseRect.topLeft() << baseRect.topRight() + << baseRect.bottomRight() << baseRect.bottomLeft(); + QPolygonF transformedPolygon = glyph.transform.map(basePolygon); + QRectF transformedRect = transformedPolygon.boundingRect(); + + return transformedRect; +} + +QTransform XpsTextExtractor::parseTransformMatrix(const QString &transformStr) +{ + if (transformStr.isEmpty()) { + return QTransform(); + } + + // XPS变换矩阵格式: "m11,m12,m21,m22,dx,dy" + QStringList parts = transformStr.split(QLatin1Char(','), QString::SkipEmptyParts); + if (parts.size() != 6) { + return QTransform(); + } + + bool ok = false; + double m11 = parts[0].toDouble(&ok); + if (!ok) return QTransform(); + double m12 = parts[1].toDouble(&ok); + if (!ok) return QTransform(); + double m21 = parts[2].toDouble(&ok); + if (!ok) return QTransform(); + double m22 = parts[3].toDouble(&ok); + if (!ok) return QTransform(); + double dx = parts[4].toDouble(&ok); + if (!ok) return QTransform(); + double dy = parts[5].toDouble(&ok); + if (!ok) return QTransform(); + + return QTransform(m11, m12, m21, m22, dx, dy); +} + +double XpsTextExtractor::estimateCharWidth(QChar ch, double fontSize) +{ + if (fontSize <= 0) { + return 12.0; // 默认宽度 + } + + // 简单的字符宽度估算 + // 中文字符、日文、韩文等全角字符 + if (ch.unicode() >= 0x4E00 && ch.unicode() <= 0x9FFF) { // CJK统一汉字 + return fontSize; + } + if (ch.unicode() >= 0x3040 && ch.unicode() <= 0x309F) { // 平假名 + return fontSize; + } + if (ch.unicode() >= 0x30A0 && ch.unicode() <= 0x30FF) { // 片假名 + return fontSize; + } + if (ch.unicode() >= 0xAC00 && ch.unicode() <= 0xD7AF) { // 韩文 + return fontSize; + } + + // 英文字母和数字 + if ((ch >= QLatin1Char('A') && ch <= QLatin1Char('Z')) || + (ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))) { + return fontSize * 0.6; + } + if (ch >= QLatin1Char('0') && ch <= QLatin1Char('9')) { + return fontSize * 0.5; + } + + // 空格 + if (ch == QLatin1Char(' ') || ch == QChar(0x00A0)) { // 普通空格或非断行空格 + return fontSize * 0.3; + } + + // 标点符号等,使用中等宽度 + if (ch.isPunct()) { + return fontSize * 0.4; + } + + // 其他字符,使用默认值 + return fontSize * 0.6; +} + +QList XpsTextExtractor::parseIndices(const QString &indicesStr, int textLength, double fontSize) +{ + QList widths; + if (indicesStr.isEmpty() || textLength <= 0) { + return widths; + } + + // XPS Indices格式:每个字符对应一个值,用分号分隔 + // 格式可能是:",100;,100;,98.864;" 或 "100;100;98.864;" + // 值表示字符的advance width(相对于字体大小的比例,单位是1/100 em) + QStringList parts = indicesStr.split(QLatin1Char(';'), QString::SkipEmptyParts); + + for (int i = 0; i < parts.size() && i < textLength; ++i) { + QString part = parts[i].trimmed(); + // 移除可能的逗号前缀 + if (part.startsWith(QLatin1Char(','))) { + part = part.mid(1); + } + + bool ok = false; + double value = part.toDouble(&ok); + if (ok && value > 0) { + // Indices中的值是相对于字体大小的比例(单位是1/100 em) + // 所以实际宽度 = fontSize * value / 100.0 + double width = fontSize * value / 100.0; + widths.append(width); + } else { + // 如果解析失败,跳过(让调用者使用估算值) + } + } + + return widths; +} + +} // namespace deepin_reader + diff --git a/reader/document/XpsTextExtractor.h b/reader/document/XpsTextExtractor.h new file mode 100644 index 00000000..c5e6dece --- /dev/null +++ b/reader/document/XpsTextExtractor.h @@ -0,0 +1,134 @@ +// Copyright (C) 2019 ~ 2025 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2025 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef XPSTEXTEXTRACTOR_H +#define XPSTEXTEXTRACTOR_H + +#include "Model.h" + +#include +#include +#include +#include +#include + +class QXmlStreamReader; + +namespace deepin_reader { + +/** + * @brief XPS文本提取器 + * 从XPS文件中提取文本和坐标信息 + */ +class XpsTextExtractor +{ +public: + /** + * @brief Glyph信息结构 + */ + struct GlyphInfo { + QString text; // 字符文本 + QPointF position; // 基础位置 (OriginX, OriginY) + QRectF boundingBox; // 字符边界框 + double fontSize; // 字体大小(点) + QString fontUri; // 字体资源URI(相对路径) + QTransform transform; // 应用的变换矩阵 + QList charWidths; // 字符宽度列表(从Indices解析,如果可用) + }; + + /** + * @brief 文本运行结构 + */ + struct TextRun { + QString text; // 完整文本 + QRectF boundingBox; // 文本运行边界框 + QList glyphs; // 字符列表 + }; + + /** + * @brief 从XPS文件路径提取指定页面的文本 + * @param filePath XPS文件路径 + * @param pageIndex 页面索引(从0开始) + * @return Word列表 + */ + static QList extractWords(const QString &filePath, int pageIndex); + + /** + * @brief 从XPS文件路径提取指定页面的文本(返回详细信息) + * @param filePath XPS文件路径 + * @param pageIndex 页面索引(从0开始) + * @return TextRun列表 + */ + static QList extractTextRuns(const QString &filePath, int pageIndex); + +private: + /** + * @brief 解析FixedPage XML文件 + * @param xmlData XML数据 + * @param pageIndex 页面索引(用于日志) + * @return TextRun列表 + */ + static QList parseFixedPage(const QByteArray &xmlData, int pageIndex); + + /** + * @brief 解析Glyphs元素 + * @param xml XML读取器(当前位置应在Glyphs元素) + * @param parentTransform 父变换矩阵 + * @return GlyphInfo结构 + */ + static GlyphInfo parseGlyphs(QXmlStreamReader &xml, const QTransform &parentTransform); + + /** + * @brief 计算字符边界框 + * @param glyph 字符信息 + * @return 边界框 + */ + static QRectF calculateGlyphBoundingBox(const GlyphInfo &glyph); + + /** + * @brief 解析变换矩阵字符串 "m11,m12,m21,m22,dx,dy" + * @param transformStr 变换矩阵字符串 + * @return QTransform对象 + */ + static QTransform parseTransformMatrix(const QString &transformStr); + + /** + * @brief 估算字符宽度(当字体不可用时) + * @param ch 字符 + * @param fontSize 字体大小 + * @return 估算的字符宽度 + */ + static double estimateCharWidth(QChar ch, double fontSize); + + /** + * @brief 解析XPS Indices属性,获取字符宽度列表 + * @param indicesStr Indices属性字符串 + * @param textLength 文本长度 + * @param fontSize 字体大小 + * @return 字符宽度列表(单位:点) + */ + static QList parseIndices(const QString &indicesStr, int textLength, double fontSize); + + /** + * @brief 从XPS ZIP包中读取FixedPage文件 + * @param filePath XPS文件路径 + * @param pageIndex 页面索引 + * @return XML数据,失败返回空QByteArray + */ + static QByteArray readFixedPageFromZip(const QString &filePath, int pageIndex); + + /** + * @brief 查找FixedPage文件路径 + * @param filePath XPS文件路径 + * @param pageIndex 页面索引 + * @return FixedPage文件在ZIP中的路径,失败返回空字符串 + */ + static QString findFixedPagePath(const QString &filePath, int pageIndex); +}; + +} // namespace deepin_reader + +#endif // XPSTEXTEXTRACTOR_H +