From 23a3e5144e9fe5329b99b26879856e9ca3db769d Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 3 Mar 2024 15:36:15 +0100 Subject: [PATCH 01/19] Fix srcdir=builddir build cmake will create folders which so we need to ignore those --- utils/po/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/po/CMakeLists.txt b/utils/po/CMakeLists.txt index d9fca3c55..5cd3a9035 100644 --- a/utils/po/CMakeLists.txt +++ b/utils/po/CMakeLists.txt @@ -1,6 +1,6 @@ file(GLOB _files "${CMAKE_CURRENT_SOURCE_DIR}/*") foreach (_dir ${_files}) - if (IS_DIRECTORY "${_dir}") + if (IS_DIRECTORY "${_dir}" AND EXISTS "${_dir}/CMakeLists.txt") add_subdirectory("${_dir}") get_filename_component(_lang ${_dir} NAME) list(APPEND _langs ${_lang}) From 481ee336d15bdf6b2b6084dddcd1617b032d2cd5 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 3 Mar 2024 12:16:11 +0100 Subject: [PATCH 02/19] poppler 24.03.0 --- CMakeLists.txt | 4 ++-- NEWS | 12 ++++++++++++ cpp/Doxyfile | 2 +- qt5/src/Doxyfile | 2 +- qt6/src/Doxyfile | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4afb638c7..1c52b5385 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ if (ECM_FOUND) endif() set(POPPLER_MAJOR_VERSION "24") -set(POPPLER_MINOR_VERSION_STRING "02") +set(POPPLER_MINOR_VERSION_STRING "03") # We want the string version to have 08 but the integer version can't have a leading 0 since otherwise it's considered octal # So strip a leading 0 if found in POPPLER_MINOR_VERSION_STRING and store the result in POPPLER_MINOR_VERSION string(REGEX REPLACE "^0?(.+)$" "\\1" POPPLER_MINOR_VERSION "${POPPLER_MINOR_VERSION_STRING}") @@ -614,7 +614,7 @@ ADD_GPERF_FILE(TimesItalicWidths) ADD_GPERF_FILE(TimesRomanWidths) ADD_GPERF_FILE(ZapfDingbatsWidths) -set(POPPLER_SOVERSION_NUMBER "134") +set(POPPLER_SOVERSION_NUMBER "135") set(LINKER_SCRIPT "${CMAKE_BINARY_DIR}/libpoppler.map") configure_file( diff --git a/NEWS 
b/NEWS index 6c10aecc7..6bce616df 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,15 @@ +Release 24.03.0: + core: + * Fix opening some malformed files. Issue #1447 + * Skip drawing image when it has singular matrix. Issue #1114 + * Fix crash on malformed files + * Small internal code cleanup + + utils: + * pdfdetach: Fix potential directory traversal + * pdfimages: Enable to print filenames to stdout. + * pdfsig: Add visible name/date when signing an existing form signature field + Release 24.02.0: core: * Fix reading some JBIG2 streams. Issue #1319 diff --git a/cpp/Doxyfile b/cpp/Doxyfile index e9ac5abf2..d04ab98d8 100644 --- a/cpp/Doxyfile +++ b/cpp/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = "Poppler CPP" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 24.02.0 +PROJECT_NUMBER = 24.03.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/qt5/src/Doxyfile b/qt5/src/Doxyfile index f733d0a95..f8ae14f62 100644 --- a/qt5/src/Doxyfile +++ b/qt5/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = "Poppler Qt5" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 24.02.0 +PROJECT_NUMBER = 24.03.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/qt6/src/Doxyfile b/qt6/src/Doxyfile index 4d52a599b..923e11bf4 100644 --- a/qt6/src/Doxyfile +++ b/qt6/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = "Poppler Qt6" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 24.02.0 +PROJECT_NUMBER = 24.03.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
From 9c2cf5608a21b6fb9be4e0c7918d13cd2b652c23 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 3 Mar 2024 18:39:00 +0100 Subject: [PATCH 03/19] CI: Switch debian to testing unstable is broken at the moment --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5428cf68f..bff3bc173 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -image: debian:unstable +image: debian:testing stages: - build @@ -6,7 +6,7 @@ stages: - publish before_script: - - echo 'deb-src http://deb.debian.org/debian unstable main' >> /etc/apt/sources.list + - echo 'deb-src http://deb.debian.org/debian testing main' >> /etc/apt/sources.list - apt-get update - apt-get build-dep --yes --no-install-recommends poppler - apt-get install --yes --no-install-recommends ninja-build libcurl4-openssl-dev git ca-certificates locales libgtk-3-dev libbrotli-dev libboost-container-dev qt6-base-dev From 99f5416d9bfb6a34bbc5cbcb368e258b98fd3f08 Mon Sep 17 00:00:00 2001 From: "Josep M. Ferrer" Date: Fri, 1 Mar 2024 15:09:08 +0100 Subject: [PATCH 04/19] pdfsig: Catalan translation --- utils/po/ca/CMakeLists.txt | 4 ++++ utils/po/ca/pdfsig.po | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 utils/po/ca/CMakeLists.txt create mode 100644 utils/po/ca/pdfsig.po diff --git a/utils/po/ca/CMakeLists.txt b/utils/po/ca/CMakeLists.txt new file mode 100644 index 000000000..3225131ce --- /dev/null +++ b/utils/po/ca/CMakeLists.txt @@ -0,0 +1,4 @@ +get_filename_component(_lang ${CMAKE_CURRENT_SOURCE_DIR} NAME) +gettext_process_po_files(${_lang} ALL INSTALL_DESTINATION ${CMAKE_INSTALL_LOCALEDIR} PO_FILES + pdfsig.po +) diff --git a/utils/po/ca/pdfsig.po b/utils/po/ca/pdfsig.po new file mode 100644 index 000000000..fe6a14e7b --- /dev/null +++ b/utils/po/ca/pdfsig.po @@ -0,0 +1,26 @@ +# Translation of pdfsig.po to Catalan +# Copyright (C) 2024, Josep M. 
Ferrer +# This file is distributed under the same license as the pdfsig package. +# +msgid "" +msgstr "" +"Project-Id-Version: pdfsig\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-02-12 14:44-0700\n" +"PO-Revision-Date: 2024-03-04 23:01+0100\n" +"Last-Translator: Josep M. Ferrer \n" +"Language-Team: Catalan \n" +"Language: ca\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=n != 1;\n" +"X-Generator: Lokalize 22.12.3\n" + +#: pdfsig.cc:544 +msgid "Digitally signed by {0:s}" +msgstr "Signat digitalment per {0:s}" + +#: pdfsig.cc:544 +msgid "Date: {0:s}" +msgstr "Data: {0:s}" From 6a7f14bd867bc7215e1c94d27fb00c84c88a1db9 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Tue, 5 Mar 2024 00:41:43 +0100 Subject: [PATCH 05/19] Revert "CI: Switch debian to testing" This reverts commit 9c2cf5608a21b6fb9be4e0c7918d13cd2b652c23. --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bff3bc173..5428cf68f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -image: debian:testing +image: debian:unstable stages: - build @@ -6,7 +6,7 @@ stages: - publish before_script: - - echo 'deb-src http://deb.debian.org/debian testing main' >> /etc/apt/sources.list + - echo 'deb-src http://deb.debian.org/debian unstable main' >> /etc/apt/sources.list - apt-get update - apt-get build-dep --yes --no-install-recommends poppler - apt-get install --yes --no-install-recommends ninja-build libcurl4-openssl-dev git ca-certificates locales libgtk-3-dev libbrotli-dev libboost-container-dev qt6-base-dev From 10377f9df110aaeb8d68df4d01f049d9cc9f9c21 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Wed, 27 Mar 2024 00:43:01 +0100 Subject: [PATCH 06/19] qt6: Fix crash in SoundObject::data --- qt6/src/poppler-sound.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qt6/src/poppler-sound.cc 
b/qt6/src/poppler-sound.cc index 2a33a494d..94248db99 100644 --- a/qt6/src/poppler-sound.cc +++ b/qt6/src/poppler-sound.cc @@ -86,7 +86,7 @@ QByteArray SoundObject::data() const QByteArray fileArray; int i; while ((i = stream->getChar()) != EOF) { - fileArray[dataLen] = (char)i; + fileArray.append((char)i); ++dataLen; } fileArray.resize(dataLen); From a10901554010bc5bbc0f24a8d14fdcdecc1b8367 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Thu, 28 Mar 2024 00:05:28 +0100 Subject: [PATCH 07/19] Update (C) --- qt5/src/poppler-sound.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qt5/src/poppler-sound.cc b/qt5/src/poppler-sound.cc index 4339ab9e7..296b5120d 100644 --- a/qt5/src/poppler-sound.cc +++ b/qt5/src/poppler-sound.cc @@ -1,6 +1,6 @@ /* poppler-sound.cc: qt interface to poppler * Copyright (C) 2006-2007, Pino Toscano - * Copyright (C) 2008, 2018, 2020, Albert Astals Cid + * Copyright (C) 2008, 2018, 2020, 2024, Albert Astals Cid * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From 7a435135a1bfb8c3f9f5984d88bbe5dd8977335a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nelson=20Ben=C3=ADtez=20Le=C3=B3n?= Date: Sat, 23 Mar 2024 12:40:07 +0000 Subject: [PATCH 08/19] Fix regression on issue #157 Redo the fix for issue #157 which is about doing transparent selection for glyphless documents (eg. tesseract scanned documents) because it stopped working after commit 29f32a47 --- poppler/TextOutputDev.cc | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 6fe1def1a..6e6f55d6a 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4805,6 +4805,16 @@ bool TextSelectionPainter::hasGlyphLessFont() void TextSelectionPainter::endPage() { + /* Take a shortcut for glyphless fonts (eg. 
Tesseract scanned documents) + * cause we just paint a transparent fill over existent text.Issue #157 */ + if (hasGlyphLessFont()) { + state->setFillOpacity(glyphlessSelectionOpacity); + out->updateFillOpacity(state); + out->fill(state); + out->endPage(); + return; + } + out->fill(state); out->saveState(state); @@ -4814,12 +4824,6 @@ void TextSelectionPainter::endPage() state->setFillColor(glyph_color); - bool usingGlyphLessFont = hasGlyphLessFont(); - /* Paint transparent selection when using tesseract glyphless font. Issue #157 */ - if (usingGlyphLessFont) { - state->setFillOpacity(glyphlessSelectionOpacity); - } - out->updateFillColor(state); for (const TextWordSelection *sel : *selectionList) { @@ -4844,13 +4848,11 @@ void TextSelectionPainter::endPage() GooString *string = new GooString((char *)sel->word->charcode, fEnd - begin); out->beginString(state, string); - if (!usingGlyphLessFont) { - for (int j = begin; j < fEnd; j++) { - if (j != begin && sel->word->charPos[j] == sel->word->charPos[j - 1]) { - continue; - } - out->drawChar(state, sel->word->textMat[j].m[4], sel->word->textMat[j].m[5], 0, 0, 0, 0, sel->word->charcode[j], 1, nullptr, 0); + for (int j = begin; j < fEnd; j++) { + if (j != begin && sel->word->charPos[j] == sel->word->charPos[j - 1]) { + continue; } + out->drawChar(state, sel->word->textMat[j].m[4], sel->word->textMat[j].m[5], 0, 0, 0, 0, sel->word->charcode[j], 1, nullptr, 0); } out->endString(state); delete string; From 9ace4f33e38fe24add87dc4e7c2a43e1441f2bec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nelson=20Ben=C3=ADtez=20Le=C3=B3n?= Date: Tue, 12 Mar 2024 21:37:46 +0000 Subject: [PATCH 09/19] Fix text search across lines between paragraphs This commit fixes the "across lines" text search feature of TextPage::findText() when the match happens from the last line of a paragraph to the first line of next paragraph. Includes tests for this bug. 
Fixes #1475 Fixes https://gitlab.gnome.org/GNOME/evince/-/issues/2001 --- poppler/TextOutputDev.cc | 60 +++++++++++++++++++++++--------------- qt5/tests/check_search.cpp | 7 +++++ qt6/tests/check_search.cpp | 7 +++++ 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 6e6f55d6a..034209ebd 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -3860,7 +3860,8 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB TextLine *line; Unicode *s2, *txt, *reordered; Unicode *p; - Unicode *nextline; + TextLine *nextline; + Unicode *nextline_txt; int nextline_len; bool nextlineAfterHyphen = false; int txtSize, m, i, j, k; @@ -3969,11 +3970,22 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB line->normalized = unicodeNormalizeNFKC(line->text, line->len, &line->normalized_len, &line->normalized_idx, true); } - if (matchAcrossLines && line->next && !line->next->normalized) { - line->next->normalized = unicodeNormalizeNFKC(line->next->text, line->next->len, &line->next->normalized_len, &line->next->normalized_idx, true); - } nextline = nullptr; + nextline_txt = nullptr; nextline_len = 0; + if (line->next) { + nextline = line->next; + } else { + // set nextline to first line of next block + int ind = i + (backward ? 
-1 : 1); + if ((backward && ind >= 0) || (!backward && ind < nBlocks)) { + nextline = blocks[ind]->lines; + } + } + + if (matchAcrossLines && nextline && !nextline->normalized) { + nextline->normalized = unicodeNormalizeNFKC(nextline->text, nextline->len, &nextline->normalized_len, &nextline->normalized_idx, true); + } // convert the line to uppercase m = line->normalized_len; @@ -3988,8 +4000,8 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB ignoreDiacritics = false; } - if (matchAcrossLines && line->next && !line->next->ascii_translation) { - unicodeToAscii7(line->next->normalized, line->next->normalized_len, &line->next->ascii_translation, &line->next->ascii_len, line->next->normalized_idx, &line->next->ascii_idx); + if (matchAcrossLines && nextline && !nextline->ascii_translation) { + unicodeToAscii7(nextline->normalized, nextline->normalized_len, &nextline->ascii_translation, &nextline->ascii_len, nextline->normalized_idx, &nextline->ascii_idx); } } if (!caseSensitive) { @@ -4004,11 +4016,11 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB txt[k] = unicodeToUpper(line->normalized[k]); } } - if (matchAcrossLines && line->next) { - nextline_len = ignoreDiacritics ? line->next->ascii_len : line->next->normalized_len; - nextline = (Unicode *)gmallocn(nextline_len, sizeof(Unicode)); + if (matchAcrossLines && nextline) { + nextline_len = ignoreDiacritics ? nextline->ascii_len : nextline->normalized_len; + nextline_txt = (Unicode *)gmallocn(nextline_len, sizeof(Unicode)); for (k = 0; k < nextline_len; ++k) { - nextline[k] = ignoreDiacritics ? unicodeToUpper(line->next->ascii_translation[k]) : unicodeToUpper(line->next->normalized[k]); + nextline_txt[k] = ignoreDiacritics ? 
unicodeToUpper(nextline->ascii_translation[k]) : unicodeToUpper(nextline->normalized[k]); } } } else { @@ -4018,20 +4030,20 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB txt = line->normalized; } - if (matchAcrossLines && line->next) { - nextline_len = ignoreDiacritics ? line->next->ascii_len : line->next->normalized_len; - nextline = ignoreDiacritics ? line->next->ascii_translation : line->next->normalized; + if (matchAcrossLines && nextline) { + nextline_len = ignoreDiacritics ? nextline->ascii_len : nextline->normalized_len; + nextline_txt = ignoreDiacritics ? nextline->ascii_translation : nextline->normalized; } } // search each position in this line j = backward ? m - len : 0; p = txt + j; - while (backward ? j >= 0 : j <= m - (nextline ? 1 : len)) { + while (backward ? j >= 0 : j <= m - (nextline_txt ? 1 : len)) { bool wholeWordStartIsOk, wholeWordEndIsOk; if (wholeWord) { wholeWordStartIsOk = j == 0 || !unicodeTypeAlphaNum(txt[j - 1]); - if (nextline) { + if (nextline_txt) { wholeWordEndIsOk = true; // word end may be in next line, so we'll check it later } else { wholeWordEndIsOk = j + len == m || !unicodeTypeAlphaNum(txt[j + len]); @@ -4048,7 +4060,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB bool last_char_of_search_term = k == len - 1; bool match_started = (bool)k; - if (p[k] != s2[k] || (nextline && last_char_of_line && !last_char_of_search_term)) { + if (p[k] != s2[k] || (nextline_txt && last_char_of_line && !last_char_of_search_term)) { // now check if the comparison failed at the end-of-line hyphen, // and if so, keep on comparing at the next line nextlineAfterHyphen = false; @@ -4065,7 +4077,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB } for (; n < nextline_len && k < len; ++k, ++n) { - if (nextline[n] != s2[k]) { + if (nextline_txt[n] != s2[k]) { if (!spaceConsumedByNewline && !n && UnicodeIsWhitespace(s2[k])) { n = -1; 
spaceConsumedByNewline = true; @@ -4079,9 +4091,9 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB } found_it = k == len; - if (found_it && nextline && wholeWord) { // check word end for nextline case + if (found_it && nextline_txt && wholeWord) { // check word end for nextline case if (n) { // Match ended at next line - wholeWordEndIsOk = n == nextline_len || !unicodeTypeAlphaNum(nextline[n]); + wholeWordEndIsOk = n == nextline_len || !unicodeTypeAlphaNum(nextline_txt[n]); } else { // Match ended on same line wholeWordEndIsOk = j + len == m || !unicodeTypeAlphaNum(txt[j + len]); } @@ -4102,14 +4114,14 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB int normStart, normAfterEnd; if (ignoreDiacritics) { normStart = line->ascii_idx[j]; - if (nextline) { + if (nextline_txt) { normAfterEnd = line->ascii_idx[j + k - n]; } else { normAfterEnd = line->ascii_idx[j + len - 1] + 1; } } else { normStart = line->normalized_idx[j]; - if (nextline) { + if (nextline_txt) { normAfterEnd = line->normalized_idx[j + k - n]; } else { normAfterEnd = line->normalized_idx[j + len - 1] + 1; @@ -4142,7 +4154,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB } if (continueMatch) { - adjustRotation(line->next, 0, n, &xMin2, &xMax2, &yMin2, &yMax2); + adjustRotation(nextline, 0, n, &xMin2, &xMax2, &yMin2, &yMax2); continueMatch->x1 = xMin2; continueMatch->y1 = yMax2; continueMatch->x2 = xMax2; @@ -4169,8 +4181,8 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB } } - if (nextline && nextline != line->next->ascii_translation && nextline != line->next->normalized) { - gfree(nextline); + if (nextline_txt && nextline_txt != nextline->ascii_translation && nextline_txt != nextline->normalized) { + gfree(nextline_txt); } } } diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp index c9bb65e3a..be2d6bcc9 100644 --- a/qt5/tests/check_search.cpp +++ 
b/qt5/tests/check_search.cpp @@ -350,6 +350,13 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page0->search(str6, l, t, r, b, direction, mode1), true); QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2), true); QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2W), true); + // Check for the case when next line falls in next paragraph. Issue #1475 + const QString across_block = QString::fromUtf8("emacs jose"); // clazy:exclude=qstring-allocations + QCOMPARE(page0->search(across_block, l, t, r, b, direction, empty), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode0), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode1), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2), true); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2W), true); // Now for completeness, we will match the full text of two lines const QString full2lines = QString::fromUtf8("Las pruebas se practicarán en vista pública, si bien, excepcionalmente, el Tribunal podrá acordar, mediante providencia, que determinadas pruebas se celebren fuera del acto de juicio"); diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp index ede2d0c29..6242676b6 100644 --- a/qt6/tests/check_search.cpp +++ b/qt6/tests/check_search.cpp @@ -348,6 +348,13 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page0->search(str6, l, t, r, b, direction, mode1), true); QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2), true); QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2W), true); + // Check for the case when next line falls in next paragraph. 
Issue #1475 + const QString across_block = QString::fromUtf8("emacs jose"); // clazy:exclude=qstring-allocations + QCOMPARE(page0->search(across_block, l, t, r, b, direction, empty), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode0), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode1), false); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2), true); + QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2W), true); // Now for completeness, we will match the full text of two lines const QString full2lines = QString::fromUtf8( From e803b3714a44001ac1e001d948ae505b24086b66 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Thu, 8 Feb 2024 00:36:51 +0100 Subject: [PATCH 10/19] Fix clang-tidy-17 "unnecessary temporary object created while calling emplace_back" Says modernize-use-emplace No need to pass the c, we will set it later so we can just use the default constructed CharCodeToUnicodeString --- poppler/CharCodeToUnicode.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poppler/CharCodeToUnicode.cc b/poppler/CharCodeToUnicode.cc index 6035a6ac6..7c92e0ac3 100644 --- a/poppler/CharCodeToUnicode.cc +++ b/poppler/CharCodeToUnicode.cc @@ -13,7 +13,7 @@ // All changes made under the Poppler project to this file are licensed // under GPL version 2 or later // -// Copyright (C) 2006, 2008-2010, 2012, 2018-2022 Albert Astals Cid +// Copyright (C) 2006, 2008-2010, 2012, 2018-2022, 2024 Albert Astals Cid // Copyright (C) 2007 Julien Rebetez // Copyright (C) 2007 Koji Otani // Copyright (C) 2008 Michael Vrable @@ -504,7 +504,7 @@ void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) } } if (!element) { - sMap.emplace_back(CharCodeToUnicodeString { c, {} }); + sMap.emplace_back(); element = std::ref(sMap.back()); } map[c] = 0; From 835987362d9873cf98cc3f86959910ff2107a509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Sun, 24 Mar 2024 
00:31:52 +0100 Subject: [PATCH 11/19] Reduce TextWord space and allocation overhead Currently, the word characters are allocated as a struct of arrays, e.g. text and charcode are allocated separately. This causes some space (6 pointers, 6 malloc chunk management words (size_t/flags), alignment, ...) and runtime overhead (6 allocs/frees per word). Changing this to an array of structs reduces this overhead. It also allows being more conservative with allocations, as resizing is less costly, i.e. starting with a single character allocation instead of 16. It is also more efficient, as most accesses affect multiple or all attributes, i.e. values in the same or neighboring CPU cache lines. Using a std::vector instead of separate raw arrays also reduces code and manual data management. The "charPos end index" and trailing "edge" attributes are no longer stored as an additional entry in the array, but as dedicated data members, `charPosEnd` and `edgeEnd`. The memory saving is most notable for short words, but even for words with 16 characters there are small savings, and still fewer allocations (1 + 4 allocations instead of 6; growing is fairly cheap, as the CharInfo struct is trivially copyable). See poppler#1173.
--- poppler/TextOutputDev.cc | 378 ++++++++++++++++++--------------------- poppler/TextOutputDev.h | 42 +++-- 2 files changed, 197 insertions(+), 223 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 034209ebd..03b68bc23 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -419,13 +419,6 @@ TextWord::TextWord(const GfxState *state, int rotA, double fontSizeA) { rot = rotA; fontSize = fontSizeA; - text = nullptr; - charcode = nullptr; - edge = nullptr; - charPos = nullptr; - font = nullptr; - textMat = nullptr; - len = size = 0; spaceAfter = false; next = nullptr; invisible = state->getRender() == 3; @@ -447,27 +440,14 @@ TextWord::TextWord(const GfxState *state, int rotA, double fontSizeA) link = nullptr; } -TextWord::~TextWord() -{ - gfree(text); - gfree(charcode); - gfree(edge); - gfree(charPos); - gfree(font); - gfree(textMat); -} +TextWord::~TextWord() { } void TextWord::addChar(const GfxState *state, TextFontInfo *fontA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA) { - ensureCapacity(len + 1); - text[len] = u; - charcode[len] = c; - charPos[len] = charPosA; - charPos[len + 1] = charPosA + charLen; - font[len] = fontA; - textMat[len] = textMatA; + chars.push_back(CharInfo { u, c, charPosA, 0.0, fontA, textMatA }); + charPosEnd = charPosA + charLen; - if (len == 0) { + if (len() == 1) { setInitialBounds(fontA, x, y); } @@ -476,43 +456,42 @@ void TextWord::addChar(const GfxState *state, TextFontInfo *fontA, double x, dou // TextPage::beginWord()) for vertical writing mode switch (rot) { case 0: - edge[len] = x - fontSize; - xMax = edge[len + 1] = x; + chars.back().edge = x - fontSize; + xMax = edgeEnd = x; break; case 1: - edge[len] = y - fontSize; - yMax = edge[len + 1] = y; + chars.back().edge = y - fontSize; + yMax = edgeEnd = y; break; case 2: - edge[len] = x + fontSize; - xMin = edge[len + 1] = x; + chars.back().edge = x + fontSize; + 
xMin = edgeEnd = x; break; case 3: - edge[len] = y + fontSize; - yMin = edge[len + 1] = y; + chars.back().edge = y + fontSize; + yMin = edgeEnd = y; break; } } else { // horizontal writing mode switch (rot) { case 0: - edge[len] = x; - xMax = edge[len + 1] = x + dx; + chars.back().edge = x; + xMax = edgeEnd = x + dx; break; case 1: - edge[len] = y; - yMax = edge[len + 1] = y + dy; + chars.back().edge = y; + yMax = edgeEnd = y + dy; break; case 2: - edge[len] = x; - xMin = edge[len + 1] = x + dx; + chars.back().edge = x; + xMin = edgeEnd = x + dx; break; case 3: - edge[len] = y; - yMin = edge[len + 1] = y + dy; + chars.back().edge = y; + yMin = edgeEnd = y + dy; break; } } - ++len; } void TextWord::setInitialBounds(TextFontInfo *fontA, double x, double y) @@ -604,19 +583,6 @@ void TextWord::setInitialBounds(TextFontInfo *fontA, double x, double y) } } -void TextWord::ensureCapacity(int capacity) -{ - if (capacity > size) { - size = std::max(size + 16, capacity); - text = (Unicode *)greallocn(text, size, sizeof(Unicode)); - charcode = (CharCode *)greallocn(charcode, (size + 1), sizeof(CharCode)); - edge = (double *)greallocn(edge, (size + 1), sizeof(double)); - charPos = (int *)greallocn(charPos, size + 1, sizeof(int)); - font = (TextFontInfo **)greallocn(font, size, sizeof(TextFontInfo *)); - textMat = (Matrix *)greallocn(textMat, size, sizeof(Matrix)); - } -} - struct CombiningTable { Unicode base; @@ -651,19 +617,15 @@ static Unicode getCombiningChar(Unicode u) bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double fontSizeA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA) { - if (len == 0 || wMode != 0 || fontA->getWMode() != 0) { + if (chars.empty() || wMode != 0 || fontA->getWMode() != 0) { return false; } Unicode cCurrent = getCombiningChar(u); - Unicode cPrev = getCombiningChar(text[len - 1]); - double edgeMid = (edge[len - 1] + edge[len]) / 2; - double charMid, 
maxScaledMidDelta, charBase, maxScaledBaseDelta; - - if (cCurrent != 0 && unicodeTypeAlphaNum(text[len - 1])) { + if (cCurrent != 0 && unicodeTypeAlphaNum(chars.back().text)) { // Current is a combining character, previous is base character - maxScaledMidDelta = fabs(edge[len] - edge[len - 1]) * combMaxMidDelta; - charMid = charBase = maxScaledBaseDelta = 0; + double maxScaledMidDelta = fabs(edgeEnd - chars.back().edge) * combMaxMidDelta; + double charMid, charBase, maxScaledBaseDelta; // Test if characters overlap if (rot == 0 || rot == 2) { @@ -676,29 +638,24 @@ bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double f maxScaledBaseDelta = (xMax - xMin) * combMaxBaseDelta; } + double edgeMid = (chars.back().edge + edgeEnd) / 2; if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) { return false; } // Add character, but don't adjust edge / bounding box because // combining character's positioning could be odd. - ensureCapacity(len + 1); - text[len] = cCurrent; - charcode[len] = c; - charPos[len] = charPosA; - charPos[len + 1] = charPosA + charLen; - font[len] = fontA; - textMat[len] = textMatA; - edge[len + 1] = edge[len]; - edge[len] = (edge[len + 1] + edge[len - 1]) / 2; - ++len; + chars.emplace_back(CharInfo { cCurrent, c, charPosA, edgeMid, fontA, textMatA }); + charPosEnd = charPosA + charLen; + return true; } + Unicode cPrev = getCombiningChar(chars.back().text); if (cPrev != 0 && unicodeTypeAlphaNum(u)) { // Previous is a combining character, current is base character - maxScaledBaseDelta = (fontA->getAscent() - fontA->getDescent()) * fontSizeA * combMaxBaseDelta; - charMid = charBase = maxScaledMidDelta = 0; + double maxScaledBaseDelta = (fontA->getAscent() - fontA->getDescent()) * fontSizeA * combMaxBaseDelta; + double charMid, charBase, maxScaledMidDelta; // Test if characters overlap if (rot == 0 || rot == 2) { @@ -711,73 +668,71 @@ bool TextWord::addCombining(const GfxState *state, 
TextFontInfo *fontA, double f maxScaledMidDelta = fabs(dy * combMaxMidDelta); } + double edgeMid = (chars.back().edge + edgeEnd) / 2; if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) { return false; } - // move combining character to after base character - ensureCapacity(len + 1); fontSize = fontSizeA; - text[len] = cPrev; - charcode[len] = charcode[len - 1]; - charPos[len] = charPosA; - charPos[len + 1] = charPosA + charLen; - font[len] = font[len - 1]; - textMat[len] = textMat[len - 1]; - - text[len - 1] = u; - charcode[len - 1] = c; - font[len - 1] = fontA; - textMat[len - 1] = textMatA; - - if (len == 1) { + // move combining character to after base character + chars.emplace_back(CharInfo { cPrev, chars.back().charcode, charPosA, edgeMid, chars.back().font, chars.back().textMat }); + + auto &lastChar = chars[chars.size() - 2]; + + charPosEnd = charPosA + charLen; + lastChar.text = u; + lastChar.charcode = c; + lastChar.font = fontA; + lastChar.textMat = textMatA; + + if (len() == 2) { setInitialBounds(fontA, x, y); } // Updated edges / bounding box because we changed the base // character. 
if (wMode) { + // FIXME unreachable, wMode == 0 switch (rot) { case 0: - edge[len - 1] = x - fontSize; - xMax = edge[len + 1] = x; + lastChar.edge = x - fontSize; + xMax = edgeEnd = x; break; case 1: - edge[len - 1] = y - fontSize; - yMax = edge[len + 1] = y; + lastChar.edge = y - fontSize; + yMax = edgeEnd = y; break; case 2: - edge[len - 1] = x + fontSize; - xMin = edge[len + 1] = x; + lastChar.edge = x + fontSize; + xMin = edgeEnd = x; break; case 3: - edge[len - 1] = y + fontSize; - yMin = edge[len + 1] = y; + lastChar.edge = y + fontSize; + yMin = edgeEnd = y; break; } } else { switch (rot) { case 0: - edge[len - 1] = x; - xMax = edge[len + 1] = x + dx; + lastChar.edge = x; + xMax = edgeEnd = x + dx; break; case 1: - edge[len - 1] = y; - yMax = edge[len + 1] = y + dy; + lastChar.edge = y; + yMax = edgeEnd = y + dy; break; case 2: - edge[len - 1] = x; - xMin = edge[len + 1] = x + dx; + lastChar.edge = x; + xMin = edgeEnd = x + dx; break; case 3: - edge[len - 1] = y; - yMin = edge[len + 1] = y + dy; + lastChar.edge = y; + yMin = edgeEnd = y + dy; break; } } - edge[len] = (edge[len + 1] + edge[len - 1]) / 2; - ++len; + chars.back().edge = (edgeEnd + lastChar.edge) / 2; return true; } return false; @@ -785,8 +740,6 @@ bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double f void TextWord::merge(TextWord *word) { - int i; - if (word->xMin < xMin) { xMin = word->xMin; } @@ -799,18 +752,9 @@ void TextWord::merge(TextWord *word) if (word->yMax > yMax) { yMax = word->yMax; } - ensureCapacity(len + word->len); - for (i = 0; i < word->len; ++i) { - text[len + i] = word->text[i]; - charcode[len + i] = word->charcode[i]; - edge[len + i] = word->edge[i]; - charPos[len + i] = word->charPos[i]; - font[len + i] = word->font[i]; - textMat[len + i] = word->textMat[i]; - } - edge[len + word->len] = word->edge[word->len]; - charPos[len + word->len] = word->charPos[word->len]; - len += word->len; + chars.insert(chars.end(), word->chars.begin(), 
word->chars.end()); + edgeEnd = word->edgeEnd; + charPosEnd = word->charPosEnd; } inline int TextWord::primaryCmp(const TextWord *word) const @@ -877,14 +821,13 @@ GooString *TextWord::getText() const GooString *s; const UnicodeMap *uMap; char buf[8]; - int n, i; s = new GooString(); if (!(uMap = globalParams->getTextEncoding())) { return s; } - for (i = 0; i < len; ++i) { - n = uMap->mapUnicode(text[i], buf, sizeof(buf)); + for (size_t i = 0; i < len(); ++i) { + auto n = uMap->mapUnicode(chars[i].text, buf, sizeof(buf)); s->append(buf, n); } return s; @@ -892,33 +835,39 @@ GooString *TextWord::getText() const void TextWord::getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const { - if (charIdx < 0 || charIdx >= len) { + if (charIdx < 0) { return; } + size_t uCharIdx = charIdx; + if (uCharIdx >= len()) { + return; + } + auto startingEdge = chars[uCharIdx].edge; + auto endingEdge = (uCharIdx + 1 == len()) ? edgeEnd : chars[charIdx + 1].edge; switch (rot) { case 0: - *xMinA = edge[charIdx]; - *xMaxA = edge[charIdx + 1]; + *xMinA = startingEdge; + *xMaxA = endingEdge; *yMinA = yMin; *yMaxA = yMax; break; case 1: *xMinA = xMin; *xMaxA = xMax; - *yMinA = edge[charIdx]; - *yMaxA = edge[charIdx + 1]; + *yMinA = startingEdge; + *yMaxA = endingEdge; break; case 2: - *xMinA = edge[charIdx + 1]; - *xMaxA = edge[charIdx]; + *xMinA = endingEdge; + *xMaxA = startingEdge; *yMinA = yMin; *yMaxA = yMax; break; case 3: *xMinA = xMin; *xMaxA = xMax; - *yMinA = edge[charIdx + 1]; - *yMaxA = edge[charIdx]; + *yMinA = endingEdge; + *yMaxA = startingEdge; break; } } @@ -1189,21 +1138,19 @@ int TextLine::cmpXY(const void *p1, const void *p2) void TextLine::coalesce(const UnicodeMap *uMap) { - TextWord *word0, *word1; double space, delta, minSpace; bool isUnicode; char buf[8]; - int i, j; if (words->next) { // compute the inter-word space threshold - if (words->len > 1 || words->next->len > 1) { + if (words->len() > 1 || words->next->len() > 1) { 
minSpace = 0; } else { minSpace = words->primaryDelta(words->next); - for (word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) { - if (word1->len > 1) { + for (auto word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) { + if (word1->len() > 1) { minSpace = 0; } delta = word0->primaryDelta(word1); @@ -1222,15 +1169,17 @@ void TextLine::coalesce(const UnicodeMap *uMap) } // merge words - word0 = words; - word1 = words->next; + auto word0 = words; + auto word1 = words->next; while (word1) { if (word0->primaryDelta(word1) >= space) { word0->spaceAfter = true; word0 = word1; word1 = word1->next; - } else if (word0->font[word0->len - 1] == word1->font[0] && word0->underlined == word1->underlined && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize - && word1->charPos[0] == word0->charPos[word0->len]) { + } else if (word0->chars.back().font == word1->chars.front().font // + && word0->underlined == word1->underlined // + && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize // + && word1->chars.front().charPos == word0->charPosEnd) { word0->merge(word1); word0->next = word1->next; delete word1; @@ -1245,22 +1194,22 @@ void TextLine::coalesce(const UnicodeMap *uMap) // build the line text isUnicode = uMap ? 
uMap->isUnicode() : false; len = 0; - for (word1 = words; word1; word1 = word1->next) { - len += word1->len; + for (auto word1 = words; word1; word1 = word1->next) { + len += word1->len(); if (word1->spaceAfter) { ++len; } } text = (Unicode *)gmallocn(len, sizeof(Unicode)); edge = (double *)gmallocn(len + 1, sizeof(double)); - i = 0; - for (word1 = words; word1; word1 = word1->next) { - for (j = 0; j < word1->len; ++j) { - text[i] = word1->text[j]; - edge[i] = word1->edge[j]; + size_t i = 0; + for (auto word1 = words; word1; word1 = word1->next) { + for (size_t j = 0; j < word1->len(); ++j) { + text[i] = word1->chars[j].text; + edge[i] = word1->chars[j].edge; ++i; } - edge[i] = word1->edge[word1->len]; + edge[i] = word1->edgeEnd; if (word1->spaceAfter) { text[i] = (Unicode)0x0020; ++i; @@ -1270,12 +1219,12 @@ void TextLine::coalesce(const UnicodeMap *uMap) // compute convertedLen and set up the col array col = (int *)gmallocn(len + 1, sizeof(int)); convertedLen = 0; - for (i = 0; i < len; ++i) { - col[i] = convertedLen; + for (int ci = 0; ci < len; ++ci) { + col[ci] = convertedLen; if (isUnicode) { ++convertedLen; } else if (uMap) { - convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf)); + convertedLen += uMap->mapUnicode(text[ci], buf, sizeof(buf)); } } col[len] = convertedLen; @@ -1690,8 +1639,14 @@ void TextBlock::coalesce(const UnicodeMap *uMap, double fixedPitch) word1 = nullptr; word2 = pool->getPool(idx1); } + TextWord *word1 = prevWord->next; + + auto equalText = [](const TextWord &w1, const TextWord &w2) -> bool { // + return std::equal(w1.chars.begin(), w1.chars.end(), w2.chars.begin(), w2.chars.end(), // + [](auto c1, auto c2) { return c1.text == c2.text; }); + }; for (; word2; word1 = word2, word2 = word2->next) { - if (word2->len == word0->len && !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode))) { + if (equalText(*word0, *word2)) { switch (rot) { case 0: case 2: @@ -2713,28 +2668,28 @@ void TextPage::addChar(const GfxState *state, 
double x, double y, double dx, dou // character to be in a word by itself at this stage), // (4) the font size has changed // (5) the WMode changed - if (curWord && curWord->len > 0) { + if (curWord && curWord->len() > 0) { base = sp = delta = 0; // make gcc happy switch (curWord->rot) { case 0: base = y1; sp = x1 - curWord->xMax; - delta = x1 - curWord->edge[curWord->len - 1]; + delta = x1 - curWord->chars.back().edge; break; case 1: base = x1; sp = y1 - curWord->yMax; - delta = y1 - curWord->edge[curWord->len - 1]; + delta = y1 - curWord->chars.back().edge; break; case 2: base = y1; sp = curWord->xMin - x1; - delta = curWord->edge[curWord->len - 1] - x1; + delta = curWord->chars.back().edge - x1; break; case 3: base = x1; sp = curWord->yMin - y1; - delta = curWord->edge[curWord->len - 1] - y1; + delta = curWord->chars.back().edge - y1; break; } overlap = fabs(delta) < dupMaxPriDelta * curWord->fontSize && fabs(base - curWord->base) < dupMaxSecDelta * curWord->fontSize; @@ -2813,7 +2768,7 @@ void TextPage::addWord(TextWord *word) { // throw away zero-length words -- they don't have valid xMin/xMax // values, and they're useless anyway - if (word->len == 0) { + if (word->len() == 0) { delete word; return; } @@ -3309,10 +3264,10 @@ void TextPage::coalesce(bool physLayout, double fixedPitch, bool doHTML, double for (blk = blkList; blk; blk = blk->next) { for (line = blk->lines; line; line = line->next) { for (word0 = line->words; word0; word0 = word0->next) { - for (int i = 0; i < word0->len; ++i) { - if (unicodeTypeL(word0->text[i])) { + for (size_t i = 0; i < word0->len(); ++i) { + if (unicodeTypeL(word0->chars[i].text)) { ++lrCount; - } else if (unicodeTypeR(word0->text[i])) { + } else if (unicodeTypeR(word0->chars[i].text)) { --lrCount; } } @@ -4587,12 +4542,16 @@ GooString *TextSelectionDumper::getText() spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); + std::vector uText; for (i = 0; i < nLines; i++) { 
std::vector *lineWords = lines[i]; for (std::size_t j = 0; j < lineWords->size(); j++) { TextWordSelection *sel = (*lineWords)[j]; - page->dumpFragment(sel->word->text + sel->begin, sel->end - sel->begin, uMap, text); + uText.resize(sel->end - sel->begin); + std::transform(sel->word->chars.begin() + sel->begin, sel->word->chars.begin() + sel->end, uText.begin(), [](auto &c) { return c.text; }); + page->dumpFragment(uText.data(), uText.size(), uMap, text); + if (j < lineWords->size() - 1 && sel->word->spaceAfter) { text->append(space, spaceLen); } @@ -4838,36 +4797,39 @@ void TextSelectionPainter::endPage() out->updateFillColor(state); + GooString string; for (const TextWordSelection *sel : *selectionList) { int begin = sel->begin; while (begin < sel->end) { - TextFontInfo *font = sel->word->font[begin]; - Matrix *mat = &sel->word->textMat[begin]; + TextFontInfo *font = sel->word->chars[begin].font; + const Matrix *mat = &sel->word->chars[begin].textMat; state->setTextMat(mat->m[0], mat->m[1], mat->m[2], mat->m[3], 0, 0); state->setFont(font->gfxFont, 1); out->updateFont(state); int fEnd = begin + 1; - while (fEnd < sel->end && font->matches(sel->word->font[fEnd]) && mat->m[0] == sel->word->textMat[fEnd].m[0] && mat->m[1] == sel->word->textMat[fEnd].m[1] && mat->m[2] == sel->word->textMat[fEnd].m[2] - && mat->m[3] == sel->word->textMat[fEnd].m[3]) { + while (fEnd < sel->end && font->matches(sel->word->chars[fEnd].font) // + && mat->m[0] == sel->word->chars[fEnd].textMat.m[0] && mat->m[1] == sel->word->chars[fEnd].textMat.m[1] // + && mat->m[2] == sel->word->chars[fEnd].textMat.m[2] && mat->m[3] == sel->word->chars[fEnd].textMat.m[3]) { fEnd++; } /* The only purpose of this string is to let the output device query * it's length. Might want to change this interface later. 
*/ - GooString *string = new GooString((char *)sel->word->charcode, fEnd - begin); - out->beginString(state, string); + string.clear(); + std::for_each(sel->word->chars.begin() + begin, sel->word->chars.begin() + fEnd, [&string](const auto c) { string.append(c.charcode); }); + out->beginString(state, &string); for (int j = begin; j < fEnd; j++) { - if (j != begin && sel->word->charPos[j] == sel->word->charPos[j - 1]) { + const auto &charJ = sel->word->chars[j]; + if (j != begin && charJ.charPos == sel->word->chars[j - 1].charPos) { continue; } - out->drawChar(state, sel->word->textMat[j].m[4], sel->word->textMat[j].m[5], 0, 0, 0, 0, sel->word->charcode[j], 1, nullptr, 0); + out->drawChar(state, charJ.textMat.m[4], charJ.textMat.m[5], 0, 0, 0, 0, charJ.charcode, 1, nullptr, 0); } out->endString(state); - delete string; begin = fEnd; } } @@ -4878,7 +4840,6 @@ void TextSelectionPainter::endPage() void TextWord::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style) { - int i, begin, end; double mid, s1, s2; if (rot == 0 || rot == 2) { @@ -4889,10 +4850,14 @@ void TextWord::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle s2 = selection->y2; } - begin = len; - end = 0; - for (i = 0; i < len; i++) { - mid = (edge[i] + edge[i + 1]) / 2; + size_t begin = len(); + size_t end = 0; + for (size_t i = 0; i < len(); i++) { + if (i + 1 < len()) { + mid = (chars[i].edge + chars[i + 1].edge) / 2; + } else { + mid = (chars[i].edge + edgeEnd) / 2; + } if (XBetweenAB(mid, s1, s2)) { if (i < begin) { begin = i; @@ -5279,7 +5244,6 @@ bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, do double xMin0, xMax0, yMin0, yMax0; double xMin1, xMax1, yMin1, yMax1; bool first; - int i, j0, j1; if (rawOrder) { return false; @@ -5291,41 +5255,44 @@ bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, do first = true; xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy xMin1 = xMax1 = 
yMin1 = yMax1 = 0; // make gcc happy - for (i = 0; i < nBlocks; ++i) { + for (int i = 0; i < nBlocks; ++i) { blk = blocks[i]; for (line = blk->lines; line; line = line->next) { for (word = line->words; word; word = word->next) { - if (pos < word->charPos[word->len] && pos + length > word->charPos[0]) { - for (j0 = 0; j0 < word->len && pos >= word->charPos[j0 + 1]; ++j0) { + if (pos < word->charPosEnd && pos + length > word->chars.front().charPos) { + size_t j0, j1; + for (j0 = 0; (j0 + 1) < word->len() && pos >= word->chars[j0 + 1].charPos; ++j0) { ; } - for (j1 = word->len - 1; j1 > j0 && pos + length <= word->charPos[j1]; --j1) { + for (j1 = word->len(); j1 > j0 && pos + length <= word->chars[j1].charPos; --j1) { ; } + auto startingEdge = word->chars[j0].edge; + auto endingEdge = (j1 + 1 == word->len()) ? word->edgeEnd : word->chars[j1 + 1].edge; switch (line->rot) { case 0: - xMin1 = word->edge[j0]; - xMax1 = word->edge[j1 + 1]; + xMin1 = startingEdge; + xMax1 = endingEdge; yMin1 = word->yMin; yMax1 = word->yMax; break; case 1: xMin1 = word->xMin; xMax1 = word->xMax; - yMin1 = word->edge[j0]; - yMax1 = word->edge[j1 + 1]; + yMin1 = startingEdge; + yMax1 = endingEdge; break; case 2: - xMin1 = word->edge[j1 + 1]; - xMax1 = word->edge[j0]; + xMin1 = endingEdge; + xMax1 = startingEdge; yMin1 = word->yMin; yMax1 = word->yMax; break; case 3: xMin1 = word->xMin; xMax1 = word->xMax; - yMin1 = word->edge[j1 + 1]; - yMax1 = word->edge[j0]; + yMin1 = endingEdge; + yMax1 = startingEdge; break; } if (first || xMin1 < xMin0) { @@ -5367,7 +5334,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo TextLineFrag *frag; char space[8], eol[16], eop[8]; int spaceLen, eolLen, eopLen; - GooString *s; double delta; int col, i, j, d, n; @@ -5396,11 +5362,16 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo // output the page in raw (content stream) order if (rawOrder) { + GooString s; + std::vector uText; + for (word = 
rawWords; word; word = word->next) { - s = new GooString(); - dumpFragment(word->text, word->len, uMap, s); - (*outputFunc)(outputStream, s->c_str(), s->getLength()); - delete s; + s.clear(); + uText.resize(word->len()); + std::transform(word->chars.begin(), word->chars.end(), uText.begin(), [](auto &c) { return c.text; }); + dumpFragment(uText.data(), uText.size(), uMap, &s); + (*outputFunc)(outputStream, s.c_str(), s.getLength()); + if (word->next && fabs(word->next->base - word->base) < maxIntraLineDelta * word->fontSize && word->next->xMin > word->xMax - minDupBreakOverlap * word->fontSize) { if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) { (*outputFunc)(outputStream, space, spaceLen); @@ -5454,6 +5425,7 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo printf("\n"); #endif + GooString s; // generate output col = 0; for (i = 0; i < nFrags; ++i) { @@ -5465,10 +5437,9 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo } // print the line - s = new GooString(); - col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); - (*outputFunc)(outputStream, s->c_str(), s->getLength()); - delete s; + s.clear(); + col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, &s); + (*outputFunc)(outputStream, s.c_str(), s.getLength()); // print one or more returns if necessary if (i == nFrags - 1 || frags[i + 1].col < col || fabs(frags[i + 1].base - frag->base) > maxIntraLineDelta * frag->line->words->fontSize) { @@ -5500,10 +5471,9 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo if (line->hyphenated && (line->next || blk->next)) { --n; } - s = new GooString(); - dumpFragment(line->text, n, uMap, s); - (*outputFunc)(outputStream, s->c_str(), s->getLength()); - delete s; + GooString s; + dumpFragment(line->text, n, uMap, &s); + (*outputFunc)(outputStream, s.c_str(), s.getLength()); // output a newline when a hyphen is not 
suppressed if (n == line->len) { (*outputFunc)(outputStream, eol, eolLen); diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index af007fa02..98e1df183 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -171,16 +171,16 @@ class POPPLER_PRIVATE_EXPORT TextWord void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style); // Get the TextFontInfo object associated with a character. - const TextFontInfo *getFontInfo(int idx) const { return font[idx]; } + const TextFontInfo *getFontInfo(int idx) const { return chars[idx].font; } // Get the next TextWord on the linked list. const TextWord *getNext() const { return next; } #ifdef TEXTOUT_WORD_LIST - int getLength() const { return len; } - const Unicode *getChar(int idx) const { return &text[idx]; } + int getLength() const { return chars.size(); } + const Unicode *getChar(int idx) const { return &chars[idx].text; } GooString *getText() const; - const GooString *getFontName(int idx) const { return font[idx]->fontName; } + const GooString *getFontName(int idx) const { return chars[idx].font->fontName; } void getColor(double *r, double *g, double *b) const { *r = colorR; @@ -197,19 +197,19 @@ class POPPLER_PRIVATE_EXPORT TextWord void getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const; double getFontSize() const { return fontSize; } int getRotation() const { return rot; } - int getCharPos() const { return charPos[0]; } - int getCharLen() const { return charPos[len] - charPos[0]; } + int getCharPos() const { return chars.empty() ? 0 : chars.front().charPos; } + int getCharLen() const { return chars.empty() ? 
0 : chars.back().charPos - chars.front().charPos; } bool getSpaceAfter() const { return spaceAfter; } #endif bool isUnderlined() const { return underlined; } const AnnotLink *getLink() const { return link; } - double getEdge(int i) const { return edge[i]; } + double getEdge(int i) const { return chars[i].edge; } double getBaseline() const { return base; } bool hasSpaceAfter() const { return spaceAfter; } const TextWord *nextWord() const { return next; }; + auto len() const { return chars.size(); } private: - void ensureCapacity(int capacity); void setInitialBounds(TextFontInfo *fontA, double x, double y); int rot; // rotation, multiple of 90 degrees @@ -218,18 +218,22 @@ class POPPLER_PRIVATE_EXPORT TextWord double xMin, xMax; // bounding box x coordinates double yMin, yMax; // bounding box y coordinates double base; // baseline x or y coordinate - Unicode *text; // the text - CharCode *charcode; // glyph indices - double *edge; // "near" edge x or y coord of each char - // (plus one extra entry for the last char) - int *charPos; // character position (within content stream) - // of each char (plus one extra entry for - // the last char) - int len; // length of text/edge/charPos/font arrays - int size; // size of text/edge/charPos/font arrays - TextFontInfo **font; // font information for each char - Matrix *textMat; // transformation matrix for each char + double fontSize; // font size + + struct CharInfo + { + Unicode text; + CharCode charcode; + int charPos; + double edge; + TextFontInfo *font; + Matrix textMat; + }; + std::vector chars; + int charPosEnd = 0; + double edgeEnd = 0; + bool spaceAfter; // set if there is a space between this // word and the next word on the line bool underlined; From f26b292412a9266aab46deb2ce1ffc4d016cc573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Thu, 28 Mar 2024 02:37:09 +0100 Subject: [PATCH 12/19] Reduce worst case algorithmic complexity of TextBlock::coalesce The old algorithm restarts the inner 
loop for the RHS word from the beginning on each match, i.e. the worst case complexity approaches O(N^3), while O(N^2) is obviously sufficient for a pairwise compare of all words. Fortunately, O(N^2) is hardly ever happening, as the inner N is limited by a) the maxBaseIdx, b) removing duplicates from the set. For some pathological cases this changes the runtime from minutes to seconds. See poppler#1173. --- poppler/TextOutputDev.cc | 109 +++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 49 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 03b68bc23..69a205a90 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -1611,33 +1611,33 @@ void TextBlock::addWord(TextWord *word) void TextBlock::coalesce(const UnicodeMap *uMap, double fixedPitch) { - TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord; - TextLine *line, *line0, *line1; - int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx; - int baseIdx, bestWordBaseIdx, idx0, idx1; - double minBase, maxBase; - double fontSize, wordSpacing, delta, priDelta, secDelta; - TextLine **lineArray; - bool found, overlap; - int col1, col2; - int i, j, k; - // discard duplicated text (fake boldface, drop shadows) - for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) { - word0 = pool->getPool(idx0); + for (int idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) { + // Get the first LHS word from the pool + TextWord *word0 = pool->getPool(idx0); + while (word0) { - priDelta = dupMaxPriDelta * word0->fontSize; - secDelta = dupMaxSecDelta * word0->fontSize; - maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); - found = false; - word1 = word2 = nullptr; // make gcc happy - for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) { - if (idx1 == idx0) { - word1 = word0; - word2 = word0->next; + double priDelta = dupMaxPriDelta * word0->fontSize; + double secDelta = dupMaxSecDelta * word0->fontSize; + double xDelta = ((rot == 0) || 
(rot == 2)) ? priDelta : secDelta; + double yDelta = ((rot == 0) || (rot == 2)) ? secDelta : priDelta; + + int maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); + + for (int idx1 = idx0; idx1 <= maxBaseIdx; idx1++) { + TextWord *prevWord; + /* In case the RHS word is from the same pool as the LHS word, + * start the inner loop with the word following the LHS word. + * Otherwise, start with the second word from the subsequent pools + * - the first word is compared at the end. + */ + if (idx0 == idx1) { + prevWord = word0; } else { - word1 = nullptr; - word2 = pool->getPool(idx1); + prevWord = pool->getPool(idx1); + if (!prevWord) { + continue; + } } TextWord *word1 = prevWord->next; @@ -1645,40 +1645,51 @@ void TextBlock::coalesce(const UnicodeMap *uMap, double fixedPitch) return std::equal(w1.chars.begin(), w1.chars.end(), w2.chars.begin(), w2.chars.end(), // [](auto c1, auto c2) { return c1.text == c2.text; }); }; - for (; word2; word1 = word2, word2 = word2->next) { - if (equalText(*word0, *word2)) { - switch (rot) { - case 0: - case 2: - found = fabs(word0->xMin - word2->xMin) < priDelta && fabs(word0->xMax - word2->xMax) < priDelta && fabs(word0->yMin - word2->yMin) < secDelta && fabs(word0->yMax - word2->yMax) < secDelta; - break; - case 1: - case 3: - found = fabs(word0->xMin - word2->xMin) < secDelta && fabs(word0->xMax - word2->xMax) < secDelta && fabs(word0->yMin - word2->yMin) < priDelta && fabs(word0->yMax - word2->yMax) < priDelta; - break; - } + auto match = [&equalText, xDelta, yDelta](const TextWord &w1, const TextWord &w2) -> bool { + if (!equalText(w1, w2)) { + return false; } - if (found) { - break; + return fabs(w1.xMin - w2.xMin) < xDelta && fabs(w1.xMax - w2.xMax) < xDelta // + && fabs(w1.yMin - w2.yMin) < yDelta && fabs(w1.yMax - w2.yMax) < yDelta; + }; + + while (word1) { + if (match(*word0, *word1)) { + prevWord->next = word1->next; + delete word1; + word1 = prevWord->next; + } else { + prevWord = word1; + word1 = word1->next; } } - if 
(found) { - break; + + // Check the first word from each subsequent pool + if (idx0 != idx1) { + word1 = pool->getPool(idx1); } - } - if (found) { - if (word1) { - word1->next = word2->next; - } else { - pool->setPool(idx1, word2->next); + if (word1 && match(*word0, *word1)) { + pool->setPool(idx1, word1->next); + delete word1; } - delete word2; - } else { - word0 = word0->next; } + + word0 = word0->next; } } + TextWord *word0, *word1; + TextWord *bestWord0, *bestWord1, *lastWord; + TextLine *line, *line0, *line1; + TextLine **lineArray; + int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx; + int baseIdx, bestWordBaseIdx; + double minBase, maxBase; + double fontSize, wordSpacing, delta; + bool overlap; + int col1, col2; + int i, j, k; + // build the lines curLine = nullptr; poolMinBaseIdx = pool->minBaseIdx; From d7e54f88c981f26b9477b2330070dccbdbbbdb55 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 31 Mar 2024 11:42:11 +0200 Subject: [PATCH 13/19] Update (C) --- poppler/TextOutputDev.cc | 3 ++- poppler/TextOutputDev.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 69a205a90..8ba60451c 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -39,13 +39,14 @@ // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, . 
Work sponsored by the LiMux project of the city of Munich // Copyright (C) 2018 Sanchit Anand // Copyright (C) 2018 Adam Reichold -// Copyright (C) 2018-2022 Nelson Benítez León +// Copyright (C) 2018-2022, 2024 Nelson Benítez León // Copyright (C) 2019 Christian Persch // Copyright (C) 2019, 2022 Oliver Sander // Copyright (C) 2019 Dan Shea // Copyright (C) 2021 Peter Williams // Copyright (C) 2024 Adam Sampson // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela +// Copyright (C) 2024 Stefan Brüns // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 98e1df183..d59814217 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -27,6 +27,7 @@ // Copyright (C) 2019, 2022 Oliver Sander // Copyright (C) 2019 Dan Shea // Copyright (C) 2020 Suzuki Toshiya +// Copyright (C) 2024 Stefan Brüns // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git From ec2427b7cb92cda6cd7bc9b1d3117552a65d518e Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 31 Mar 2024 16:32:36 +0200 Subject: [PATCH 14/19] CI: Use fedora 40 --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5428cf68f..560689b6c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -85,9 +85,9 @@ build_ubuntu_20_04: - ninja - ctest --output-on-failure -build_mingw64_fedora39: +build_mingw64_fedora40: stage: build - image: fedora:39 + image: fedora:40 before_script: - dnf install -y 'dnf-command(builddep)' - dnf builddep -y mingw64-poppler From 80e0519077f74e2591d45e70801f6c3f2fe2c362 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 31 Mar 2024 17:02:34 +0200 Subject: [PATCH 15/19] qt: Include less files --- qt5/tests/check_actualtext.cpp 
| 2 +- qt5/tests/check_annotations.cpp | 2 +- qt5/tests/check_attachments.cpp | 2 +- qt5/tests/check_cidfontswidthsbuilder.cpp | 2 +- qt5/tests/check_dateConversion.cpp | 2 +- qt5/tests/check_distinguished_name_parser.cpp | 2 +- qt5/tests/check_fonts.cpp | 2 +- qt5/tests/check_forms.cpp | 2 +- qt5/tests/check_goostring.cpp | 2 +- qt5/tests/check_internal_outline.cpp | 3 ++- qt5/tests/check_lexer.cpp | 2 +- qt5/tests/check_links.cpp | 2 +- qt5/tests/check_metadata.cpp | 2 +- qt5/tests/check_object.cpp | 2 +- qt5/tests/check_optcontent.cpp | 2 +- qt5/tests/check_outline.cpp | 2 +- qt5/tests/check_overprint.cpp | 2 +- qt5/tests/check_pagelabelinfo.cpp | 2 +- qt5/tests/check_pagelayout.cpp | 2 +- qt5/tests/check_pagemode.cpp | 2 +- qt5/tests/check_password.cpp | 2 +- qt5/tests/check_permissions.cpp | 2 +- qt5/tests/check_search.cpp | 2 +- qt5/tests/check_signature_basics.cpp | 2 +- qt5/tests/check_strings.cpp | 2 +- qt5/tests/check_stroke_opacity.cpp | 2 +- qt5/tests/check_utf8document.cpp | 2 +- qt5/tests/check_utf_conversion.cpp | 2 +- qt6/tests/check_actualtext.cpp | 2 +- qt6/tests/check_annotations.cpp | 2 +- qt6/tests/check_attachments.cpp | 2 +- qt6/tests/check_cidfontswidthsbuilder.cpp | 2 +- qt6/tests/check_dateConversion.cpp | 2 +- qt6/tests/check_distinguished_name_parser.cpp | 2 +- qt6/tests/check_fonts.cpp | 2 +- qt6/tests/check_forms.cpp | 2 +- qt6/tests/check_goostring.cpp | 2 +- qt6/tests/check_internal_outline.cpp | 3 ++- qt6/tests/check_lexer.cpp | 2 +- qt6/tests/check_links.cpp | 2 +- qt6/tests/check_metadata.cpp | 2 +- qt6/tests/check_object.cpp | 2 +- qt6/tests/check_optcontent.cpp | 2 +- qt6/tests/check_outline.cpp | 2 +- qt6/tests/check_overprint.cpp | 2 +- qt6/tests/check_pagelabelinfo.cpp | 2 +- qt6/tests/check_pagelayout.cpp | 2 +- qt6/tests/check_pagemode.cpp | 2 +- qt6/tests/check_password.cpp | 2 +- qt6/tests/check_permissions.cpp | 2 +- qt6/tests/check_search.cpp | 2 +- qt6/tests/check_signature_basics.cpp | 2 +- qt6/tests/check_strings.cpp 
| 2 +- qt6/tests/check_stroke_opacity.cpp | 2 +- qt6/tests/check_utf8document.cpp | 2 +- qt6/tests/check_utf_conversion.cpp | 2 +- 56 files changed, 58 insertions(+), 56 deletions(-) diff --git a/qt5/tests/check_actualtext.cpp b/qt5/tests/check_actualtext.cpp index 709bb3b55..f1128ee26 100644 --- a/qt5/tests/check_actualtext.cpp +++ b/qt5/tests/check_actualtext.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_annotations.cpp b/qt5/tests/check_annotations.cpp index 0e8e88e4c..6c3f1b61d 100644 --- a/qt5/tests/check_annotations.cpp +++ b/qt5/tests/check_annotations.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/qt5/tests/check_attachments.cpp b/qt5/tests/check_attachments.cpp index 8e250dfa7..4a7359c4f 100644 --- a/qt5/tests/check_attachments.cpp +++ b/qt5/tests/check_attachments.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_cidfontswidthsbuilder.cpp b/qt5/tests/check_cidfontswidthsbuilder.cpp index 94c6d52ac..19869341f 100644 --- a/qt5/tests/check_cidfontswidthsbuilder.cpp +++ b/qt5/tests/check_cidfontswidthsbuilder.cpp @@ -9,7 +9,7 @@ #include "CIDFontsWidthsBuilder.h" -#include +#include class TestCIDFontsWidthsBuilder : public QObject { diff --git a/qt5/tests/check_dateConversion.cpp b/qt5/tests/check_dateConversion.cpp index acbd4b87c..e6abc84ee 100644 --- a/qt5/tests/check_dateConversion.cpp +++ b/qt5/tests/check_dateConversion.cpp @@ -1,4 +1,4 @@ -#include +#include Q_DECLARE_METATYPE(QDate) Q_DECLARE_METATYPE(QTime) diff --git a/qt5/tests/check_distinguished_name_parser.cpp b/qt5/tests/check_distinguished_name_parser.cpp index 48d311656..2628cbe6b 100644 --- a/qt5/tests/check_distinguished_name_parser.cpp +++ b/qt5/tests/check_distinguished_name_parser.cpp @@ -8,7 +8,7 @@ //======================================================================== #include "DistinguishedNameParser.h" -#include +#include #include class TestDistinguishedNameParser : public 
QObject diff --git a/qt5/tests/check_fonts.cpp b/qt5/tests/check_fonts.cpp index f3df1b203..bb7983d28 100644 --- a/qt5/tests/check_fonts.cpp +++ b/qt5/tests/check_fonts.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_forms.cpp b/qt5/tests/check_forms.cpp index 0695936ca..68530bb30 100644 --- a/qt5/tests/check_forms.cpp +++ b/qt5/tests/check_forms.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/qt5/tests/check_goostring.cpp b/qt5/tests/check_goostring.cpp index cfb14c2a3..a82b5b166 100644 --- a/qt5/tests/check_goostring.cpp +++ b/qt5/tests/check_goostring.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "goo/GooString.h" diff --git a/qt5/tests/check_internal_outline.cpp b/qt5/tests/check_internal_outline.cpp index 5db6bf4d2..5be368aae 100644 --- a/qt5/tests/check_internal_outline.cpp +++ b/qt5/tests/check_internal_outline.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include "Outline.h" #include "PDFDoc.h" diff --git a/qt5/tests/check_lexer.cpp b/qt5/tests/check_lexer.cpp index 9c60962ad..8b7a46f24 100644 --- a/qt5/tests/check_lexer.cpp +++ b/qt5/tests/check_lexer.cpp @@ -1,4 +1,4 @@ -#include +#include #include "Object.h" #include "Lexer.h" diff --git a/qt5/tests/check_links.cpp b/qt5/tests/check_links.cpp index 314690149..04eeb9b66 100644 --- a/qt5/tests/check_links.cpp +++ b/qt5/tests/check_links.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_metadata.cpp b/qt5/tests/check_metadata.cpp index 8c87e419d..235a4ea2f 100644 --- a/qt5/tests/check_metadata.cpp +++ b/qt5/tests/check_metadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_object.cpp b/qt5/tests/check_object.cpp index 08a7f7502..cadc006e9 100644 --- a/qt5/tests/check_object.cpp +++ b/qt5/tests/check_object.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "poppler/Object.h" diff --git a/qt5/tests/check_optcontent.cpp b/qt5/tests/check_optcontent.cpp index 
e38b9b690..9f3c68175 100644 --- a/qt5/tests/check_optcontent.cpp +++ b/qt5/tests/check_optcontent.cpp @@ -1,4 +1,4 @@ -#include +#include #include "PDFDoc.h" #include "GlobalParams.h" diff --git a/qt5/tests/check_outline.cpp b/qt5/tests/check_outline.cpp index bc12bc93c..92eb52967 100644 --- a/qt5/tests/check_outline.cpp +++ b/qt5/tests/check_outline.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_overprint.cpp b/qt5/tests/check_overprint.cpp index 3035c6520..c0d965a4e 100644 --- a/qt5/tests/check_overprint.cpp +++ b/qt5/tests/check_overprint.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_pagelabelinfo.cpp b/qt5/tests/check_pagelabelinfo.cpp index a8e0f9efb..f45afa59d 100644 --- a/qt5/tests/check_pagelabelinfo.cpp +++ b/qt5/tests/check_pagelabelinfo.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_pagelayout.cpp b/qt5/tests/check_pagelayout.cpp index 20c2f0f2f..3c134d6ad 100644 --- a/qt5/tests/check_pagelayout.cpp +++ b/qt5/tests/check_pagelayout.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_pagemode.cpp b/qt5/tests/check_pagemode.cpp index 9a0b5e93c..78cbc0cd3 100644 --- a/qt5/tests/check_pagemode.cpp +++ b/qt5/tests/check_pagemode.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_password.cpp b/qt5/tests/check_password.cpp index 4207c5eb9..b58fe1cf8 100644 --- a/qt5/tests/check_password.cpp +++ b/qt5/tests/check_password.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_permissions.cpp b/qt5/tests/check_permissions.cpp index ceaf4d66a..d6ea7b4f7 100644 --- a/qt5/tests/check_permissions.cpp +++ b/qt5/tests/check_permissions.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp index be2d6bcc9..d93e851e4 100644 --- a/qt5/tests/check_search.cpp +++ b/qt5/tests/check_search.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff 
--git a/qt5/tests/check_signature_basics.cpp b/qt5/tests/check_signature_basics.cpp index 635e04c99..970b127be 100644 --- a/qt5/tests/check_signature_basics.cpp +++ b/qt5/tests/check_signature_basics.cpp @@ -12,7 +12,7 @@ // Note that this does not check the actual validity because // that will have an expiry date, and adding time bombs to unit tests is // probably not a good idea. -#include +#include #include "PDFDoc.h" #include "GlobalParams.h" #include "SignatureInfo.h" diff --git a/qt5/tests/check_strings.cpp b/qt5/tests/check_strings.cpp index 5772fd8af..5f18b42f9 100644 --- a/qt5/tests/check_strings.cpp +++ b/qt5/tests/check_strings.cpp @@ -17,7 +17,7 @@ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include +#include #include #include diff --git a/qt5/tests/check_stroke_opacity.cpp b/qt5/tests/check_stroke_opacity.cpp index 8357f626e..50b4b7d79 100644 --- a/qt5/tests/check_stroke_opacity.cpp +++ b/qt5/tests/check_stroke_opacity.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/qt5/tests/check_utf8document.cpp b/qt5/tests/check_utf8document.cpp index 3641eb5bf..32c95f644 100644 --- a/qt5/tests/check_utf8document.cpp +++ b/qt5/tests/check_utf8document.cpp @@ -1,4 +1,4 @@ -#include +#include #include "PDFDoc.h" #include "GlobalParams.h" diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp index 4f1728b03..ab1698831 100644 --- a/qt5/tests/check_utf_conversion.cpp +++ b/qt5/tests/check_utf_conversion.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include diff --git a/qt6/tests/check_actualtext.cpp b/qt6/tests/check_actualtext.cpp index d2eff79a6..1faf082eb 100644 --- a/qt6/tests/check_actualtext.cpp +++ b/qt6/tests/check_actualtext.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_annotations.cpp b/qt6/tests/check_annotations.cpp index ca00d4722..bf9d24444 100644 --- a/qt6/tests/check_annotations.cpp +++ 
b/qt6/tests/check_annotations.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/qt6/tests/check_attachments.cpp b/qt6/tests/check_attachments.cpp index 319ec4eec..74b55117d 100644 --- a/qt6/tests/check_attachments.cpp +++ b/qt6/tests/check_attachments.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_cidfontswidthsbuilder.cpp b/qt6/tests/check_cidfontswidthsbuilder.cpp index 94c6d52ac..19869341f 100644 --- a/qt6/tests/check_cidfontswidthsbuilder.cpp +++ b/qt6/tests/check_cidfontswidthsbuilder.cpp @@ -9,7 +9,7 @@ #include "CIDFontsWidthsBuilder.h" -#include +#include class TestCIDFontsWidthsBuilder : public QObject { diff --git a/qt6/tests/check_dateConversion.cpp b/qt6/tests/check_dateConversion.cpp index d675a405c..331dcf368 100644 --- a/qt6/tests/check_dateConversion.cpp +++ b/qt6/tests/check_dateConversion.cpp @@ -1,4 +1,4 @@ -#include +#include Q_DECLARE_METATYPE(QDate) Q_DECLARE_METATYPE(QTime) diff --git a/qt6/tests/check_distinguished_name_parser.cpp b/qt6/tests/check_distinguished_name_parser.cpp index 95d84e98d..d79b4d6a9 100644 --- a/qt6/tests/check_distinguished_name_parser.cpp +++ b/qt6/tests/check_distinguished_name_parser.cpp @@ -8,7 +8,7 @@ //======================================================================== #include "DistinguishedNameParser.h" -#include +#include #include class TestDistinguishedNameParser : public QObject diff --git a/qt6/tests/check_fonts.cpp b/qt6/tests/check_fonts.cpp index a3f0f9134..77b8f7e57 100644 --- a/qt6/tests/check_fonts.cpp +++ b/qt6/tests/check_fonts.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_forms.cpp b/qt6/tests/check_forms.cpp index 50f2f8984..ba6e08034 100644 --- a/qt6/tests/check_forms.cpp +++ b/qt6/tests/check_forms.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/qt6/tests/check_goostring.cpp b/qt6/tests/check_goostring.cpp index cfb14c2a3..a82b5b166 100644 --- 
a/qt6/tests/check_goostring.cpp +++ b/qt6/tests/check_goostring.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "goo/GooString.h" diff --git a/qt6/tests/check_internal_outline.cpp b/qt6/tests/check_internal_outline.cpp index d23e773e3..05db72f2e 100644 --- a/qt6/tests/check_internal_outline.cpp +++ b/qt6/tests/check_internal_outline.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include "Outline.h" #include "PDFDoc.h" diff --git a/qt6/tests/check_lexer.cpp b/qt6/tests/check_lexer.cpp index 9c60962ad..8b7a46f24 100644 --- a/qt6/tests/check_lexer.cpp +++ b/qt6/tests/check_lexer.cpp @@ -1,4 +1,4 @@ -#include +#include #include "Object.h" #include "Lexer.h" diff --git a/qt6/tests/check_links.cpp b/qt6/tests/check_links.cpp index 04075c529..cb9981b82 100644 --- a/qt6/tests/check_links.cpp +++ b/qt6/tests/check_links.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_metadata.cpp b/qt6/tests/check_metadata.cpp index e50080093..53098cf90 100644 --- a/qt6/tests/check_metadata.cpp +++ b/qt6/tests/check_metadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_object.cpp b/qt6/tests/check_object.cpp index 08a7f7502..cadc006e9 100644 --- a/qt6/tests/check_object.cpp +++ b/qt6/tests/check_object.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "poppler/Object.h" diff --git a/qt6/tests/check_optcontent.cpp b/qt6/tests/check_optcontent.cpp index 37a737ba6..f36290afe 100644 --- a/qt6/tests/check_optcontent.cpp +++ b/qt6/tests/check_optcontent.cpp @@ -1,4 +1,4 @@ -#include +#include #include "PDFDoc.h" #include "GlobalParams.h" diff --git a/qt6/tests/check_outline.cpp b/qt6/tests/check_outline.cpp index 8f7383e16..f497968c7 100644 --- a/qt6/tests/check_outline.cpp +++ b/qt6/tests/check_outline.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_overprint.cpp b/qt6/tests/check_overprint.cpp index 1b5515677..c70310da5 100644 --- a/qt6/tests/check_overprint.cpp +++ 
b/qt6/tests/check_overprint.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_pagelabelinfo.cpp b/qt6/tests/check_pagelabelinfo.cpp index a8e0f9efb..f45afa59d 100644 --- a/qt6/tests/check_pagelabelinfo.cpp +++ b/qt6/tests/check_pagelabelinfo.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_pagelayout.cpp b/qt6/tests/check_pagelayout.cpp index e17b293e6..a4d951314 100644 --- a/qt6/tests/check_pagelayout.cpp +++ b/qt6/tests/check_pagelayout.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_pagemode.cpp b/qt6/tests/check_pagemode.cpp index f20548d08..0c4551bdc 100644 --- a/qt6/tests/check_pagemode.cpp +++ b/qt6/tests/check_pagemode.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_password.cpp b/qt6/tests/check_password.cpp index f473fdc9b..b788fd0a2 100644 --- a/qt6/tests/check_password.cpp +++ b/qt6/tests/check_password.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_permissions.cpp b/qt6/tests/check_permissions.cpp index fde191610..ecbb9f90f 100644 --- a/qt6/tests/check_permissions.cpp +++ b/qt6/tests/check_permissions.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp index 6242676b6..6c19e6eaa 100644 --- a/qt6/tests/check_search.cpp +++ b/qt6/tests/check_search.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/qt6/tests/check_signature_basics.cpp b/qt6/tests/check_signature_basics.cpp index 9a9955c28..e256ea343 100644 --- a/qt6/tests/check_signature_basics.cpp +++ b/qt6/tests/check_signature_basics.cpp @@ -12,7 +12,7 @@ // Note that this does not check the actual validity because // that will have an expiry date, and adding time bombs to unit tests is // probably not a good idea. 
-#include +#include #include "PDFDoc.h" #include "GlobalParams.h" #include "SignatureInfo.h" diff --git a/qt6/tests/check_strings.cpp b/qt6/tests/check_strings.cpp index 0fe8a7ee3..2157f2e6b 100644 --- a/qt6/tests/check_strings.cpp +++ b/qt6/tests/check_strings.cpp @@ -17,7 +17,7 @@ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include +#include #include #include diff --git a/qt6/tests/check_stroke_opacity.cpp b/qt6/tests/check_stroke_opacity.cpp index 9ece677a9..fa69ad1cf 100644 --- a/qt6/tests/check_stroke_opacity.cpp +++ b/qt6/tests/check_stroke_opacity.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/qt6/tests/check_utf8document.cpp b/qt6/tests/check_utf8document.cpp index cedf634ae..c58ec0be9 100644 --- a/qt6/tests/check_utf8document.cpp +++ b/qt6/tests/check_utf8document.cpp @@ -1,4 +1,4 @@ -#include +#include #include "PDFDoc.h" #include "GlobalParams.h" diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp index 7cb51e542..f4b3cb835 100644 --- a/qt6/tests/check_utf_conversion.cpp +++ b/qt6/tests/check_utf_conversion.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include From 1446d1fd2186e6573e737406c02a54c36fe1ad71 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 31 Mar 2024 17:12:43 +0200 Subject: [PATCH 16/19] CI: Use Fedora for clazy, it's newer and built against a newer clang --- .gitlab-ci.yml | 10 +++++++--- poppler/FlateEncoder.cc | 2 +- qt6/tests/check_search.cpp | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 560689b6c..313a3e18e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -97,12 +97,16 @@ build_mingw64_fedora40: - mingw64-cmake -DENABLE_NSS3=OFF -DENABLE_GPGME=OFF -G Ninja .. 
- ninja -build_clazy_clang16: +build_clazy_fedora40: stage: build + image: fedora:40 + before_script: + - dnf install -y 'dnf-command(builddep)' + - dnf builddep -y poppler + - dnf -y install clazy ninja-build glibc-langpack-en script: - - apt-get install --yes --no-install-recommends clazy clang-16 - mkdir -p build && cd build - - CC=clang-16 CXX=clazy CXXFLAGS="-Werror -Wno-deprecated-declarations" cmake -DENABLE_GPGME=OFF -G Ninja .. + - CC=clang-18 CXX=clazy CXXFLAGS="-Werror -Wno-deprecated-declarations" cmake -G Ninja .. - CLAZY_CHECKS="level0,level1,level2,isempty-vs-count,qhash-with-char-pointer-key,tr-non-literal,no-non-pod-global-static" ninja -j ${FDO_CI_CONCURRENT} build_qt5_android: diff --git a/poppler/FlateEncoder.cc b/poppler/FlateEncoder.cc index 0786565d8..0ae1d5287 100644 --- a/poppler/FlateEncoder.cc +++ b/poppler/FlateEncoder.cc @@ -31,7 +31,7 @@ FlateEncoder::FlateEncoder(Stream *strA) : FilterStream(strA) // -Wzero-as-null-pointer-constant warning. // For safety, check that the Z_NULL definition is equivalent to // 0 / null pointer. 
- static_assert(Z_NULL == 0); + static_assert(static_cast(Z_NULL) == 0); zlib_stream.zalloc = nullptr; zlib_stream.zfree = nullptr; zlib_stream.opaque = nullptr; diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp index 6c19e6eaa..384088f85 100644 --- a/qt6/tests/check_search.cpp +++ b/qt6/tests/check_search.cpp @@ -357,8 +357,8 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2W), true); // Now for completeness, we will match the full text of two lines - const QString full2lines = QString::fromUtf8( - "Las pruebas se practicarán en vista pública, si bien, excepcionalmente, el Tribunal podrá acordar, mediante providencia, que determinadas pruebas se celebren fuera del acto de juicio"); // clazy:exclude=qstring-allocations + const QString full2lines = QString::fromUtf8( // clazy:exclude=qstring-allocations + "Las pruebas se practicarán en vista pública, si bien, excepcionalmente, el Tribunal podrá acordar, mediante providencia, que determinadas pruebas se celebren fuera del acto de juicio"); QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode0), true); QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode1), true); QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2), true); From d8ae3ba659c71b4f5913ecf74d525461bb831366 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 4 Feb 2024 14:06:00 +0100 Subject: [PATCH 17/19] Require C++20 I want to use std::string::starts_with --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c52b5385..95a105e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,7 @@ string(REGEX REPLACE "^0?(.+)$" "\\1" POPPLER_MINOR_VERSION "${POPPLER_MINOR_VER set(POPPLER_MICRO_VERSION "0") set(POPPLER_VERSION "${POPPLER_MAJOR_VERSION}.${POPPLER_MINOR_VERSION_STRING}.${POPPLER_MICRO_VERSION}") -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) 
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_C_STANDARD 11) From 0d5431dba10c531b6e6897e088c1c6ccec84ef55 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Sun, 31 Mar 2024 17:50:50 +0200 Subject: [PATCH 18/19] CI: Update the hack to change c++ standard to 23 in the clang build --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 313a3e18e..a80059af6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -50,7 +50,7 @@ build_clang16_libcpp: stage: build script: - echo "We want to compile with C++23 here because it has some nice things like deleted std::string nullptr constructor" - - sed -i -e "s@CMAKE_CXX_STANDARD 17@CMAKE_CXX_STANDARD 23@g" CMakeLists.txt + - sed -i -e "s@CMAKE_CXX_STANDARD 20@CMAKE_CXX_STANDARD 23@g" CMakeLists.txt - git clone --branch ${CI_COMMIT_REF_NAME} --depth 1 ${TEST_DATA_URL} test-data || git clone --depth 1 ${UPSTREAM_TEST_DATA_URL} test-data - apt-get install --yes --no-install-recommends libclang-16-dev llvm-16-dev libc++-16-dev libc++abi-16-dev clang-tidy-16 clang-16 libunwind-16-dev gperf jq - srcdir=`pwd` && mkdir -p /tmp/poppler_build && cd /tmp/poppler_build From 199ccde44be1879bf5caa8bcbc050c1ef0327849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nelson=20Ben=C3=ADtez=20Le=C3=B3n?= Date: Sat, 9 Mar 2024 00:48:59 +0000 Subject: [PATCH 19/19] reset clipping path when the state is restored According to the specification, see NOTE 2 in https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf#G7.3882161 it appears that the clipping path should be reset when the restore (Q) operator is encountered. 
Fixes #739 --- poppler/Gfx.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc index f1c69b7cb..2db12d882 100644 --- a/poppler/Gfx.cc +++ b/poppler/Gfx.cc @@ -5380,6 +5380,7 @@ void Gfx::restoreState() state = state->restore(); out->restoreState(state); stackHeight--; + clip = clipNone; } // Create a new state stack, and initialize it with a copy of the