summary | refs | log | tree | commit | diff
path: root/kde-frameworks/baloo
diff options
context:
space:
mode:
author V3n3RiX <venerix@koprulu.sector> 2023-03-28 09:49:11 +0100
committer V3n3RiX <venerix@koprulu.sector> 2023-03-28 09:49:11 +0100
commit 115dcc7054f5934a2c8e26fd8a8eed5f3e29e9ce (patch)
tree c31afe35699960753f76770d6b0b4ea48af9f686 /kde-frameworks/baloo
parent e292b671b113c2cc012beddad93a3df4f9410698 (diff)
gentoo auto-resync : 28:03:2023 - 09:49:10
Diffstat (limited to 'kde-frameworks/baloo')
-rw-r--r-- kde-frameworks/baloo/Manifest 3
-rw-r--r-- kde-frameworks/baloo/baloo-5.104.0-r1.ebuild (renamed from kde-frameworks/baloo/baloo-5.104.0.ebuild) 2
-rw-r--r-- kde-frameworks/baloo/files/baloo-5.104.0-skip-all-unprintable-chars.patch 70
3 files changed, 74 insertions, 1 deletions
diff --git a/kde-frameworks/baloo/Manifest b/kde-frameworks/baloo/Manifest
index 69838dfec6e1..dc675230c45d 100644
--- a/kde-frameworks/baloo/Manifest
+++ b/kde-frameworks/baloo/Manifest
@@ -1,5 +1,6 @@
+AUX baloo-5.104.0-skip-all-unprintable-chars.patch 2619 BLAKE2B faf3b9287d5ef10dd3f4be49cb15adfbccad602c067682b1f7d1db282afac1d003a351286e8514cc3596d8e708cd33a815bd701652b450ebeaa099cb2e843760 SHA512 55d319c7cb8fc05e5b60ac907169de2c37f04df1b98f19f38749a83fdd5321c1c4e7ccd5adf997bcae5e3c4dcf75a7f8cb88b580ea83055b9ba7ac2f54a2e063
DIST baloo-5.102.0.tar.xz 302812 BLAKE2B 9578facf3563f9d48595b5d41d588302c344b785ec0f8c29a5e03411e8165bceae7ccf57630d420acd1f19a4a129a73eca4b49d8763a602697c4706431b840f3 SHA512 ab2c2e5da169371e3bb27344f31b42f4ec63bc18fee0e3812a21e66e75c9e826f00f2e8ca8186b9660579b6990e67a162d0db14195b6b0b8de5fe9792966b25e
DIST baloo-5.104.0.tar.xz 305536 BLAKE2B ea76fa769acfb79b4af1c5c5338ef15c39943828e9ce68078ded7ea4abf77110d15aa629a40ed9d1fa666ad546ad896d1b9e51ea43a66b0a992efae089d46d3c SHA512 80ecea86735606798f8500eafcceee5b5dda13c33c2707791baf88d6961d843d239264aea27c1d97c131d8558b4421a251e1c9aed823e3d9214f633488f860fa
EBUILD baloo-5.102.0.ebuild 913 BLAKE2B 08a83cfd4a8add03e6d70ed24ba1454289d867612674f4242a579f633f36278769f9d6a140e730cfb8b79c51da5d2502117f9d58f8e549cfd729e375e3fc9790 SHA512 ae0a84a2f32b4cc4dc90d0e80eeecbb69b9e9240d666119a3758c1f2bf80aabdb656343bf6017f158f39decee5e93ce8374bd3c36e2049cb280cecddb43ed3fb
-EBUILD baloo-5.104.0.ebuild 916 BLAKE2B c07ff2b5290c9d17640e3873aa96468d0c83b7957c9deeffd855e751827a3054ce3d7d4b9b7a5153866191264bdce1c62985dd09b795bf6f9ffaf398eb695621 SHA512 68f0a5c316596c25651a0bf681177aaaf5e9f0c73f615f8cc3441cf1572b2f59100180ccfd003019e386f5ba219112290caa4a0e9cf3c842a647fd713a1d630d
+EBUILD baloo-5.104.0-r1.ebuild 981 BLAKE2B 9241f7c2c1409789957c2db56e760c4bee27a44e6663dafdb245b16230a0188e41a319b8650371af374565ad747820a1f7171341d79911876d453b8732abdc73 SHA512 bbf642b65e3cf2928d304abd523b3c32d6a8aa9bfec6f71c1545637b76a72570118ddf3146e6c9f744798b428d1b496946465f2a581996175f70f1d11505cfb6
MISC metadata.xml 456 BLAKE2B 4392b1cc6f304778d71236d5eb557dfbbd530143eea5cad9a3c3034e3e8b22c835f6c7f980124a21cefd35a2dd1efd5110adc0a5342170f88dfd7418b12bee99 SHA512 7ba65331cad434e2dceee012a5458d268eb2a04e0f7276b265c15644e6db5209bc7eee7d9695aa0038c435711e0f6f0dc53c7bae9d773b48f01e22a22e4dbb80
diff --git a/kde-frameworks/baloo/baloo-5.104.0.ebuild b/kde-frameworks/baloo/baloo-5.104.0-r1.ebuild
index 8bc9a436ec63..ead3aed883ec 100644
--- a/kde-frameworks/baloo/baloo-5.104.0.ebuild
+++ b/kde-frameworks/baloo/baloo-5.104.0-r1.ebuild
@@ -33,3 +33,5 @@ DEPEND="
=kde-frameworks/solid-${PVCUT}*:5
"
RDEPEND="${DEPEND}"
+
+PATCHES=( "${FILESDIR}/${P}-skip-all-unprintable-chars.patch" )
diff --git a/kde-frameworks/baloo/files/baloo-5.104.0-skip-all-unprintable-chars.patch b/kde-frameworks/baloo/files/baloo-5.104.0-skip-all-unprintable-chars.patch
new file mode 100644
index 000000000000..7e9eb0d74c42
--- /dev/null
+++ b/kde-frameworks/baloo/files/baloo-5.104.0-skip-all-unprintable-chars.patch
@@ -0,0 +1,70 @@
+From 886aba423f3659ef591903f1f3dea87f8b4c6016 Mon Sep 17 00:00:00 2001
+From: Igor Poboiko <igor.poboiko@gmail.com>
+Date: Mon, 20 Mar 2023 13:20:33 +0000
+Subject: [PATCH] [TermGenerator] Skip all unprintable characters
+
+Some extractors can produce text which includes special unicode
+control characters (e.g. Poppler can give us 0x0001 from some PDFs).
+TermGenerator then generates proper (yet meaningless) terms out of those
+characters, and they end up in database. It should be safe to skip all
+unprintable characters to avoid that (although surrogates are fine, they
+are dealt with later via QString::normalize call).
+
+Character 0x0001 is the worst, as it is used internally in DocTermsCodec
+for compactification. Such collision then leads to the corrupted database
+(some terms from DocTermsDB are not present in PostingDB).
+
+The corruption is not hypothetical (although not critical), I've encountered bunch of broken DB entries for some PDF files on my machine.
+
+
+(cherry picked from commit 492321e53a41762555ba6528e15cd0d0188ed153)
+---
+ autotests/unit/engine/termgeneratortest.cpp | 11 +++++++++++
+ src/engine/termgenerator.cpp | 2 +-
+ 2 files changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/autotests/unit/engine/termgeneratortest.cpp b/autotests/unit/engine/termgeneratortest.cpp
+index 361c4934c..69885c133 100644
+--- a/autotests/unit/engine/termgeneratortest.cpp
++++ b/autotests/unit/engine/termgeneratortest.cpp
+@@ -31,6 +31,7 @@ private Q_SLOTS:
+ void testWordPositions();
+ void testWordPositionsCJK();
+ void testNumbers();
++ void testControlCharacter();
+
+ QList<QByteArray> allWords(const QString& str)
+ {
+@@ -213,6 +214,16 @@ void TermGeneratorTest::testNumbers()
+ QCOMPARE(words, expectedWords);
+ }
+
++void TermGeneratorTest::testControlCharacter()
++{
++ QString str = QString::fromUtf8("word1\u0001word2");
++
++ QList<QByteArray> words = allWords(str);
++ QList<QByteArray> expectedWords = { "word1", "word2" };
++
++ QCOMPARE(words, expectedWords);
++}
++
+ QTEST_MAIN(TermGeneratorTest)
+
+ #include "termgeneratortest.moc"
+diff --git a/src/engine/termgenerator.cpp b/src/engine/termgenerator.cpp
+index d98b28416..832962da1 100644
+--- a/src/engine/termgenerator.cpp
++++ b/src/engine/termgenerator.cpp
+@@ -59,7 +59,7 @@ QByteArrayList TermGenerator::termList(const QString& text_)
+ int start = 0;
+
+ auto isSkipChar = [] (const QChar& c) {
+- return c.isPunct() || c.isMark() || c.isSpace();
++ return c.isPunct() || c.isMark() || c.isSpace() || (!c.isPrint() && !c.isSurrogate());
+ };
+
+ QByteArrayList list;
+--
+GitLab
+