Revert "Feature: Warn on unicode decoding errors in PDF annotations"

jsvine · Dec 9, 2024 · fa923cb · fa923cb
1 parent 871770a
commit fa923cb
Show file tree

Hide file tree

Showing 4 changed files with 1 addition and 39 deletions.
diff --git a/pdfplumber/page.py b/pdfplumber/page.py
@@ -13,7 +13,6 @@
     Union,
 )
 from unicodedata import normalize as normalize_unicode
-from warnings import warn
 
 from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import (
@@ -307,15 +306,7 @@ def parse(annot: T_obj) -> T_obj:
                     try:
                         extras[k] = v.decode("utf-8")
                     except UnicodeDecodeError:
-                        try:
-                            extras[k] = v.decode("utf-16")
-                        except UnicodeDecodeError:
-                            if self.pdf.raise_unicode_errors:
-                                raise
-                            warn(
-                                f"Could not decode {k} of annotation."
-                                f" {k} will be missing."
-                            )
+                        extras[k] = v.decode("utf-16")
 
             parsed = {
                 "page_number": self.page_number,

diff --git a/pdfplumber/pdf.py b/pdfplumber/pdf.py
@@ -35,7 +35,6 @@ def __init__(
         password: Optional[str] = None,
         strict_metadata: bool = False,
         unicode_norm: Optional[Literal["NFC", "NFKC", "NFD", "NFKD"]] = None,
-        raise_unicode_errors: bool = True,
     ):
         self.stream = stream
         self.stream_is_external = stream_is_external
@@ -44,7 +43,6 @@ def __init__(
         self.laparams = None if laparams is None else LAParams(**laparams)
         self.password = password
         self.unicode_norm = unicode_norm
-        self.raise_unicode_errors = raise_unicode_errors
 
         self.doc = PDFDocument(PDFParser(stream), password=password or "")
         self.rsrcmgr = PDFResourceManager()
@@ -78,7 +76,6 @@ def open(
         repair: bool = False,
         gs_path: Optional[Union[str, pathlib.Path]] = None,
         repair_setting: T_repair_setting = "default",
-        raise_unicode_errors: bool = True,
     ) -> "PDF":
 
         stream: Union[BufferedReader, BytesIO]
@@ -110,7 +107,6 @@ def open(
                 strict_metadata=strict_metadata,
                 unicode_norm=unicode_norm,
                 stream_is_external=stream_is_external,
-                raise_unicode_errors=raise_unicode_errors,
             )
 
         except PSException:

diff --git a/tests/pdfs/annotations-unicode-issues.pdf b/tests/pdfs/annotations-unicode-issues.pdf
diff --git a/tests/test_issues.py b/tests/test_issues.py
@@ -9,8 +9,6 @@
     resource = None
 import unittest
 
-import pytest
-
 import pdfplumber
 
 logging.disable(logging.ERROR)
@@ -334,26 +332,3 @@ def test_issue_1181(self):
                 ["Bar10", "Bar11", "Bar12"],
                 ["", "", ""],
             ]
-
-    def test_pr_1195(self):
-        """
-        In certain scenarios, annotations may include invalid or extraneous
-        data that can obstruct the annotation processing workflow.  To mitigate
-        this, the raise_unicode_errors parameter in the PDF initializer and the
-        .open() method provides a configurable option to bypass these errors
-        and generate warnings instead, ensuring smoother handling of such
-        anomalies.
-
-        The following test verifies the functionality of the
-        raise_unicode_errors parameter.
-        """
-        path = os.path.join(HERE, "pdfs/annotations-unicode-issues.pdf")
-        with pdfplumber.open(path) as pdf, pytest.raises(UnicodeDecodeError):
-            for _ in pdf.annots:
-                pass
-
-        with pdfplumber.open(path, raise_unicode_errors=False) as pdf, pytest.warns(
-            UserWarning
-        ):
-            for _ in pdf.annots:
-                pass