From 01380e7ad082ed780ca99d80a7bc54259e47cdb8 Mon Sep 17 00:00:00 2001 From: Guillaume Raffy Date: Tue, 20 Feb 2024 08:21:34 +0100 Subject: [PATCH] adapted code to pypdf2 3.0 where numPages is no longer a member of PdfReader --- src/pymusco/main.py | 6 ++---- src/pymusco/pdf.py | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/pymusco/main.py b/src/pymusco/main.py index 719b29d..a7c0d0d 100644 --- a/src/pymusco/main.py +++ b/src/pymusco/main.py @@ -348,8 +348,6 @@ def scan_to_stub(src_scanned_pdf_file_path: Path, dst_stub_pdf_file_path: Path, scanned_image_file_paths = [] with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file: pdf_reader = PyPDF2.PdfReader(src_pdf_file) - # pdfReader.numPages - # 19 page_index = 0 for page in pdf_reader.pages: print(f'page_index = {page_index}') @@ -454,7 +452,7 @@ def split_double_pages(src_scanned_pdf_file_path: Path, dst_scanned_pdf_file_pat scanned_image_file_paths = [] with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file: pdf_reader = PyPDF2.PdfReader(src_pdf_file) - for page_index in range(pdf_reader.numPages): + for page_index in range(len(pdf_reader.pages)): print(f'page_index = {page_index}') double_page = pdf_reader.pages[page_index] image_name = f'page{page_index:03d}' @@ -503,7 +501,7 @@ def crop_pdf(src_scanned_pdf_file_path: Path, dst_scanned_pdf_file_path: Path, x scanned_image_file_paths = [] with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file: pdf_reader = PyPDF2.PdfReader(src_pdf_file) - for page_index in range(pdf_reader.numPages): + for page_index in range(len(pdf_reader.pages)): print(f'page_index = {page_index}') page = pdf_reader.pages[page_index] image_name = f'page{page_index:03d}' diff --git a/src/pymusco/pdf.py b/src/pymusco/pdf.py index f938af9..870042f 100644 --- a/src/pymusco/pdf.py +++ b/src/pymusco/pdf.py @@ -159,7 +159,7 @@ def find_pdf_page_raster_image(pdf_page: PyPDF2.PageObject) -> PyPDF2.generic.En :return PyPDF2.generic.EncodedStreamObject: a pdf node which is supposed to contain an image """ if '/XObject' in pdf_page['/Resources']: - x_object = pdf_page['/Resources']['/XObject'].getObject() + x_object = pdf_page['/Resources']['/XObject'].get_object() for obj in x_object: if x_object[obj]['/Subtype'] == '/Image': return x_object[obj] @@ -240,7 +240,7 @@ def extract_pdf_page_images(pdf_page: PyPDF2.PageObject, image_folder='/tmp'): :param PyPDF2.pdf.PageObject pdf_page: :param str image_folder: """ - x_object = pdf_page['/Resources']['/XObject'].getObject() + x_object = pdf_page['/Resources']['/XObject'].get_object() for obj in x_object: print(type(obj)) @@ -351,9 +351,7 @@ def add_stamp(src_pdf_file_path: Path, dst_pdf_file_path: Path, stamp_file_path: pdf_writer = PyPDF2.PdfWriter() with open(src_pdf_file_path, 'rb') as src_pdf_file: pdf_reader = PyPDF2.PdfReader(src_pdf_file) - # pdfReader.numPages - # 19 - for page_index in range(pdf_reader.numPages): + for page_index in range(len(pdf_reader.pages)): page = pdf_reader.pages[page_index] # page.mergePage(watermark) page.mergeScaledTranslatedPage(watermark, scale=scale, tx=tx, ty=ty) @@ -380,11 +378,11 @@ def check_pdf(src_pdf_file_path: Path): """ with open(src_pdf_file_path, 'rb') as src_pdf_file: pdf_reader = PyPDF2.PdfReader(src_pdf_file) - for page_index in range(pdf_reader.numPages): + for page_index in range(len(pdf_reader.pages)): print(f'page_index = {page_index}') pdf_page = pdf_reader.pages[page_index] if '/XObject' in pdf_page['/Resources']: - x_object = pdf_page['/Resources']['/XObject'].getObject() + x_object = pdf_page['/Resources']['/XObject'].get_object() for obj in x_object: if x_object[obj]['/Subtype'] == '/Image': pdf_stream = x_object[obj]