Skip to content

Commit

Permalink
新增img2ofd img2pfd
Browse files Browse the repository at this point in the history
  • Loading branch information
renoyuan committed Nov 17, 2023
1 parent 64b21cf commit 8745392
Show file tree
Hide file tree
Showing 26 changed files with 1,024 additions and 1,605 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

5 添加gui 工具实现上述功能

6 jpg2ofd jpg2pfd

关于 jb2格式图片解析
使用了第三方库 jbig2dec 去读取jb2格式图片 参考下面链接安装使用jbig2dec
https://github.com/rillian/jbig2dec
Expand Down
13 changes: 9 additions & 4 deletions easyofd/draw/draw_ofd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from io import BytesIO

import xmltodict
import cv2
from PIL import Image
from .pdf_parse import DPFParser
from .ofdtemplate import OFDTemplate,DocumentTemplate,DocumentResTemplate,PulicResTemplate,ContentTemplate,OFDStructure
Expand Down Expand Up @@ -52,6 +53,7 @@ def build_content_res(self,pil_img_list):
"""一张图片是一页"""
content_res_list = []
for idx, pil_img in enumerate(pil_img_list):
print(pil_img)
print(idx,pil_img[1],pil_img[2])
PhysicalBox = f"0 0 {pil_img[1]} {pil_img[2]}"
ImageObject = [{
Expand Down Expand Up @@ -93,11 +95,14 @@ def pil_2_bytes(self, image):
img_bytesio.close()
return img_bytes

def __call__(self,pdf_bytes):
def __call__(self,pdf_bytes,CV2_img_list=None):
# 读取 pdf 转图片
pdf_obj = DPFParser()
img_list = pdf_obj.to_img(pdf_bytes)
pil_img_list = [(self.pil_2_bytes(Image.frombytes("RGB", [_img.width, _img.height], _img.samples)),_img.width,_img.height) for _img in img_list]
if CV2_img_list:
pil_img_list = [(self.pil_2_bytes(Image.fromarray(cv2.cvtColor(_img,cv2.COLOR_BGR2RGB))),_img.shape[1],_img.shape[0] ) for _img in CV2_img_list]
else:
pdf_obj = DPFParser()
img_list = pdf_obj.to_img(pdf_bytes)
pil_img_list = [(self.pil_2_bytes(Image.frombytes("RGB", [_img.width, _img.height], _img.samples)),_img.width,_img.height) for _img in img_list]
document = DocumentTemplate()
pulic_res = self.build_pulic_res()
document_res = self.build_document_res(len(pil_img_list))
Expand Down
46 changes: 0 additions & 46 deletions easyofd/main.py

This file was deleted.

19 changes: 19 additions & 0 deletions easyofd/ofd.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Any

import fitz
import cv2
import numpy as np
from loguru import logger

Expand Down Expand Up @@ -84,6 +85,24 @@ def pdf2img(self,pdfbytes):
logger.info(f"to_jpg")
return image_list

def jpg2ofd(self, imglist: list):
    """Build an OFD document from a list of OpenCV images.

    The images are handed to ``OFDWrite`` through its ``CV2_img_list``
    keyword, with ``None`` passed in place of the PDF bytes.

    Args:
        imglist: cv2 image list (numpy arrays as produced by OpenCV).

    Returns:
        Whatever ``OFDWrite().__call__`` returns (presumably the OFD file
        bytes -- confirm against ``OFDWrite``).

    Raises:
        ValueError: if ``imglist`` is ``None`` or empty.
    """
    # OFDWrite.__call__ tests ``if CV2_img_list:``; an empty list is falsy
    # there and would fall through to the PDF branch with ``pdf_bytes=None``.
    # Reject that case up front with a clear message instead.
    if imglist is None or len(imglist) == 0:
        raise ValueError("imglist must contain at least one image")

    ofd_byte = OFDWrite()(None, CV2_img_list=imglist)
    return ofd_byte

def jpg2pfd(self, imglist: list):
    """Draw a list of OpenCV images through the PDF drawing pipeline.

    Steps:
      1. Build the intermediate document data with ``OFDParser.img2data``.
      2. Feed that data to ``DrawPDF`` and invoke it.

    Args:
        imglist: cv2 image list.

    Returns:
        The result of calling ``DrawPDF(data)()`` (presumably PDF bytes --
        confirm against ``DrawPDF``).
    """
    # The parser is only used for its img2data helper, so no OFD payload
    # is supplied to its constructor.
    parser = OFDParser(None)
    document_data = parser.img2data(imglist)
    pdf_drawer = DrawPDF(document_data)
    return pdf_drawer()

def to_jpg(self,format="jpg"):
"""
return numpy list
Expand Down
55 changes: 55 additions & 0 deletions easyofd/parser_ofd/ofd_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import traceback
import base64
import re
import cv2
from typing import Any

from loguru import logger
Expand All @@ -34,6 +35,60 @@ def __init__(self, ofdb64):
self.file_tree = None
self.jbig2dec_path = r"C:/msys64/mingw64/bin/jbig2dec.exe"

def img2data(self, imglist):
    """Convert a list of OpenCV images into the parsed-document data layout.

    Every image becomes one page holding a single image object that spans
    the whole page; pages carry no text.

    Args:
        imglist: iterable of images as numpy arrays, either 2-D
            (grayscale) or 3-D (height x width x channels).

    Returns:
        A one-element list containing a dict with the keys ``pdf_name``,
        ``doc_no``, ``images``, ``page_size``, ``fonts`` and ``page_info``.
        ``images`` maps the stringified page index to the JPEG resource
        (base64 encoded); ``page_info`` maps the integer page index to its
        ``text_list`` / ``img_list``.

    Raises:
        ValueError: if an image cannot be encoded as JPEG.
    """
    # Pixel -> page-unit scale factor; presumably pixels per millimetre
    # at 200 DPI (25.4 mm per inch) -- confirm against the drawing code.
    OP = 200 / 25.4

    img_info = {}
    page_info_d = {}
    font_info = {}
    # Stays empty when imglist is empty; otherwise holds the size of the
    # LAST image, because the document dict carries one page_size only.
    page_size = []

    for idx, img_numpy in enumerate(imglist):
        # shape[:2] works for both grayscale (H, W) and colour (H, W, C)
        # arrays; unpacking three values would fail on grayscale input.
        h, w = img_numpy.shape[:2]

        encoded_ok, img_encode = cv2.imencode('.jpg', img_numpy)
        if not encoded_ok:
            raise ValueError(f"failed to encode image {idx} as JPEG")
        imgb64 = base64.b64encode(img_encode.tobytes()).decode("utf-8")

        res_id = str(idx)
        img_info[res_id] = {
            "format": "jpg",
            "wrap_pos": "",
            "type": "IMG",
            "suffix": "jpg",
            "fileName": f"{idx}.jpg",
            "imgb64": imgb64,
        }

        page_size = [0, 0, w / OP, h / OP]
        img_d = {
            "CTM": "",  # no transformation matrix for a full-page image
            "ID": res_id,  # image object id
            "ResourceID": res_id,  # key into img_info
            "pos": [0, 0, w / OP, h / OP],  # image covers the whole page
        }
        page_info_d[idx] = {
            "text_list": [],
            "img_list": [img_d],
        }

    # All pages belong to a single document entry.
    return [{
        "pdf_name": "demo.pdf",
        "doc_no": "0",
        "images": img_info,
        "page_size": page_size,
        "fonts": font_info,
        "page_info": page_info_d,
    }]

# 获得xml 对象
def get_xml_obj(self, label):
Expand Down
Loading

0 comments on commit 8745392

Please sign in to comment.