Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transformer model integration #61

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
- restore_cache:
keys:
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
- run: python -m pip install --upgrade pip
- run: make install
- run: make model
- save_cache:
Expand All @@ -27,6 +28,7 @@ jobs:
- restore_cache:
keys:
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
- run: python -m pip install --upgrade pip
- run: make install
- run: make model
- save_cache:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy
numpy >=1.21
setuptools >= 41
opencv-python-headless
ocrd >= 2.22.3
ocrd >= 2.38.0
tensorflow >= 2.4.0
2 changes: 1 addition & 1 deletion sbb_binarize/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
sbb_binarize CLI
"""

import click
from click import command, option, argument, version_option, types
from .sbb_binarize import SbbBinarizer

Expand Down
110 changes: 106 additions & 4 deletions sbb_binarize/sbb_binarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,72 @@
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend
from tensorflow.keras import layers
import tensorflow.keras.losses
from tensorflow.keras.layers import *
sys.stderr = stderr


import logging


# Hyper-parameters of the transformer layers defined in this module. They are
# kept at module level because Patches and PatchEncoder read them when they
# are constructed.
projection_dim = 64  # Dense units and Embedding output_dim in PatchEncoder
patch_size = 1  # side length (in pixels) of the tiles cut out by Patches
# Number of position indices embedded by PatchEncoder; written as 14 * 14,
# presumably a 14x14 grid of patches (confirm against the model architecture).
num_patches = 14 * 14

def resize_image(img_in, input_height, input_width):
    """Return ``img_in`` resized to ``input_height`` x ``input_width``.

    Nearest-neighbour interpolation is used. ``cv2.resize`` takes its target
    size as ``(width, height)``, i.e. the reverse of this function's
    parameter order, hence the swap below.
    """
    target_size = (input_width, input_height)
    resized = cv2.resize(img_in, target_size, interpolation=cv2.INTER_NEAREST)
    return resized


class Patches(layers.Layer):
    """Keras layer that cuts a batch of images into flattened square patches.

    Every image is split into non-overlapping ``patch_size`` x ``patch_size``
    tiles (stride equals tile size, ``VALID`` padding) and each tile is
    flattened, so the output has shape ``(batch, n_patches, patch_dims)``.

    Args:
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...), forwarded to the base class.
            A ``patch_size`` entry, as written by :meth:`get_config`, is
            used when present; otherwise the module-level ``patch_size``
            applies.
    """

    def __init__(self, **kwargs):
        # from_config() feeds the dict produced by get_config() back in here,
        # so 'patch_size' has to be taken out before the remaining keywords
        # reach the base class, which rejects arguments it does not know.
        size = kwargs.pop('patch_size', patch_size)
        # Forward the rest so the saved name/dtype/trainable are preserved.
        super().__init__(**kwargs)
        self.patch_size = size

    def call(self, images):
        """Extract the patches of ``images`` and flatten each one."""
        # The batch dimension may be unknown at graph-construction time, so
        # read it dynamically.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        return tf.reshape(patches, [batch_size, -1, patch_dims])

    def get_config(self):
        """Return the base layer config extended with ``patch_size``."""
        config = super().get_config()
        config.update({'patch_size': self.patch_size})
        return config


class PatchEncoder(layers.Layer):
    """Keras layer that projects patches and adds a position embedding.

    Each patch is passed through a ``Dense`` projection, and the embedding of
    the position indices ``0 .. num_patches - 1`` is added to the result.

    Args:
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...), forwarded to the base class.
            ``num_patches`` and ``projection_dim`` entries, as written by
            :meth:`get_config`, are used when present; otherwise the
            module-level values of the same names apply.
    """

    def __init__(self, **kwargs):
        # from_config() feeds the saved config back in as keywords; pull out
        # this layer's own settings before calling the base class, which
        # rejects arguments it does not know.
        n_patches = kwargs.pop('num_patches', num_patches)
        dim = kwargs.pop('projection_dim', projection_dim)
        # Configs written by the previous get_config() also carried the
        # sub-layers themselves. They are rebuilt below, so discard them.
        kwargs.pop('projection', None)
        kwargs.pop('position_embedding', None)
        # Forward the rest so the saved name/dtype/trainable are preserved.
        super().__init__(**kwargs)
        self.num_patches = n_patches
        self.projection_dim = dim
        self.projection = layers.Dense(units=dim)
        self.position_embedding = layers.Embedding(
            input_dim=n_patches, output_dim=dim
        )

    def call(self, patch):
        """Return the projected ``patch`` plus its position embedding."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        return self.projection(patch) + self.position_embedding(positions)

    def get_config(self):
        """Return the base layer config plus this layer's constructor args.

        Only plain values are stored. The sub-layers are recreated by
        ``__init__`` and must not be placed in the config as live objects.
        """
        config = super().get_config()
        config.update({
            'num_patches': self.num_patches,
            'projection_dim': self.projection_dim,
        })
        return config

class SbbBinarizer:

def __init__(self, model_dir, logger=None):
Expand All @@ -33,7 +91,8 @@ def __init__(self, model_dir, logger=None):

self.start_new_session()

self.model_files = glob('%s/*.h5' % self.model_dir)
#self.model_files = glob('%s/*.h5' % self.model_dir)
self.model_files = glob(self.model_dir+"/*/", recursive = True)

self.models = []
for model_file in self.model_files:
Expand All @@ -52,7 +111,12 @@ def end_session(self):
del self.session

def load_model(self, model_name):
model = load_model(join(self.model_dir, model_name), compile=False)
try:
model = load_model(join(self.model_dir, model_name), compile=False)
self.margin_percent = 0.1
except:
model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
self.margin_percent = 0.15
model_height = model.layers[len(model.layers)-1].output_shape[1]
model_width = model.layers[len(model.layers)-1].output_shape[2]
n_classes = model.layers[len(model.layers)-1].output_shape[3]
Expand Down Expand Up @@ -95,14 +159,31 @@ def predict(self, model_in, img, use_patches):
index_start_w = 0
img_padded = np.copy(img)

img_org_h_pad = img_padded.shape[0]
img_org_w_pad = img_padded.shape[1]

index_start_h_alw = 0#100
index_start_w_alw = 0#100

#img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] ))


#img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:]
#img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:]

#img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:]
#img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:]

#img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:]

img = np.copy(img_padded)




if use_patches:

margin = int(0.1 * model_width)
margin = int(self.margin_percent * model_width)

width_mid = model_width - 2 * margin
height_mid = model_height - 2 * margin
Expand Down Expand Up @@ -153,13 +234,34 @@ def predict(self, model_in, img, use_patches):
index_y_d = img_h - model_height

img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]

#h_res = int( img_patch.shape[0]/1.05)
#w_res = int( img_patch.shape[1]/1.05)

#img_patch_resize = resize_image(img_patch, h_res, w_res)

#img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding()

#h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. )

#w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. )

#img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:])

#label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))

label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))

#seg = np.argmax(label_p_pred, axis=3)[0]

#label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))

seg = np.argmax(label_p_pred, axis=3)[0]

seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)

#seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)

if i == 0 and j == 0:
seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin]
Expand Down Expand Up @@ -224,7 +326,7 @@ def predict(self, model_in, img, use_patches):
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color



#prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:]
prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:]
prediction_true = prediction_true.astype(np.uint8)

Expand Down