qurator-spk · cneud · Jul 1, 2022 · Aug 18, 2022 · Aug 22, 2022 · May 4, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -10,6 +10,7 @@ jobs:
       - restore_cache:
           keys:
             - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
+      - run: python -m pip install --upgrade pip
       - run: make install
       - run: make model
       - save_cache:
@@ -27,6 +28,7 @@ jobs:
       - restore_cache:
           keys:
             - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
+      - run: python -m pip install --upgrade pip
       - run: make install
       - run: make model
       - save_cache:

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
-numpy
+numpy >=1.21
 setuptools >= 41
 opencv-python-headless
-ocrd >= 2.22.3
+ocrd >= 2.38.0
 tensorflow >= 2.4.0
diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py
@@ -1,7 +1,7 @@
 """
 sbb_binarize CLI
 """
-
+import click
 from click import command, option, argument, version_option, types
 from .sbb_binarize import SbbBinarizer
 

diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py
@@ -17,14 +17,72 @@
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from tensorflow.python.keras import backend as tensorflow_backend
+from tensorflow.keras import layers
+import tensorflow.keras.losses
+from tensorflow.keras.layers import *
 sys.stderr = stderr
 
 
 import logging
 
+
+projection_dim = 64  # output width of PatchEncoder's Dense projection and Embedding
+patch_size = 1  # window side length Patches uses for both patch size and stride
+num_patches =14*14  # number of positions PatchEncoder embeds (Embedding input_dim)
+
 def resize_image(img_in, input_height, input_width):
     return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
 
+
+class Patches(layers.Layer):
+    """Keras layer that cuts images into flattened square patches.
+
+    ``patch_size`` may be passed as a keyword argument (``from_config`` does
+    so); otherwise the module-level ``patch_size`` is used.
+    """
+    def __init__(self, **kwargs):
+        # Consume our own config key; Layer.__init__ rejects unknown kwargs.
+        size = kwargs.pop('patch_size', patch_size)
+        # Forward name/trainable/dtype so a from_config round trip keeps them.
+        super().__init__(**kwargs)
+        self.patch_size = size
+
+    def call(self, images):
+        """Return the patches of ``images`` as (batch, n_patches, patch_dims)."""
+        window = [1, self.patch_size, self.patch_size, 1]
+        patches = tf.image.extract_patches(
+            images=images, sizes=window, strides=window,
+            rates=[1, 1, 1, 1], padding="VALID")
+        return tf.reshape(patches, [tf.shape(images)[0], -1, patches.shape[-1]])
+
+    def get_config(self):
+        """Return the base layer config extended with ``patch_size``."""
+        return {**super().get_config(), 'patch_size': self.patch_size}
+
+
+class PatchEncoder(layers.Layer):
+    """Keras layer that projects patches and adds a position embedding."""
+    def __init__(self, **kwargs):
+        # Consume our own config keys (including the sub-layer entries that
+        # older configs carry); Layer.__init__ rejects unknown kwargs.
+        count = kwargs.pop('num_patches', num_patches)
+        for stale_key in ('projection', 'position_embedding'):
+            kwargs.pop(stale_key, None)
+        super().__init__(**kwargs)
+        self.num_patches = count
+        self.projection = layers.Dense(units=projection_dim)
+        self.position_embedding = layers.Embedding(
+            input_dim=count, output_dim=projection_dim)
+
+    def call(self, patch):
+        """Return the projected ``patch`` plus one embedding per position."""
+        positions = tf.range(start=0, limit=self.num_patches, delta=1)
+        return self.projection(patch) + self.position_embedding(positions)
+
+    def get_config(self):
+        """Return a plain-data config; sub-layers are rebuilt by __init__."""
+        return {**super().get_config(), 'num_patches': self.num_patches}
+
 class SbbBinarizer:
 
     def __init__(self, model_dir, logger=None):
@@ -33,7 +91,8 @@ def __init__(self, model_dir, logger=None):
 
         self.start_new_session()
 
-        self.model_files = glob('%s/*.h5' % self.model_dir)
+        #self.model_files = glob('%s/*.h5' % self.model_dir)
+        self.model_files = glob(self.model_dir+"/*/", recursive = True)
 
         self.models = []
         for model_file in self.model_files:
@@ -52,7 +111,12 @@ def end_session(self):
         del self.session
 
     def load_model(self, model_name):
-        model = load_model(join(self.model_dir, model_name), compile=False)
+        try:
+            model = load_model(join(self.model_dir, model_name), compile=False)
+            self.margin_percent = 0.1
+        except:
+            model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
+            self.margin_percent = 0.15
         model_height = model.layers[len(model.layers)-1].output_shape[1]
         model_width = model.layers[len(model.layers)-1].output_shape[2]
         n_classes = model.layers[len(model.layers)-1].output_shape[3]
@@ -95,14 +159,31 @@ def predict(self, model_in, img, use_patches):
             index_start_w  = 0
             img_padded = np.copy(img)
 
+        img_org_h_pad = img_padded.shape[0]
+        img_org_w_pad = img_padded.shape[1]
 
+        index_start_h_alw = 0#100
+        index_start_w_alw = 0#100
+
+        #img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] ))
+
+
+        #img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:]
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:]
+
+        #img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:]
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:]
+
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:]
+
         img = np.copy(img_padded)
+
 
 
 
         if use_patches:
 
-            margin = int(0.1 * model_width)
+            margin = int(self.margin_percent * model_width)
 
             width_mid = model_width - 2 * margin
             height_mid = model_height - 2 * margin
@@ -153,13 +234,34 @@ def predict(self, model_in, img, use_patches):
                         index_y_d = img_h - model_height
 
                     img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
+
+                    #h_res = int( img_patch.shape[0]/1.05)
+                    #w_res = int( img_patch.shape[1]/1.05)
+
+                    #img_patch_resize = resize_image(img_patch, h_res, w_res)
+
+                    #img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding()
+
+                    #h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. )
+
+                    #w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. )
+
+                    #img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:])
+
+                    #label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
                     label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
+                    #seg = np.argmax(label_p_pred, axis=3)[0]
+
+                    #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
+
                     seg = np.argmax(label_p_pred, axis=3)[0]
 
                     seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 
+                    #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
+
                     if i == 0 and j == 0:
                         seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
                         seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin]
@@ -224,7 +326,7 @@ def predict(self, model_in, img, use_patches):
                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color
 
 
-
+            #prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:]
             prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:]
             prediction_true = prediction_true.astype(np.uint8)