-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfind_lines_words.py
77 lines (61 loc) · 2.24 KB
/
find_lines_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import cv2
import numpy as np
import matplotlib.pyplot as plt
def _find_runs(profile):
    """Return ``[start, end]`` index pairs for each run of non-zero entries.

    ``end`` is exclusive, so ``profile[start:end]`` is exactly the run.
    A run that is still open when ``profile`` ends is closed at
    ``len(profile)`` instead of being dropped, and the scan state is local,
    so nothing carries over from one call to the next.
    """
    runs = []
    run_start = None
    for index, value in enumerate(profile):
        if value > 0 and run_start is None:
            run_start = index
        elif value == 0 and run_start is not None:
            runs.append([run_start, index])
            run_start = None
    if run_start is not None:
        # Ink touches the last row/column: close the run at the border.
        runs.append([run_start, len(profile)])
    return runs


# Read the input with flag 0 (single-channel grayscale).
img = cv2.imread('./orig_image.jpg', 0)
if img is None:
    # cv2.imread reports a missing or undecodable file by returning None;
    # fail here with a clear message instead of inside GaussianBlur.
    raise OSError("could not read image './orig_image.jpg'")

# Otsu's thresholding after Gaussian filtering. THRESH_BINARY_INV sets
# pixels below the threshold to 255, so (presumably dark) ink becomes
# non-zero and the background becomes 0.
blur = cv2.GaussianBlur(img, (5, 5), 0)
ret3, bin_img = cv2.threshold(
    blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)
cv2.imwrite('bin_img.jpg', bin_img)

# Text lines: maximal runs of rows whose pixel sum is non-zero.
# Each entry is [top, bottom] with bottom exclusive.
horizontal_sum = bin_img.sum(axis=1)
lines = _find_runs(horizontal_sum)

# "Words": within each line, maximal runs of columns whose pixel sum is
# non-zero. Each entry is [top, bottom, left, right], right exclusive.
words = []
for top, bottom in lines:
    vertical_sum = bin_img[top:bottom].sum(axis=0)
    for left, right in _find_runs(vertical_sum):
        words.append([top, bottom, left, right])

# Write every line crop and every word crop to its own numbered file.
for index, (top, bottom) in enumerate(lines):
    cv2.imwrite('line_' + str(index).zfill(3) + '.jpg', bin_img[top:bottom])
for index, (top, bottom, left, right) in enumerate(words):
    cv2.imwrite(
        'word_' + str(index).zfill(3) + '.jpg',
        bin_img[top:bottom, left:right],
    )
# Use the following function to plot the horizontal/vertical projection
# histogram of each region in a list of word boxes.
def word_plot(orientation, word_list):
    """Plot the pixel-sum profile of each region, with its mean and median.

    Args:
        orientation: ``0`` sums each column of the region (``axis=0``);
            any other value sums each row (``axis=1``).
        word_list: iterable of sequences whose first four items are
            ``top, bottom, left, right`` slice bounds into the
            module-level ``bin_img``.

    Returns:
        None. Each region is drawn with matplotlib and shown.
    """
    axis = 0 if orientation == 0 else 1
    for word in word_list:
        top, bottom, left, right = word[:4]
        profile = bin_img[top:bottom, left:right].sum(axis=axis)
        # Size the flat mean/median lines from the profile itself so they
        # always match it, even if the box is clipped by the image border.
        length = profile.shape[0]
        plt.plot(profile)
        plt.plot(np.full(length, np.mean(profile)))
        plt.plot(np.full(length, np.median(profile)))
        # NOTE(review): one figure per word is assumed here; confirm that
        # show() was not meant to run once after all words are drawn.
        plt.show()