"""
WaterfallProcess.py

Convert a Synthesia-style piano roll image to a list of rectangles
representing the notes.
"""
import argparse
import json

import cv2
import numpy as np

def preprocess_image(image_file):
    """
    Preprocess the input image to remove noise and threshold the keys.
    Returns a numpy array of the preprocessed image and saves the result to a file.
    """
    image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {image_file}")
    _, thresholded = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    filtered = cv2.medianBlur(thresholded, 5)
    cv2.imwrite("debug_threshold.png", filtered)
    return filtered
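
# Note (illustrative): after preprocessing, the note keys appear as white (255) blobs
# on a black (0) background; inspect debug_threshold.png to verify the threshold.
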
def match_area(image, rectangle, outer_rectangle=None):
    """
    Return the area inside the rectangle that is filled with white pixels and the
    area that is filled with black pixels, as a (white_area, black_area) tuple.
    """
    if isinstance(rectangle, list) and isinstance(rectangle[0], (list, tuple)):
        # If rectangle is a list of rectangles, return the combined area of all
        # rectangles. (Intersections are not counted twice.)
        # outer_rectangle is the bounding box enclosing all of them.
        x, y, w, h = outer_rectangle
        area = w * h
        # Create a mask covering the union of the rectangles.
        union_mask = np.zeros((h, w), dtype=np.uint8)
        for rect in rectangle:
            # Fill the mask with white pixels where rect is located.
            x1, y1, w1, h1 = rect
            union_mask[y1 - y:y1 - y + h1, x1 - x:x1 - x + w1] = 255
        # Count the pixels that are white both in the mask and in the image.
        white_area = np.sum(np.logical_and(image[y:y + h, x:x + w] == 255, union_mask == 255))
        black_area = area - white_area
        return white_area, black_area
    else:
        x, y, w, h = rectangle
        area = w * h
        white_area = np.sum(image[y:y + h, x:x + w] == 255)
        black_area = area - white_area
        return white_area, black_area
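
# Illustrative example (hypothetical values): if a note occupies a fully white
# 10x20 region, match_area(image, (x, y, 10, 20)) returns (200, 0); a rectangle
# that only half-covers white pixels returns roughly (100, 100).
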
def separate_contours(image, contour, threshold=10):
    """
    Separates merged contours into individual convex contours.
    Args:
        image (np.array): Binary image containing the contours.
        contour (np.array): Points of the contour to be separated.
        threshold (float): Depth threshold (compared against the raw depth values
            returned by cv2.convexityDefects) for defect points to be considered
            as separation points.
    Returns:
        list: A list of individual convex contours.
    """
    # Find the convex hull (as indices into the contour).
    hull = cv2.convexHull(contour, returnPoints=False)
    # Calculate convexity defects.
    defects = cv2.convexityDefects(contour, hull)
    if defects is None:
        return [contour]
    separation_points = []
    for i in range(defects.shape[0]):
        s, e, f, d = defects[i, 0]
        if d > threshold:
            separation_points.append(f)
    # If no separation points are found, return the original contour.
    if len(separation_points) == 0:
        return [contour]
    # Cut the contour at each separation point, walking it in index order.
    separation_points.sort()
    separated_contours = []
    for i in range(len(separation_points)):
        start = separation_points[i]
        if i + 1 < len(separation_points):
            separated_contour = contour[start:separation_points[i + 1]]
        else:
            # Close the loop: the last piece wraps around to the first cut point.
            separated_contour = np.concatenate((contour[start:], contour[:separation_points[0]]))
        if len(separated_contour) > 0:
            separated_contours.append(separated_contour)
    return separated_contours if separated_contours else [contour]
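
# Illustrative example (hypothetical): two notes that merge into a single tall blob
# produce a contour whose deepest convexity defects sit at the "waist" between them;
# cutting the contour at those defect points yields one roughly convex piece per note.
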
def subdivide_rectangle(image, contour):
    """
    Takes a contour that mistook multiple keys for a single contour, and subdivides it
    into the correct note rectangle contours.
    """
    bounding_rect = cv2.boundingRect(contour)
    # The white area inside the bounding rectangle is the most any subdivision can cover.
    max_white_area, _ = match_area(image, bounding_rect)
    # Run separate_contours with different threshold values, and keep the result
    # with the best area ratio.
    best_ratio = 0
    best_separated_contours = [contour]
    for thres in range(1, 100):
        separated_contours = separate_contours(image, contour, threshold=thres)
        # Calculate the area ratio of the separated contours.
        rectangle_list = [cv2.boundingRect(c) for c in separated_contours]
        aw, ab = match_area(image, rectangle_list, bounding_rect)
        area_ratio = aw / max_white_area if max_white_area > 0 else 0
        if area_ratio > best_ratio:
            best_ratio = area_ratio
            best_separated_contours = separated_contours
    # Convert the best contours to rectangles.
    rectangle_list = [cv2.boundingRect(c) for c in best_separated_contours]
    # Keep only the rectangles that are within the bounding rectangle.
    rectangle_list = [r for r in rectangle_list
                      if r[0] >= bounding_rect[0] and r[1] >= bounding_rect[1]
                      and r[0] + r[2] <= bounding_rect[0] + bounding_rect[2]
                      and r[1] + r[3] <= bounding_rect[1] + bounding_rect[3]]
    # Keep only the rectangles that are not too thin.
    rectangle_list = [r for r in rectangle_list if r[2] >= 5]
    print(f"Area ratio: {best_ratio}")
    return rectangle_list
def detect_rectangles(image):
    """
    Detect the rectangles corresponding to the keys using the watershed algorithm.
    Handles cases where keys may be touching.
    Returns a list of rectangles.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # Note: with iterations=0 the dilation is a no-op and the thresholded image is used as-is.
    dilated = cv2.dilate(image, kernel, iterations=0)
    # Distance transform, then threshold to find the sure foreground.
    dist_transform = cv2.distanceTransform(dilated, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist_transform, 0.35 * dist_transform.max(), 255, 0)
    sure_fg = np.uint8(sure_fg)
    # Pixels that are white but not sure foreground form the unknown region.
    unknown = cv2.subtract(dilated, sure_fg)
    # Marker labelling
    _, markers = cv2.connectedComponents(sure_fg)
    # Add one to all labels so that the sure background is not 0, but 1
    markers = markers + 1
    # Mark the unknown region with zero
    markers[unknown == 255] = 0
    # Apply the watershed
    cv2.watershed(cv2.cvtColor(image, cv2.COLOR_GRAY2BGR), markers)
    # Get the list of unique markers
    unique_markers = np.unique(markers)
    rectangles = []
    min_cover_ratio = 0.95
    for marker in unique_markers:
        if marker in (-1, 0, 1):  # Skip the watershed boundaries, the unknown region and the background.
            continue
        mask = np.zeros_like(image, dtype=np.uint8)
        mask[markers == marker] = 255
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if w > 0 and h > 0:
                rect = (x, y, w, h)
                white_area, black_area = match_area(image, rect)
                area_coverage_ratio = white_area / (white_area + black_area)
                print(f"Rectangle: x: {x}, y: {y}, w: {w}, h: {h}")
                print(f"Area coverage ratio: {area_coverage_ratio}")
                if area_coverage_ratio < 0.05:
                    print(f"### Skipping rectangle: {rect}")
                    continue  # Likely a rectangle enclosing the entire image.
                elif area_coverage_ratio < min_cover_ratio:
                    print(f"### Subdividing rectangle: {rect}")
                    # The rectangle is mostly empty, so it may cover multiple keys.
                    # Subdivide it into one rectangle per key.
                    rects = subdivide_rectangle(image, contour)
                    print(f"### Result: {rects}")
                    rectangles += rects
                else:
                    rectangles.append(rect)
                    print("Single rectangle.")
    return rectangles
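
# Each detected rectangle is an (x, y, w, h) tuple in pixel coordinates of the
# preprocessed image, one per note after any subdivision of merged blobs.
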
def write_image(image, rectangles, output_file):
    """
    To help with debugging, draw the rectangles on top of the image and save it to a file.
    """
    for rect in rectangles:
        x, y, w, h = rect
        # The image is grayscale, so this draws a mid-gray outline; adjust the value if it is not visible.
        cv2.rectangle(image, (x, y), (x + w, y + h), (150, 150, 150), 2)
    cv2.imwrite(output_file, image)
    return rectangles
def write_json(rectangles, output_file):
"""
Write the rectangles to a json file.
"""
rects = []
for rect in rectangles:
x, y, w, h = rect
rects.append({"x": x, "y": y, "width": w, "height": h})
data = {"rectangles": rects}
with open(output_file, "w") as f:
json.dump(data, f)
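
# Example of the resulting JSON (illustrative values):
# {"rectangles": [{"x": 12, "y": 340, "width": 18, "height": 96}, ...]}
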
def main():
parser = argparse.ArgumentParser(description="Convert a Synthesia-style piano roll image to a list of rectangles representing the notes.")
parser.add_argument("image_file", help="Input image file")
parser.add_argument("output_process", help="Debug image file showing the detected rectangles")
parser.add_argument("output_file", help="Output json file listing the rectangles")
args = parser.parse_args()
image = preprocess_image(args.image_file)
rectangles = detect_rectangles(image)
write_image(image, rectangles, args.output_process)
write_json(rectangles, args.output_file)
if __name__ == "__main__":
main()
# To run the script, use the following command:
# python WaterfallProcess.py output_stitch.png output_process.png rectangles.json
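#
# The individual steps can also be reused from another script, e.g. (sketch,
# assuming "output_stitch.png" exists):
#
#   from WaterfallProcess import preprocess_image, detect_rectangles
#   binary = preprocess_image("output_stitch.png")
#   notes = detect_rectangles(binary)
#   print(f"{len(notes)} note rectangles detected")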