This is as far as I could get with simple techniques. I'm using thresholding to pick out the letters (which also picks up bits of the numbers), and then contours to filter out those little number fragments by area. Unfortunately, I also end up losing the dots on the i's by doing this. If you have control over the handwriting, it'd be a lot easier and cleaner to separate out red ink, since the black numbers have some blue in them.
K-means clustering might get you better results, but I've forgotten how to do that in OpenCV :p
import cv2
import numpy as np
# Interactive tuner: threshold the LAB b-channel of "writing.png", keep only
# contours larger than `size`, and save the filled contours as "masked.png".
#
# Keys (focus must be on one of the OpenCV windows):
#   d / a : raise / lower `high`   (upper b-channel bound)
#   w / s : raise / lower `low`    (lower b-channel bound)
#   e / q : raise / lower `size`   (minimum contour area)
#   z     : finish tuning and build the mask

# load image; cv2.imread returns None instead of raising when the read fails
img = cv2.imread("writing.png")
if img is None:
    raise FileNotFoundError("could not read image: writing.png")

# convert to LAB and keep only the b channel
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
_, _, b = cv2.split(lab)

# threshold on b channel
low = 0
high = 124  # [0, 124, 8] b-channel
size = 8

done = False
while not done:
    # threshold (inRange does not modify its input, so no copy is needed)
    thresh = cv2.inRange(b, low, high)

    # contours; findContours returns 2 values on OpenCV 2.x/4.x and 3 on 3.x,
    # and the contour list is always the second-to-last element
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # filter contours by size
    big_cntrs = [contour for contour in contours if cv2.contourArea(contour) > size]
    marked = img.copy()
    cv2.drawContours(marked, big_cntrs, -1, (0, 255, 0), 3)

    # show
    cv2.imshow("original", img)
    cv2.imshow("marked", marked)
    cv2.imshow("thresh", thresh)

    # mask to the low byte: some platforms set high bits in the key code
    key = cv2.waitKey(1) & 0xFF

    # check keypress
    done = key == ord('z')
    previous = (low, high, size)
    if key == ord('d'):
        high = min(high + 1, 255)  # b channel is 8-bit
    elif key == ord('a'):
        high = max(high - 1, 0)
    elif key == ord('w'):
        low = min(low + 1, 255)
    elif key == ord('s'):
        low = max(low - 1, 0)
    elif key == ord('e'):
        size += 1
    elif key == ord('q'):
        size = max(size - 1, 0)  # a negative area threshold is meaningless

    # report only when something changed, rather than on every 1 ms poll
    if (low, high, size) != previous:
        print([low, high, size])

# final settings, so they can be hard-coded next time
print([low, high, size])

# create a mask of the contoured image (thickness -1 fills each contour)
mask = np.zeros_like(thresh)
mask = cv2.drawContours(mask, big_cntrs, -1, 255, -1)
cv2.imshow("Mask", mask)
cv2.waitKey(0)
cv2.imwrite("masked.png", mask)
cv2.destroyAllWindows()
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…