From 7f6881749f0acf7962085759d070f7b70709fac1 Mon Sep 17 00:00:00 2001 From: Yun Date: Tue, 13 Jul 2021 09:12:43 +0200 Subject: [PATCH 1/2] Add additional image processing Ordered process: 1. dilation - thicken white portion of subtitles 2. resize - temporarily hardcoded to 47% (assuming subtitles are 68 pixels in height) 3. apply HSV color mask - filter out non-gray pixels and filter out pixels that are not bright enough 4. invert image - make it black text on white background 5. add border to top and bottom - assuming subtitles are cropped closely --- videocr/video.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/videocr/video.py b/videocr/video.py index 38cd009..a3c86de 100644 --- a/videocr/video.py +++ b/videocr/video.py @@ -18,6 +18,8 @@ class Video: num_frames: int fps: float height: int + width: int + resize_dim: List[int] pred_frames: List[PredictedFrame] pred_subs: List[PredictedSubtitle] @@ -27,6 +29,9 @@ class Video: self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT)) self.fps = v.get(cv2.CAP_PROP_FPS) self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT)) + self.width = int(v.get(cv2.CAP_PROP_FRAME_WIDTH)) + scale_percent = 47 # apparently 32 pixels is the optimal character height for tesseract. 
+ self.resize_dim=(int(self.width * scale_percent/100), int(self.height * scale_percent/100)) def run_ocr(self, lang: str, time_start: str, time_end: str, conf_threshold: int, use_fullframe: bool) -> None: @@ -56,8 +61,16 @@ class Video: if not self.use_fullframe: # only use bottom half of the frame by default img = img[self.height // 2:, :] - img = cv2.bitwise_not(cv2.bitwise_and(img, img, mask=cv2.inRange(img, (190, 190, 190), (255, 255, 255)))) - config = '--tessdata-dir "{}"'.format(constants.TESSDATA_DIR) + # dilate and resize + img=cv2.resize(cv2.dilate(img, np.ones(2, 2), np.uint8), self.resize_dim, interpolation=cv2.INTER_AREA) + + # mask to filter out non gray-like pixels/pixels that are not bright enough + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + color_mask = cv2.inRange(hsv, (0, 0, 190), (179, 20, 255)) + + # apply mask, inverse image so it's black text on white background, add borders to top and bottom + img = cv2.copyMakeBorder(cv2.bitwise_not(cv2.bitwise_and(img, img, mask=color_mask)), 10, 10, 0, 0, cv2.BORDER_CONSTANT, None, (255,255,255) + config = '--tessdata-dir "{}" --psm 7 -c preserve_interword_spaces=1'.format(constants.TESSDATA_DIR) try: return pytesseract.image_to_data(img, lang=self.lang, config=config) except Exception as e: From aec2b9c95a30af354b366297732fdc7372fd7170 Mon Sep 17 00:00:00 2001 From: Yun Date: Tue, 13 Jul 2021 10:20:47 +0200 Subject: [PATCH 2/2] fixup --- videocr/video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videocr/video.py b/videocr/video.py index a3c86de..e4b7565 100644 --- a/videocr/video.py +++ b/videocr/video.py @@ -62,7 +62,7 @@ class Video: # only use bottom half of the frame by default img = img[self.height // 2:, :] # dilate and resize - img=cv2.resize(cv2.dilate(img, np.ones(2, 2), np.uint8), self.resize_dim, interpolation=cv2.INTER_AREA) + img=cv2.resize(cv2.dilate(img, np.ones((2, 2), np.uint8)), self.resize_dim, interpolation=cv2.INTER_AREA) # mask to filter out non gray-like 
pixels/pixels that are not bright enough hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)