Compare commits

...

9 Commits

Author SHA1 Message Date
Pradana AUMARS 92d131ecd6 Merge pull request 'Update image processing procedure' (#3) from Yun/videocr:master into master
Reviewed-on: #3
2021-07-14 18:48:31 +02:00
Yun b5e6f5a57f Update image processing procedure
Apply threshold after dilation and select only white pixels from result.
Erode afterwards to thin out the text.
2021-07-14 06:22:55 +02:00
Pradana AUMARS 25765b8b6f Import numpy in video.py 2021-07-13 16:39:03 +02:00
Pradana AUMARS 09f5098e19 Fix missing parenthesis 2021-07-13 16:36:35 +02:00
Pradana AUMARS b005e36fcd Merge pull request 'Add additional image processing' (#1) from Yun/videocr:master into master
Reviewed-on: #1
2021-07-13 16:16:53 +02:00
Yun aec2b9c95a fixup 2021-07-13 10:20:47 +02:00
Yun 7f6881749f Add additional image processing
Ordered process:
1. dilation - thicken white portion of subtitles
2. resize - temporarily hardcoded to 47% (assuming subtitles are 68 pixels in height)
3. apply hsv color mask - filter out non-gray pixels and filter out pixels that are not bright enough
4. invert image - make it black text on white background
5. add border to top and bottom - assuming subtitles are cropped closely
2021-07-13 09:12:43 +02:00
Pradana AUMARS edc1bc28a2 Fix indentation on last commit 2021-07-12 23:52:26 +02:00
Pradana AUMARS 5534ae317f Isolate subtitles as black over white background (kudos to u/Yun on hexbear.net) 2021-07-12 22:20:00 +02:00
1 changed files with 15 additions and 1 deletions

View File

@ -4,6 +4,7 @@ import sys
import multiprocessing
import pytesseract
import cv2
import numpy as np
from . import constants
from . import utils
@ -18,6 +19,8 @@ class Video:
num_frames: int
fps: float
height: int
width: int
resize_dim: List[int]
pred_frames: List[PredictedFrame]
pred_subs: List[PredictedSubtitle]
@ -27,6 +30,9 @@ class Video:
self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
self.fps = v.get(cv2.CAP_PROP_FPS)
self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.width = int(v.get(cv2.CAP_PROP_FRAME_WIDTH))
scale_percent = 47 # apparently 32 pixels is the optimal character height for tesseract.
self.resize_dim=(int(self.width * scale_percent/100), int(self.height * scale_percent/100))
def run_ocr(self, lang: str, time_start: str, time_end: str,
conf_threshold: int, use_fullframe: bool) -> None:
@ -56,7 +62,15 @@ class Video:
if not self.use_fullframe:
# only use bottom half of the frame by default
img = img[self.height // 2:, :]
config = '--tessdata-dir "{}"'.format(constants.TESSDATA_DIR)
img = cv2.dilate(img, np.ones((2, 2), np.uint8))
_, img = cv2.threshold(img, 215, 255, cv2.THRESH_BINARY)
color_mask = cv2.inRange(img, (255, 255, 255), (255, 255, 255))
img = cv2.bitwise_and(img, img, mask=color_mask)
img = cv2.erode(img, np.ones((2, 2), np.uint8))
img = cv2.bitwise_not(img)
img = cv2.resize(img, self.resize_dim, interpolation=cv2.INTER_AREA)
img = cv2.copyMakeBorder(img, 20, 20, 0, 0, cv2.BORDER_CONSTANT, None, (255,255,255))
config = '--tessdata-dir "{}" --psm 7 -c preserve_interword_spaces=1'.format(constants.TESSDATA_DIR)
try:
return pytesseract.image_to_data(img, lang=self.lang, config=config)
except Exception as e: