Add additional image processing
Ordered process: 1. dilation - thicken the white portion of the subtitles; 2. resize - temporarily hardcoded to 47% (assuming subtitles are 68 pixels in height); 3. apply HSV color mask - filter out non-gray pixels and pixels that are not bright enough; 4. invert image - make it black text on a white background; 5. add border to top and bottom - assuming subtitles are cropped closely.
This commit is contained in:
parent
edc1bc28a2
commit
7f6881749f
@ -18,6 +18,8 @@ class Video:
|
|||||||
num_frames: int
|
num_frames: int
|
||||||
fps: float
|
fps: float
|
||||||
height: int
|
height: int
|
||||||
|
width: int
|
||||||
|
resize_dim: List[int]
|
||||||
pred_frames: List[PredictedFrame]
|
pred_frames: List[PredictedFrame]
|
||||||
pred_subs: List[PredictedSubtitle]
|
pred_subs: List[PredictedSubtitle]
|
||||||
|
|
||||||
@ -27,6 +29,9 @@ class Video:
|
|||||||
self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
|
self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||||
self.fps = v.get(cv2.CAP_PROP_FPS)
|
self.fps = v.get(cv2.CAP_PROP_FPS)
|
||||||
self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||||
|
self.width = int(v.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||||
|
scale_percent = 47 # apparently 32 pixels is the optimal character height for tesseract.
|
||||||
|
self.resize_dim=(int(self.width * scale_percent/100), int(self.height * scale_percent/100))
|
||||||
|
|
||||||
def run_ocr(self, lang: str, time_start: str, time_end: str,
|
def run_ocr(self, lang: str, time_start: str, time_end: str,
|
||||||
conf_threshold: int, use_fullframe: bool) -> None:
|
conf_threshold: int, use_fullframe: bool) -> None:
|
||||||
@ -56,8 +61,16 @@ class Video:
|
|||||||
if not self.use_fullframe:
|
if not self.use_fullframe:
|
||||||
# only use bottom half of the frame by default
|
# only use bottom half of the frame by default
|
||||||
img = img[self.height // 2:, :]
|
img = img[self.height // 2:, :]
|
||||||
img = cv2.bitwise_not(cv2.bitwise_and(img, img, mask=cv2.inRange(img, (190, 190, 190), (255, 255, 255))))
|
# dilate and resize
|
||||||
config = '--tessdata-dir "{}"'.format(constants.TESSDATA_DIR)
|
img=cv2.resize(cv2.dilate(img, np.ones(2, 2), np.uint8), self.resize_dim, interpolation=cv2.INTER_AREA)
|
||||||
|
|
||||||
|
# mask to filter out non gray-like pixels/pixels that are not bright enough
|
||||||
|
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
|
||||||
|
color_mask = cv2.inRange(hsv, (0, 0, 190), (179, 20, 255))
|
||||||
|
|
||||||
|
# apply mask, inverse image so it's black text on white background, add borders to top and bottom
|
||||||
|
img = cv2.copyMakeBorder(cv2.bitwise_not(cv2.bitwise_and(img, img, mask=color_mask)), 10, 10, 0, 0, cv2.BORDER_CONSTANT, None, (255,255,255)
|
||||||
|
config = '--tessdata-dir "{}" --psm 7 -c preserve_interword_spaces=1'.format(constants.TESSDATA_DIR)
|
||||||
try:
|
try:
|
||||||
return pytesseract.image_to_data(img, lang=self.lang, config=config)
|
return pytesseract.image_to_data(img, lang=self.lang, config=config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
Reference in New Issue
Block a user