From efd72236249e706cbf68760944f29e61e40a6e48 Mon Sep 17 00:00:00 2001 From: Yi Ge Date: Mon, 29 Apr 2019 03:50:06 +0200 Subject: [PATCH] make sim_threshold adjustable through api --- videocr/api.py | 11 ++++++----- videocr/models.py | 8 +++++--- videocr/video.py | 17 +++++++++-------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/videocr/api.py b/videocr/api.py index cd1c8d0..b182d43 100644 --- a/videocr/api.py +++ b/videocr/api.py @@ -5,8 +5,9 @@ from . import constants from .video import Video -def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='', - conf_threshold=65, use_fullframe=False) -> str: +def get_subtitles( + video_path: str, lang='eng', time_start='0:00', time_end='', + conf_threshold=65, sim_threshold=90, use_fullframe=False) -> str: # download tesseract data file to ~/tessdata if necessary fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang) if not fpath.is_file(): @@ -20,14 +21,14 @@ def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='', v = Video(video_path) v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe) - return v.get_subtitles() + return v.get_subtitles(sim_threshold) def save_subtitles_to_file( video_path: str, file_path='subtitle.srt', lang='eng', - time_start='0:00', time_end='', conf_threshold=65, + time_start='0:00', time_end='', conf_threshold=65, sim_threshold=90, use_fullframe=False) -> None: with open(file_path, 'w+') as f: f.write(get_subtitles( video_path, lang, time_start, time_end, conf_threshold, - use_fullframe)) + sim_threshold, use_fullframe)) diff --git a/videocr/models.py b/videocr/models.py index 0dc4298..121cb0f 100644 --- a/videocr/models.py +++ b/videocr/models.py @@ -54,10 +54,12 @@ class PredictedFrame: class PredictedSubtitle: frames: List[PredictedFrame] + sim_threshold: int text: str - def __init__(self, frames: List[PredictedFrame]): + def __init__(self, frames: List[PredictedFrame], sim_threshold: int): self.frames = [f for f in frames if f.confidence > 0] + self.sim_threshold = sim_threshold if self.frames: self.text = max(self.frames, key=lambda f: f.confidence).text @@ -76,8 +78,8 @@ class PredictedSubtitle: return self.frames[-1].index return 0 - def is_similar_to(self, other: PredictedSubtitle, threshold=90) -> bool: - return fuzz.partial_ratio(self.text, other.text) >= threshold + def is_similar_to(self, other: PredictedSubtitle) -> bool: + return fuzz.partial_ratio(self.text, other.text) >= self.sim_threshold def __repr__(self): return '{} - {}. {}'.format(self.index_start, self.index_end, self.text) diff --git a/videocr/video.py b/videocr/video.py index 493a53c..6f68eba 100644 --- a/videocr/video.py +++ b/videocr/video.py @@ -29,7 +29,7 @@ class Video: v.release() def run_ocr(self, lang: str, time_start: str, time_end: str, - conf_threshold: int, use_fullframe: bool) -> None: + conf_threshold:int, use_fullframe: bool) -> None: self.lang = lang self.use_fullframe = use_fullframe @@ -80,8 +80,8 @@ class Video: config = '--tessdata-dir "{}"'.format(constants.TESSDATA_DIR) return pytesseract.image_to_data(img, lang=self.lang, config=config) - def get_subtitles(self) -> str: - self._generate_subtitles() + def get_subtitles(self, sim_threshold: int) -> str: + self._generate_subtitles(sim_threshold) return ''.join( '{}\n{} --> {}\n{}\n\n'.format( i, @@ -90,7 +90,7 @@ class Video: sub.text) for i, sub in enumerate(self.pred_subs)) - def _generate_subtitles(self) -> None: + def _generate_subtitles(self, sim_threshold: int) -> None: self.pred_subs = [] if self.pred_frames is None: @@ -112,8 +112,8 @@ class Video: else: # divide subtitle paragraphs para_new = j - WIN_BOUND - self._append_sub( - PredictedSubtitle(self.pred_frames[i:para_new])) + self._append_sub(PredictedSubtitle( + self.pred_frames[i:para_new], sim_threshold)) i = para_new j = i bound = WIN_BOUND @@ -122,7 +122,8 @@ class Video: # also handle the last remaining frames if i < len(self.pred_frames) - 1: - self._append_sub(PredictedSubtitle(self.pred_frames[i:])) + self._append_sub(PredictedSubtitle( + self.pred_frames[i:], sim_threshold)) def _append_sub(self, sub: PredictedSubtitle) -> None: if len(sub.text) == 0: @@ -132,7 +133,7 @@ class Video: while self.pred_subs and sub.is_similar_to(self.pred_subs[-1]): ls = self.pred_subs[-1] del self.pred_subs[-1] - sub = PredictedSubtitle(ls.frames + sub.frames) + sub = PredictedSubtitle(ls.frames + sub.frames, sub.sim_threshold) self.pred_subs.append(sub)