forked from pradana.aumars/videocr
make conf_threshold adjustable through api

parent a5e6845a1b
commit 77362dce1a
@@ -5,8 +5,8 @@ from . import constants
 from .video import Video
 
 
-def get_subtitles(video_path: str, lang='eng',
-                  time_start='0:00', time_end='', use_fullframe=False) -> str:
+def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='',
+                  conf_threshold=65, use_fullframe=False) -> str:
     # download tesseract data file to ~/tessdata if necessary
     fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang)
     if not fpath.is_file():
@@ -19,13 +19,15 @@ def get_subtitles(video_path: str, lang='eng',
             shutil.copyfileobj(res, f)
 
     v = Video(video_path)
-    v.run_ocr(lang, time_start, time_end, use_fullframe)
+    v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe)
     return v.get_subtitles()
 
 
 def save_subtitles_to_file(
         video_path: str, file_path='subtitle.srt', lang='eng',
-        time_start='0:00', time_end='', use_fullframe=False) -> None:
+        time_start='0:00', time_end='', conf_threshold=65,
+        use_fullframe=False) -> None:
     with open(file_path, 'w+') as f:
         f.write(get_subtitles(
-            video_path, lang, time_start, time_end, use_fullframe))
+            video_path, lang, time_start, time_end, conf_threshold,
+            use_fullframe))
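With both entry points updated, the confidence threshold is settable from the public API instead of being hard-coded. A minimal usage sketch, assuming the package exports these functions at the top level as the relative imports suggest (file name and threshold value are illustrative):

    from videocr import get_subtitles, save_subtitles_to_file

    # print SRT-formatted subtitles, keeping only words Tesseract
    # scored above a threshold of 75 (the new API default is 65)
    print(get_subtitles('video.mp4', lang='eng', conf_threshold=75))

    # or write them straight to an .srt file
    save_subtitles_to_file('video.mp4', 'subtitle.srt', conf_threshold=75)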
@@ -17,7 +17,7 @@ class PredictedFrame:
     confidence: int  # total confidence of all words
     text: str
 
-    def __init__(self, index: int, pred_data: str, conf_threshold=70):
+    def __init__(self, index: int, pred_data: str, conf_threshold: int):
         self.index = index
         self.words = []
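Dropping the stale default of 70 in favor of a required conf_threshold: int means the threshold can no longer silently diverge from the API default of 65; every caller must pass it through explicitly. The constructor presumably uses it to discard low-confidence words from Tesseract's TSV output; a rough sketch of that kind of filtering, with the TSV column layout assumed rather than taken from this diff:

    def _filter_words(pred_data: str, conf_threshold: int):
        # hypothetical helper: Tesseract TSV puts word confidence in
        # column 11 and the recognized text in column 12
        for row in pred_data.splitlines()[1:]:
            fields = row.split('\t')
            if len(fields) < 12:
                continue
            conf, text = int(float(fields[10])), fields[11]
            if conf >= conf_threshold and text.strip():
                yield conf, text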
@@ -14,18 +14,22 @@ class Video:
     use_fullframe: bool
     num_frames: int
     fps: float
+    height: int
     pred_frames: List[PredictedFrame]
     pred_subs: List[PredictedSubtitle]
 
     def __init__(self, path: str):
         self.path = path
         v = cv2.VideoCapture(path)
+        if not v.isOpened():
+            raise IOError('can not open video format {}'.format(path))
         self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
         self.fps = v.get(cv2.CAP_PROP_FPS)
+        self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT))
         v.release()
 
     def run_ocr(self, lang: str, time_start: str, time_end: str,
-                use_fullframe: bool) -> None:
+                conf_threshold: int, use_fullframe: bool) -> None:
         self.lang = lang
         self.use_fullframe = use_fullframe
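The Video constructor changes are independent of the threshold plumbing: it now records the frame height (presumably for cropping to the subtitle region when use_fullframe is off) and fails fast when OpenCV cannot open the input, instead of silently reporting zero frames. A hedged illustration of the new failure mode, assuming the module path videocr.video:

    from videocr.video import Video

    try:
        Video('missing.mp4')  # unreadable or nonexistent input
    except IOError as e:
        print(e)              # can not open video format missing.mp4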
@@ -44,8 +48,9 @@ class Video:
         # perform ocr to frames in parallel
         with futures.ProcessPoolExecutor() as pool:
             ocr_map = pool.map(self._single_frame_ocr, frames, chunksize=10)
-            self.pred_frames = [PredictedFrame(i + ocr_start, data)
-                                for i, data in enumerate(ocr_map)]
+            self.pred_frames = [
+                PredictedFrame(i + ocr_start, data, conf_threshold)
+                for i, data in enumerate(ocr_map)]
 
         v.release()
 
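Taken together, the threshold now travels one way through the pipeline: get_subtitles / save_subtitles_to_file → Video.run_ocr → each PredictedFrame built from the parallel OCR results, so a single conf_threshold argument controls word filtering for every frame.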