From efd72236249e706cbf68760944f29e61e40a6e48 Mon Sep 17 00:00:00 2001
From: Yi Ge <me@yige.ch>
Date: Mon, 29 Apr 2019 03:50:06 +0200
Subject: [PATCH] make sim_threshold adjustable through api

---
 videocr/api.py    | 11 ++++++-----
 videocr/models.py |  8 +++++---
 videocr/video.py  | 17 +++++++++--------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/videocr/api.py b/videocr/api.py
index cd1c8d0..b182d43 100644
--- a/videocr/api.py
+++ b/videocr/api.py
@@ -5,8 +5,9 @@ from . import constants
 from .video import Video
 
 
-def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='',
-                  conf_threshold=65, use_fullframe=False) -> str:
+def get_subtitles(
+        video_path: str, lang='eng', time_start='0:00', time_end='',
+        conf_threshold=65, sim_threshold=90, use_fullframe=False) -> str:
     # download tesseract data file to ~/tessdata if necessary
     fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang)
     if not fpath.is_file():
@@ -20,14 +21,14 @@ def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='',
 
     v = Video(video_path)
     v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe)
-    return v.get_subtitles()
+    return v.get_subtitles(sim_threshold)
 
 
 def save_subtitles_to_file(
         video_path: str, file_path='subtitle.srt', lang='eng',
-        time_start='0:00', time_end='', conf_threshold=65,
+        time_start='0:00', time_end='', conf_threshold=65, sim_threshold=90,
         use_fullframe=False) -> None:
     with open(file_path, 'w+') as f:
         f.write(get_subtitles(
             video_path, lang, time_start, time_end, conf_threshold,
-            use_fullframe))
+            sim_threshold, use_fullframe))
diff --git a/videocr/models.py b/videocr/models.py
index 0dc4298..121cb0f 100644
--- a/videocr/models.py
+++ b/videocr/models.py
@@ -54,10 +54,12 @@ class PredictedFrame:
 
 class PredictedSubtitle:
     frames: List[PredictedFrame]
+    sim_threshold: int
     text: str
 
-    def __init__(self, frames: List[PredictedFrame]):
+    def __init__(self, frames: List[PredictedFrame], sim_threshold: int):
         self.frames = [f for f in frames if f.confidence > 0]
+        self.sim_threshold = sim_threshold
 
         if self.frames:
             self.text = max(self.frames, key=lambda f: f.confidence).text
@@ -76,8 +78,8 @@ class PredictedSubtitle:
             return self.frames[-1].index
         return 0
 
-    def is_similar_to(self, other: PredictedSubtitle, threshold=90) -> bool:
-        return fuzz.partial_ratio(self.text, other.text) >= threshold
+    def is_similar_to(self, other: PredictedSubtitle) -> bool:
+        return fuzz.partial_ratio(self.text, other.text) >= self.sim_threshold
 
     def __repr__(self):
         return '{} - {}. {}'.format(self.index_start, self.index_end, self.text)
diff --git a/videocr/video.py b/videocr/video.py
index 493a53c..6f68eba 100644
--- a/videocr/video.py
+++ b/videocr/video.py
@@ -29,7 +29,7 @@ class Video:
         v.release()
 
     def run_ocr(self, lang: str, time_start: str, time_end: str,
-                conf_threshold: int, use_fullframe: bool) -> None:
+                conf_threshold:int, use_fullframe: bool) -> None:
         self.lang = lang
         self.use_fullframe = use_fullframe
 
@@ -80,8 +80,8 @@ class Video:
         config = '--tessdata-dir "{}"'.format(constants.TESSDATA_DIR)
         return pytesseract.image_to_data(img, lang=self.lang, config=config)
 
-    def get_subtitles(self) -> str:
-        self._generate_subtitles()
+    def get_subtitles(self, sim_threshold: int) -> str:
+        self._generate_subtitles(sim_threshold)
         return ''.join(
             '{}\n{} --> {}\n{}\n\n'.format(
                 i,
@@ -90,7 +90,7 @@ class Video:
                 sub.text)
             for i, sub in enumerate(self.pred_subs))
 
-    def _generate_subtitles(self) -> None:
+    def _generate_subtitles(self, sim_threshold: int) -> None:
         self.pred_subs = []
 
         if self.pred_frames is None:
@@ -112,8 +112,8 @@ class Video:
             else:
                 # divide subtitle paragraphs
                 para_new = j - WIN_BOUND
-                self._append_sub(
-                    PredictedSubtitle(self.pred_frames[i:para_new]))
+                self._append_sub(PredictedSubtitle(
+                    self.pred_frames[i:para_new], sim_threshold))
                 i = para_new
                 j = i
                 bound = WIN_BOUND
@@ -122,7 +122,8 @@ class Video:
 
         # also handle the last remaining frames
         if i < len(self.pred_frames) - 1:
-            self._append_sub(PredictedSubtitle(self.pred_frames[i:]))
+            self._append_sub(PredictedSubtitle(
+                self.pred_frames[i:], sim_threshold))
 
     def _append_sub(self, sub: PredictedSubtitle) -> None:
         if len(sub.text) == 0:
@@ -132,7 +133,7 @@ class Video:
         while self.pred_subs and sub.is_similar_to(self.pred_subs[-1]):
             ls = self.pred_subs[-1]
             del self.pred_subs[-1]
-            sub = PredictedSubtitle(ls.frames + sub.frames)
+            sub = PredictedSubtitle(ls.frames + sub.frames, sub.sim_threshold)
 
         self.pred_subs.append(sub)