divide ocr of frames into subtitle paragraphs

2019-04-25 01:40:46 +02:00 · 2019-04-25 01:40:46 +02:00 · 0d86e14fbc
parent 0e932936a1
commit 0d86e14fbc
1 changed files with 45 additions and 9 deletions
--- a/videocr/video.py
+++ b/videocr/video.py
@@ -4,13 +4,17 @@ import pytesseract
 import cv2
 import timeit
-from .models import PredictedFrame
+from .models import PredictedFrame, PredictedSubtitle
 SUBTITLE_BOUND = 10
 class Video:
    path: str
    lang: str
    num_frames: int
    pred_frames: List[PredictedFrame]
    def __init__(self, path, lang):
        self.path = path
@@ -19,26 +23,58 @@ class Video:
        self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
        v.release()
-    def _frame_ocr(self, img):
+    def _single_frame_ocr(self, img) -> str:
        img = img[img.shape[0] // 2:, :]  # only use bottom half of the frame
        data = pytesseract.image_to_data(img, lang=self.lang)
        return data
-    def run_ocr(self):
+    def run_ocr(self) -> None:
        v = cv2.VideoCapture(self.path)
-        print(self.num_frames)
+        frames = (v.read()[1] for _ in range(self.num_frames))
        frames = (v.read()[1] for _ in range(40))
        # perform ocr to all frames in parallel
        with futures.ProcessPoolExecutor() as pool:
-            frames_ocr = pool.map(self._frame_ocr, frames, chunksize=1)
+            frames_ocr = pool.map(self._single_frame_ocr, frames, chunksize=10)
-            for i, data in enumerate(frames_ocr):
+            self.pred_frames = [PredictedFrame(i, data) 
-                pred = PredictedFrame(i, data)
+                                for i, data in enumerate(frames_ocr)]
                print(pred.text)
        v.release()
    def get_subtitles(self) -> str:
        """Group the per-frame OCR results into subtitle paragraphs.

        Walks ``self.pred_frames`` with an anchor frame and a moving frame,
        and starts a new paragraph once the moving frame has stayed
        dissimilar to the anchor for more than ``SUBTITLE_BOUND`` consecutive
        frames. The text of each paragraph is printed as it is found.

        Returns:
            Always the empty string; the paragraphs are only printed.

        Raises:
            AttributeError: if ``self.pred_frames`` is ``None``.
        """
        # NOTE(review): this check only fires if pred_frames was explicitly
        # set to None; if the attribute was never assigned, the attribute
        # access itself fails first -- confirm against __init__.
        if self.pred_frames is None:
            raise AttributeError(
                'Please call self.run_ocr() first to generate ocr of frames')

        # divide ocr of frames into subtitle paragraphs using sliding window
        i = 0  # index of the anchor (first) frame of the current paragraph
        j = 1  # index of the frame currently compared against the anchor
        # number of further dissimilar frames tolerated before splitting
        bound = SUBTITLE_BOUND
        # NOTE(review): the loop is bounded by num_frames but indexes
        # pred_frames; this assumes pred_frames holds one entry per frame --
        # verify against run_ocr.
        while j < self.num_frames:
            fi, fj = self.pred_frames[i], self.pred_frames[j]

            if fi.is_similar_to(fj):
                # still the same paragraph: restore the full tolerance
                bound = SUBTITLE_BOUND
            elif bound > 0:
                # dissimilar frame, but within tolerance: keep scanning
                bound -= 1
            else:
                # divide subtitle paragraphs
                # tolerance exhausted: the new paragraph starts at the first
                # frame of the current run of dissimilar frames
                para_new = j - SUBTITLE_BOUND
                print(PredictedSubtitle(self.pred_frames[i:para_new]).text)
                i = para_new
                # together with the increment below, comparison resumes at
                # the frame right after the new anchor
                j = i
                bound = SUBTITLE_BOUND

            j += 1

        # flush the remaining frames as the last paragraph; skipped when the
        # anchor is already the final frame index
        if i < self.num_frames - 1:
            print(PredictedSubtitle(self.pred_frames[i:]).text)

        return ''
 time_start = timeit.default_timer()
 v = Video('1.mp4', 'HanS')
 v.run_ocr()
 v.get_subtitles()
 time_stop = timeit.default_timer()
 print(time_stop - time_start)