From 63873af476c8a732abae625f9464931cf23d2e59 Mon Sep 17 00:00:00 2001
From: Yi Ge <me@yige.ch>
Date: Wed, 24 Apr 2019 21:18:31 +0200
Subject: [PATCH] add Video class

---
 videocr/models.py | 36 ++++++++++++++++++------------------
 videocr/video.py  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 18 deletions(-)
 create mode 100644 videocr/video.py

diff --git a/videocr/models.py b/videocr/models.py
index 16c1137..3ce8af6 100644
--- a/videocr/models.py
+++ b/videocr/models.py
@@ -3,8 +3,8 @@ from typing import List
 from dataclasses import dataclass
 
 
-CONFIDENCE_THRESHOLD = 60
-# predictions with lower confidence will be filtered out
+CONF_THRESHOLD = 60
+# word predictions with confidence below this threshold are filtered out
 
 
 @dataclass
@@ -15,33 +15,41 @@ class PredictedWord:
 
 
 class PredictedFrame:
+    """OCR word predictions parsed for a single video frame."""
+
+    index: int  # 0-based index of the frame
     words: List[PredictedWord]
+    confidence: int  # total confidence of all words
+    text: str
 
-    def __init__(self, pred_data: str):
+    def __init__(self, index: int, pred_data: str):
+        """Parse OCR output for one frame into words.
+
+        The first line of pred_data is skipped, and rows with fewer than
+        12 whitespace-separated fields are ignored.
+        """
+        self.index = index
         self.words = []
 
-        block_current = 1
-        for line in pred_data.split('\n')[1:]:
-            tmp = line.split()
-            if len(tmp) < 12:
+        block = 0  # keep track of line breaks
+
+        for line in pred_data.splitlines()[1:]:
+            word_data = line.split()
+            if len(word_data) < 12:
                 # no word is predicted
                 continue
-            _, _, block_num, *_, conf, text = tmp
-            block_num, conf = int(block_num), int(conf)
+            _, _, block_num, *_, conf, text = word_data
+            # conf may be formatted as a float (e.g. '96.5'); truncate it
+            block_num, conf = int(block_num), int(float(conf))
 
             # handle line breaks
-            if block_current < block_num:
-                block_current = block_num
+            if block < block_num:
+                block = block_num
                 self.words.append(PredictedWord(0, '\n'))
 
-            if conf >= CONFIDENCE_THRESHOLD:
+            if conf >= CONF_THRESHOLD:
                 self.words.append(PredictedWord(conf, text))
 
-    @property
-    def confidence(self) -> int:
-        return sum(word.confidence for word in self.words)
-
-    @property
-    def text(self) -> str:
-        return ''.join(word.text + ' ' for word in self.words)
+        self.confidence = sum(word.confidence for word in self.words)
+        self.text = ''.join(word.text + ' ' for word in self.words).strip()
 
diff --git a/videocr/video.py b/videocr/video.py
new file mode 100644
index 0000000..a91fb80
--- /dev/null
+++ b/videocr/video.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+from concurrent import futures
+import pytesseract
+import cv2
+import timeit
+
+from .models import PredictedFrame
+
+
+class Video:
+    """A video file whose frames can be run through OCR."""
+
+    path: str  # passed to cv2.VideoCapture
+    lang: str  # passed to pytesseract as the OCR language
+    num_frames: int  # CAP_PROP_FRAME_COUNT reported by OpenCV
+
+    def __init__(self, path: str, lang: str):
+        self.path = path
+        self.lang = lang
+        v = cv2.VideoCapture(path)
+        try:
+            self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
+        finally:
+            # release the capture even if the frame count lookup fails
+            v.release()
+
+    def _frame_ocr(self, img):
+        """Return pytesseract's image_to_data output for one frame."""
+        return pytesseract.image_to_data(img, lang=self.lang)
+
+    @staticmethod
+    def _read_frames(capture, limit: int):
+        """Yield up to ``limit`` frames, stopping at the first failed read."""
+        for _ in range(limit):
+            ok, frame = capture.read()
+            if not ok:
+                # end of stream or unreadable frame; never hand None to OCR
+                break
+            yield frame
+
+    def run_ocr(self, max_frames: int = 40):
+        """OCR the first ``max_frames`` frames and print each frame's text.
+
+        Frames are recognised in worker processes and printed in frame
+        order. Fewer frames are processed if reading stops early.
+        """
+        v = cv2.VideoCapture(self.path)
+        try:
+            print(self.num_frames)
+            frames = self._read_frames(v, max_frames)
+
+            with futures.ProcessPoolExecutor() as pool:
+                frames_ocr = pool.map(self._frame_ocr, frames, chunksize=1)
+                for i, data in enumerate(frames_ocr):
+                    pred = PredictedFrame(i, data)
+                    print(pred.text)
+        finally:
+            # release the capture even if OCR raises
+            v.release()
+
+
+if __name__ == '__main__':
+    # only run the demo when executed as a script: worker processes may
+    # re-import this module and must not start another OCR run
+    time_start = timeit.default_timer()
+    v = Video('1.mp4', 'HanS')
+    v.run_ocr()
+    time_stop = timeit.default_timer()
+    print(time_stop - time_start)