From 9b37319961afe28f360b44cafde061cd9832177c Mon Sep 17 00:00:00 2001
From: Yun <mrqianhuzi@gmail.com>
Date: Fri, 16 Jul 2021 16:58:44 +0200
Subject: [PATCH] Update model to use PaddleOCR results

---
 videocr/models.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/videocr/models.py b/videocr/models.py
index 121cb0f..d0bb090 100644
--- a/videocr/models.py
+++ b/videocr/models.py
@@ -17,25 +17,15 @@ class PredictedFrame:
     confidence: int  # total confidence of all words
     text: str
 
-    def __init__(self, index: int, pred_data: str, conf_threshold: int):
+    def __init__(self, index: int, pred_data: list[list], conf_threshold: int):
         self.index = index
         self.words = []
 
-        block = 0  # keep track of line breaks
-
-        for l in pred_data.splitlines()[1:]:
-            word_data = l.split()
-            if len(word_data) < 12:
-                # no word is predicted
+        for l in pred_data:
+            if len(l) < 2:
                 continue
-            _, _, block_num, *_, conf, text = word_data
-            block_num, conf = int(block_num), int(conf)
-
-            # handle line breaks
-            if block < block_num:
-                block = block_num
-                if self.words and self.words[-1].text != '\n':
-                    self.words.append(PredictedWord(0, '\n'))
+            text = l[1][0]
+            conf = int(l[1][1] * 100)
 
             # word predictions with low confidence will be filtered out
             if conf >= conf_threshold: