From 9b37319961afe28f360b44cafde061cd9832177c Mon Sep 17 00:00:00 2001 From: Yun Date: Fri, 16 Jul 2021 16:58:44 +0200 Subject: [PATCH] Update model to use PaddleOCR results --- videocr/models.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/videocr/models.py b/videocr/models.py index 121cb0f..d0bb090 100644 --- a/videocr/models.py +++ b/videocr/models.py @@ -17,25 +17,15 @@ class PredictedFrame: confidence: int # total confidence of all words text: str - def __init__(self, index: int, pred_data: str, conf_threshold: int): + def __init__(self, index: int, pred_data: list[list], conf_threshold: int): self.index = index self.words = [] - block = 0 # keep track of line breaks - - for l in pred_data.splitlines()[1:]: - word_data = l.split() - if len(word_data) < 12: - # no word is predicted + for l in pred_data: + if len(l) < 2: continue - _, _, block_num, *_, conf, text = word_data - block_num, conf = int(block_num), int(conf) - - # handle line breaks - if block < block_num: - block = block_num - if self.words and self.words[-1].text != '\n': - self.words.append(PredictedWord(0, '\n')) + text = l[1][0] + conf = int(l[1][1] * 100) # word predictions with low confidence will be filtered out if conf >= conf_threshold: