Replace tesseract with PaddleOCR #5
@ -17,25 +17,15 @@ class PredictedFrame:
|
|||||||
confidence: int # total confidence of all words
|
confidence: int # total confidence of all words
|
||||||
text: str
|
text: str
|
||||||
|
|
||||||
def __init__(self, index: int, pred_data: str, conf_threshold: int):
|
def __init__(self, index: int, pred_data: list[list], conf_threshold: int):
|
||||||
self.index = index
|
self.index = index
|
||||||
self.words = []
|
self.words = []
|
||||||
|
|
||||||
block = 0 # keep track of line breaks
|
for l in pred_data:
|
||||||
|
if len(l) < 2:
|
||||||
for l in pred_data.splitlines()[1:]:
|
|
||||||
word_data = l.split()
|
|
||||||
if len(word_data) < 12:
|
|
||||||
# no word is predicted
|
|
||||||
continue
|
continue
|
||||||
_, _, block_num, *_, conf, text = word_data
|
text = l[1][0]
|
||||||
block_num, conf = int(block_num), int(conf)
|
conf = int(l[1][1] * 100)
|
||||||
|
|
||||||
# handle line breaks
|
|
||||||
if block < block_num:
|
|
||||||
block = block_num
|
|
||||||
if self.words and self.words[-1].text != '\n':
|
|
||||||
self.words.append(PredictedWord(0, '\n'))
|
|
||||||
|
|
||||||
# word predictions with low confidence will be filtered out
|
# word predictions with low confidence will be filtered out
|
||||||
if conf >= conf_threshold:
|
if conf >= conf_threshold:
|
||||||
|
Loading…
Reference in New Issue
Block a user