from __future__ import annotations

from dataclasses import dataclass
from typing import List

from fuzzywuzzy import fuzz

# Translation table applied to the joined text of every frame: '|' is mapped
# to 'I' and the listed symbols are deleted because they are obviously OCR
# errors. Built once at import time rather than on every frame construction.
_OCR_CLEANUP_TABLE = str.maketrans('|', 'I', '<>{}[];`@#$%^*_=~\\')


@dataclass
class PredictedWord:
    """A recognised piece of text together with its confidence score."""

    __slots__ = 'confidence', 'text'
    confidence: int
    text: str


class PredictedFrame:
    """OCR prediction for a single frame.

    Holds the words that passed the confidence threshold, their summed
    confidence, and the cleaned-up text obtained by joining them.
    """

    index: int  # 0-based index of the frame
    words: List[PredictedWord]
    confidence: int  # total confidence of all words
    text: str

    def __init__(self, index: int, pred_data: List[list],
                 conf_threshold: int):
        """Build the frame from raw prediction data.

        Args:
            index: Index of the frame.
            pred_data: Raw prediction entries. Entries with fewer than two
                items are skipped; otherwise ``entry[1][0]`` is used as the
                text and ``entry[1][1]`` as the score.
                NOTE(review): the score is presumably a float in [0, 1]
                (it is scaled by 100 below) -- confirm against the caller.
            conf_threshold: Minimum confidence (after scaling by 100) a
                word needs in order to be kept.
        """
        self.index = index
        self.words = []
        for entry in pred_data:
            if len(entry) < 2:
                continue
            text = entry[1][0]
            # Scale the score by 100 and truncate to an int.
            conf = int(entry[1][1] * 100)

            # word predictions with low confidence will be filtered out
            if conf >= conf_threshold:
                self.words.append(PredictedWord(conf, text))

        self.confidence = sum(word.confidence for word in self.words)

        joined = ' '.join(word.text for word in self.words)
        # Remove chars that are obviously OCR errors, drop the spaces that
        # the join put around newline tokens, and trim the ends.
        cleaned = joined.translate(_OCR_CLEANUP_TABLE)
        self.text = cleaned.replace(' \n ', '\n').strip()

    def is_similar_to(self, other: PredictedFrame,
                      threshold: int = 70) -> bool:
        """Return True if ``fuzz.ratio`` of both texts is >= ``threshold``."""
        return fuzz.ratio(self.text, other.text) >= threshold


class PredictedSubtitle:
    """A subtitle built from a sequence of predicted frames.

    Only frames with a positive total confidence are kept. The subtitle
    text is the text of the kept frame with the highest confidence, or an
    empty string when no frame is kept.
    """

    frames: List[PredictedFrame]
    sim_threshold: int
    text: str

    def __init__(self, frames: List[PredictedFrame], sim_threshold: int):
        """Store the usable frames and pick the subtitle text.

        Args:
            frames: Candidate frames; those with ``confidence <= 0`` are
                discarded.
            sim_threshold: Minimum ``fuzz.partial_ratio`` score used by
                :meth:`is_similar_to`.
        """
        self.frames = [f for f in frames if f.confidence > 0]
        self.sim_threshold = sim_threshold

        if self.frames:
            best_frame = max(self.frames, key=lambda f: f.confidence)
            self.text = best_frame.text
        else:
            self.text = ''

    @property
    def index_start(self) -> int:
        """Index of the first kept frame, or 0 when there are no frames."""
        if self.frames:
            return self.frames[0].index
        return 0

    @property
    def index_end(self) -> int:
        """Index of the last kept frame, or 0 when there are no frames."""
        if self.frames:
            return self.frames[-1].index
        return 0

    def is_similar_to(self, other: PredictedSubtitle) -> bool:
        """Return True if ``fuzz.partial_ratio`` of both texts reaches
        this subtitle's ``sim_threshold``."""
        score = fuzz.partial_ratio(self.text, other.text)
        return score >= self.sim_threshold

    def __repr__(self) -> str:
        return f'{self.index_start} - {self.index_end}. {self.text}'