From 3f73cb9bcafbd639ef5791a846b861d633cdb9dc Mon Sep 17 00:00:00 2001 From: Yi Ge Date: Sat, 27 Apr 2019 03:18:27 +0200 Subject: [PATCH] adjust text similarity metrics --- videocr/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/videocr/models.py b/videocr/models.py index 31259c4..789ece1 100644 --- a/videocr/models.py +++ b/videocr/models.py @@ -48,12 +48,12 @@ class PredictedFrame: self.text = ' '.join(word.text for word in self.words) # remove chars that are obviously ocr errors - translate_table = {ord(c): None for c in '<>{};`@#$%^*_=\\'} + translate_table = {ord(c): None for c in '<>{}[];`@#$%^*_=~\\'} translate_table[ord('|')] = 'I' self.text = self.text.translate(translate_table).strip() def is_similar_to(self, other: PredictedFrame, threshold=70) -> bool: - return fuzz.partial_ratio(self.text, other.text) >= threshold + return fuzz.ratio(self.text, other.text) >= threshold class PredictedSubtitle: @@ -81,7 +81,7 @@ class PredictedSubtitle: return self.frames[-1].index return 0 - def is_similar_to(self, other: PredictedSubtitle, threshold=70) -> bool: + def is_similar_to(self, other: PredictedSubtitle, threshold=90) -> bool: return fuzz.partial_ratio(self.text, other.text) >= threshold def __repr__(self):