forked from pradana.aumars/videocr
adjust text similarity metrics
This commit is contained in:
parent
a3986b3279
commit
3f73cb9bca
@ -48,12 +48,12 @@ class PredictedFrame:
|
|||||||
|
|
||||||
self.text = ' '.join(word.text for word in self.words)
|
self.text = ' '.join(word.text for word in self.words)
|
||||||
# remove chars that are obviously ocr errors
|
# remove chars that are obviously ocr errors
|
||||||
translate_table = {ord(c): None for c in '<>{};`@#$%^*_=\\'}
|
translate_table = {ord(c): None for c in '<>{}[];`@#$%^*_=~\\'}
|
||||||
translate_table[ord('|')] = 'I'
|
translate_table[ord('|')] = 'I'
|
||||||
self.text = self.text.translate(translate_table).strip()
|
self.text = self.text.translate(translate_table).strip()
|
||||||
|
|
||||||
def is_similar_to(self, other: PredictedFrame, threshold=70) -> bool:
|
def is_similar_to(self, other: PredictedFrame, threshold=70) -> bool:
|
||||||
return fuzz.partial_ratio(self.text, other.text) >= threshold
|
return fuzz.ratio(self.text, other.text) >= threshold
|
||||||
|
|
||||||
|
|
||||||
class PredictedSubtitle:
|
class PredictedSubtitle:
|
||||||
@ -81,7 +81,7 @@ class PredictedSubtitle:
|
|||||||
return self.frames[-1].index
|
return self.frames[-1].index
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def is_similar_to(self, other: PredictedSubtitle, threshold=70) -> bool:
|
def is_similar_to(self, other: PredictedSubtitle, threshold=90) -> bool:
|
||||||
return fuzz.partial_ratio(self.text, other.text) >= threshold
|
return fuzz.partial_ratio(self.text, other.text) >= threshold
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user