@ -18,6 +18,8 @@ class Video:
num_frames : int
fps : float
height : int
width : int
resize_dim : List [ int ]
pred_frames : List [ PredictedFrame ]
pred_subs : List [ PredictedSubtitle ]
@ -27,6 +29,9 @@ class Video:
self . num_frames = int ( v . get ( cv2 . CAP_PROP_FRAME_COUNT ) )
self . fps = v . get ( cv2 . CAP_PROP_FPS )
self . height = int ( v . get ( cv2 . CAP_PROP_FRAME_HEIGHT ) )
self . width = int ( v . get ( cv2 . CAP_PROP_FRAME_WIDTH ) )
scale_percent = 47 # apparently 32 pixels is the optimal character height for tesseract.
self . resize_dim = ( int ( self . width * scale_percent / 100 ) , int ( self . height * scale_percent / 100 ) )
def run_ocr ( self , lang : str , time_start : str , time_end : str ,
conf_threshold : int , use_fullframe : bool ) - > None :
@ -56,8 +61,16 @@ class Video:
if not self . use_fullframe :
# only use bottom half of the frame by default
img = img [ self . height / / 2 : , : ]
img = cv2 . bitwise_not ( cv2 . bitwise_and ( img , img , mask = cv2 . inRange ( img , ( 190 , 190 , 190 ) , ( 255 , 255 , 255 ) ) ) )
config = ' --tessdata-dir " {} " ' . format ( constants . TESSDATA_DIR )
# Dilate with a 2x2 kernel, then resize to the precomputed self.resize_dim
# using area interpolation.
# NOTE(review): resize_dim is computed from the full frame size, while img
# may already have been cropped to its bottom half at this point -- confirm
# that the resulting change in aspect ratio is intended.
dilated = cv2.dilate(img, np.ones((2, 2), np.uint8))
img = cv2.resize(dilated, self.resize_dim, interpolation=cv2.INTER_AREA)
# Build a mask that filters out pixels that are not gray-like or not bright
# enough: hue is unrestricted (0-179), saturation at most 20, value at
# least 190.
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
color_mask = cv2.inRange(hsv, (0, 0, 190), (179, 20, 255))
# Apply the mask and invert the result so it is black text on a white
# background.
inverted = cv2.bitwise_not(cv2.bitwise_and(img, img, mask=color_mask))
# Add a 10-pixel white border to the top and bottom only (0 on the left and
# right). The positional None is the optional dst argument.
img = cv2.copyMakeBorder(
    inverted, 10, 10, 0, 0, cv2.BORDER_CONSTANT, None, (255, 255, 255)
)
config = ' --tessdata-dir " {} " --psm 7 -c preserve_interword_spaces=1 ' . format ( constants . TESSDATA_DIR )
try :
return pytesseract . image_to_data ( img , lang = self . lang , config = config )
except Exception as e :