Upgrade Tesseract to 5.0

This commit is contained in:
Adrien Bourmault 2019-08-20 18:38:22 +02:00 committed by GitHub
parent 9423e316eb
commit 2db1c28e24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 21 additions and 16 deletions

View File

@ -54,8 +54,8 @@ def main():
mainw = mainWindow()
try:
os.environ['PATH'] = globs.CNIRFolder + '\\Tesseract-OCR4\\'
os.environ['TESSDATA_PREFIX'] = globs.CNIRFolder + '\\Tesseract-OCR4\\tessdata'
os.environ['PATH'] = globs.CNIRFolder + '\\Tesseract-OCR5\\'
os.environ['TESSDATA_PREFIX'] = globs.CNIRFolder + '\\Tesseract-OCR5\\tessdata'
tesser_version = pytesseract.get_tesseract_version()
except Exception as e:
logfile.printerr('ERROR WITH TESSERACT MODULE ' + str(e))

View File

@ -44,11 +44,11 @@ def crashCNIR(shutdown=True):
root.withdraw()
logfile = logger.logCur
logfile.printerr("FATAL ERROR : see traceback below.\n{}".format(traceback.format_exc()))
showerror(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["CNIRevelator crashed because a fatal error occured. View log for more infos and please open an issue on Github"], parent=root)
res = askquestion(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["Would you like to open the log file ?"], parent=root)
showerror(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["CNIRevelator crashed because a fatal error occured. View log for more infos and please open an issue on Github"])
res = askquestion(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["Would you like to open the log file ?"])
if res == "yes":
webbrowser.open_new(globs.CNIRErrLog)
res = askquestion(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["Would you like to open an issue on Github to report this bug ?"], parent=root)
res = askquestion(lang.all[globs.CNIRlang]["CNIRevelator Fatal Eror"], lang.all[globs.CNIRlang]["Would you like to open an issue on Github to report this bug ?"])
if res == "yes":
webbrowser.open_new("https://github.com/neox95/CNIRevelator/issues")
root.destroy()

View File

@ -26,7 +26,7 @@ import os
# CNIRevelator version
verType = "final release"
version = [3, 1, 1]
version = [3, 1, 2]
verstring_full = "{}.{}.{} {}".format(version[0], version[1], version[2], verType)
verstring = "{}.{}".format(version[0], version[1])
@ -38,7 +38,7 @@ CNIRCryptoKey = '82Xh!efX3#@P~2eG'
CNIRNewVersion = False
CNIRConfig = CNIRFolder + '\\config\\conf.ig'
CNIRTesser = CNIRFolder + '\\Tesseract-OCR4\\'
CNIRTesser = CNIRFolder + '\\Tesseract-OCR5\\'
CNIRErrLog = CNIRFolder + '\\logs\\error.log'
CNIRMainLog = CNIRFolder + '\\logs\\main.log'
CNIRUrlConfig = CNIRFolder + '\\config\\urlconf.ig'
@ -56,7 +56,7 @@ CNIRLangFile = CNIRFolder + '\\config\\lang.ig'
CNIRlang = "fr"
CNIRConfig = CNIRFolder + '\\config\\conf.ig'
CNIRTesser = CNIRFolder + '\\Tesseract-OCR4\\'
CNIRTesser = CNIRFolder + '\\Tesseract-OCR5\\'
CNIRErrLog = CNIRFolder + '\\logs\\error.log'
CNIRMainLog = CNIRFolder + '\\logs\\main.log'
CNIRUrlConfig = CNIRFolder + '\\config\\urlconf.ig'

View File

@ -36,7 +36,6 @@ import critical # critical.py
import logger # logger.py
import globs # globs.py
import lang # lang.py
import updater # updater.py
import critical # critical.py

View File

@ -138,7 +138,8 @@ french = \
"Coller :\t\t\t\tCtrl-V \n"
"Forcer une nouvelle détection du document :\tEchap\n",
"CHANGELOG" : "Version 3.1.1 \nMise-à-jour mineure avec les progressions suivantes :\n- Correction d'un bug sévère du système de mise à jour\n\n" + \
"CHANGELOG" : "Version 3.1.2 \nMise-à-jour mineure avec les progressions suivantes :\n- Montée de version de Tesseract OCR : 5.0\n\n" + \
"Version 3.1.1 \nMise-à-jour mineure avec les progressions suivantes :\n- Correction d'un bug sévère du système de mise à jour\n\n" + \
"Version 3.1.0 \nMise-à-jour majeure avec les progressions suivantes :\n- Modifications cosmétiques de l'interface utilisateur\n- Stabilisation des changements effectués sur la version mineure 3.0 : interface utilisateur, OCR, VISA A et B, logging\n- Rationalisation du système de langues\n- Ajout des canaux de mise-à-jour\n\n" + \
"Version 3.0.8 finale\nCorrectif : bug du système de mise-à-jour'\n\n" + \
"Version 3.0.6 \nMise-à-jour mineure avec les corrections suivantes :\n- Changement de l'apparence du launcher de l'application\n- Améliorations de l'interface, notamment de la stabilité\n- Ajout de la signature numérique de l'exécutable\n\n" + \
@ -189,6 +190,8 @@ french = \
"Pièce d'identité/de voyage" : "Pièce d'identité/de voyage",
"Pièce d'identité FR" : "Pièce d'identité FR",
"Permis de conduire" : "Permis de conduire",
"The file you provided is not "
"valid : {}" : "Le fichier transmis n'est pas valide : {}",
"LANDCODE2" : {
'AW': 'Aruba',
@ -827,7 +830,8 @@ english = \
"Paste:\t\t\t\tCtrl-V\n"
"Force a new document detection:\tEchap\n",
"CHANGELOG" : "Version 3.1.1 \nMinor update with the following progressions: \n- Fixed a severe bug in the update system" + \
"CHANGELOG" : "Version 3.1.2 \nMinor update with the following progressions:\n- Upgrade Tesseract OCR to 5.0\n\n" + \
"Version 3.1.1 \nMinor update with the following progressions: \n- Fixed a severe bug in the update system\n\n" + \
"Version 3.1.0 \nMajor update with the following progressions: \n- Cosmetic modifications of the user interface \n- Stabilization of the changes made on the minor version 3.0 : user interface, OCR, VISA A and B, logging\n- Rationalization of the language system\n- Added update channels\n\n" + \
"Version 3.0.8 final\nCorrection: bug in the update system'\n\n" + \
"Version 3.0.6 \nMinor update with the following fixes:\n- Change in the appearance of the application launcher\n- Improvements to the interface, including stability\n- Added digital signature of the executable\n" + \
@ -879,6 +883,8 @@ english = \
"Pièce d'identité/de voyage" : "Identity/travel document",
"Pièce d'identité FR" : "French Identity card",
"Permis de conduire" : "Driver License",
"The file you provided is not "
"valid : {}" : "The file you provided is not valid : {}",
"LANDCODE2" : {
"AW": "Aruba",

View File

@ -451,7 +451,7 @@ class mainWindow(Tk):
os.environ['PATH'] = globs.CNIRTesser
os.environ['TESSDATA_PREFIX'] = globs.CNIRTesser + '\\tessdata'
text = pytesseract.image_to_string(crop_img, lang='ocrb', boxes=False, config='--psm 6 --oem 0 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890<')
text = pytesseract.image_to_string(crop_img, lang='ocrb', config='--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890<')
# manual validation
# the regex

View File

@ -171,18 +171,18 @@ def tessInstall(PATH, credentials):
# Verifying that Tesseract is installed
if not os.path.exists(PATH + '\\Tesseract-OCR4\\'):
if not os.path.exists(PATH + '\\Tesseract-OCR5\\'):
finalver, finalurl, finalchecksum = getLatestVersion(credentials)
if finalurl == None:
logfile.printerr('Unable to get the Tesseract url')
return False
tesseracturl = finalurl.replace("CNIRevelator.zip", "tesseract_4.zip")
tesseracturl = finalurl.replace("CNIRevelator.zip", "tesseract_5.zip")
# WE ASSUME THAT THE MAIN FILE IS CNIRevelator.zip AND THAT THE TESSERACT PACKAGE IS tesseract_4.zip
# WE ASSUME THAT THE MAIN FILE IS CNIRevelator.zip AND THAT THE TESSERACT PACKAGE IS tesseract_5.zip
logfile.printdbg('Preparing download of Tesseract OCR 4...')
getTesseract = downloader.newdownload(credentials, tesseracturl, PATH + '\\downloads\\TsrtPackage.zip', "Tesseract 4 OCR Module").download()
getTesseract = downloader.newdownload(credentials, tesseracturl, PATH + '\\downloads\\TsrtPackage.zip', "Tesseract 5 OCR Module").download()
try:
# CHECKSUM