Working on MRZ analyzer

This commit is contained in:
Adrien Bourmault 2019-07-17 17:12:21 +02:00 committed by GitHub
parent 49777a9caa
commit 5b7e4f65e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 233 additions and 148 deletions

View File

@ -31,6 +31,7 @@ from tkinter import filedialog
from tkinter import ttk
import threading
from datetime import datetime
import re
import logger # logger.py
import mrz # mrz.py
@ -141,11 +142,8 @@ class mainWindow(Tk):
self.termframe.grid(column=0, row=0, sticky='EW')
self.termframe.grid_columnconfigure(0, weight=1)
self.termframe.grid_rowconfigure(0, weight=1)
self.termtext = Text((self.termframe), state='disabled', width=60, height=4, wrap='none', font='Terminal 17', fg='#121f38')
self.termtext = Text((self.termframe), state='normal', width=60, height=4, wrap='none', font='Terminal 17', fg='#121f38')
self.termtext.grid(column=0, row=0, sticky='NEW', padx=5)
vcmd = (self.register(self.entryValidation), '%S', '%P', '%d')
self.termentry = Entry((self.termframe), font='Terminal 17', validate='all', validatecommand=vcmd, fg='#121f38', width=44)
self.termentry.grid(column=0, row=0, sticky='SEW', padx=5)
# The monitor that indicates some useful infos
self.monitor = ttk.Labelframe(self, text='Moniteur')
@ -198,26 +196,29 @@ class mainWindow(Tk):
self.geometry('%dx%d+%d+%d' % (w, h, x, y))
# Some bindings
self.termentry.bind('<Return>', self.preentryValidation)
self.termtext.bind('<Return>', self.preentryValidation)
self.termentry.bind('<Escape>', self.onTabPressed)
self.termtext.bind('<Key>', self.entryValidation)
self.update()
logfile.printdbg('mainWindow() : Initialization successful')
def preentryValidation(self, event):
"""
Function that detects the kind of identity document this is
"""
logfile.printdbg('Initialization successful')
def onTabPressed(self, event):
return 'break'
def entryValidation(self, char, entry_value, typemod):
def entryValidation(self, event):
"""
On the fly validation with regex
"""
currentText = self.termtext.get("1.0", "end")
currentText = (currentText.upper()[:-1]).replace(" ", "<")
# XXX : we must recreate a proper on the fly validation with regex !
regex = re.compile("([A-Z]|[0-9]|<)*")
while not regex.fullmatch(currentText):
currentText = currentText[:-1]
return isValid
self.termtext.delete("1.0", "end")
self.termtext.insert("1.0", currentText)
print(currentText)
def logOnTerm(self, text):
self.monlog['state'] = 'normal'
@ -226,7 +227,7 @@ class mainWindow(Tk):
self.monlog.yview(END)
def openingScan(self):
pass
# OPEN A SCAN
def newEntry(self):
@ -261,12 +262,11 @@ class mainWindow(Tk):
parent=self)
def calculSigma(self, MRZtxt, numtype):
pass
# CALCUL DE TOUTES LES SOMMES DE LA CARTE CONFORMEMENT A SON TYPE
class OpenScan(ttk.Frame):
def __init__(self, mainframe, fileorig, type, nframe=1, pagenum=0, file=None):
""" Initialize the main Frame """
if file == None:

View File

@ -24,6 +24,9 @@
********************************************************************************
"""
import re
import logger # logger.py
## SEX CODES
sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'}
@ -552,192 +555,274 @@ landcode3 = {
}
## DOCUMENTS TYPES
P = [
"11222333333333333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCCCCCCCCDE",
["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"],
{
"1": "2|CODE|P*",
"2": "3|PAYS|AAA",
"3": "39|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&",
"4": "9|NO|*********",
"5": "1|CTRL|4",
"6": "3|NAT|AAA",
"7": "6|BDATE|000000",
"8": "1|CTRL|7",
"9": "1|SEX|A",
"A": "6|EDATE|000000",
"B": "1|CTRL|A",
"C": "14|FACULT|**************",
"D": "1|CTRLF|C",
"E": "1|CTRL|4578ABCD"
"1": ["2", "CODE", "P."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["39", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["14", "FACULT", ".+"],
"D": ["1", "CTRLF", "[0-9]", "C"],
"E": ["1", "CTRL", "[0-9]", "4578ABCD"]
},
"Passeport"
]
IP = [
"112223333333334555555555555555|66666678999999ABBBCCCCCCCCCCCD",
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": "2|CODE|IP",
"2": "3|PAYS|AAA",
"3": "9|NO|*********",
"4": "1|CTRL|3",
"5": "15|FACULT|***************",
"6": "6|BDATE|000000",
"7": "1|CTRL|6",
"8": "1|SEX|A",
"9": "6|EDATE|000000",
"A": "1|CTRL|9",
"B": "3|NAT|AAA",
"C": "11|FACULT|***********",
"D": "1|CTRL|345679AC"
"1": ["2", "CODE", "IP"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
},
"Carte-passeport"
]
I_ = [
"112223333333334555555555555555|66666678999999ABBBCCCCCCCCCCCD",
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": "2|CODE|I*",
"2": "3|PAYS|AAA",
"3": "9|NO|*********",
"4": "1|CTRL|3",
"5": "15|FACULT|***************",
"6": "6|BDATE|000000",
"7": "1|CTRL|6",
"8": "1|SEX|A",
"9": "6|EDATE|000000",
"A": "1|CTRL|9",
"B": "3|NAT|AAA",
"C": "11|FACULT|***********",
"D": "1|CTRL|345679AC"
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
},
"Titre d'identité/de voyage"
]
AC = [
"112223333333334EEE555555555555|66666678999999ABBBCCCCCCCCCCCD",
["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": "2|CODE|AC",
"2": "3|PAYS|AAA",
"3": "9|NO|*********",
"4": "1|CTRL|3",
"5": "15|FACULT|***************",
"6": "6|BDATE|000000",
"7": "1|CTRL|6",
"8": "1|SEX|A",
"9": "6|EDATE|000000",
"A": "1|CTRL|9",
"B": "3|NAT|AAA",
"C": "11|FACULT|***********",
"D": "1|CTRL|345679AC",
"E": "3|INDIC|AA&"
"1": ["2", "CODE", "AC"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"E": ["3", "INDIC", "[A-Z]{1,2}."],
"5": ["12", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+ "],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]","345679AC"]
},
"Certificat de membre d'équipage"
]
VA = [
"11222333333333333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCCCCCCCCDE",
{
"1": "2|CODE|V*",
"2": "3|PAYS|AAA",
"3": "39|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&",
"4": "9|NO|*********",
"5": "1|CTRL|4",
"6": "3|NAT|AAA",
"7": "6|BDATE|000000",
"8": "1|CTRL|7",
"9": "1|SEX|A",
"A": "6|EDATE|000000",
"B": "1|CTRL|A",
"C": "14|FACULT|**************"
},
"Visa de type A"
]
## XXXXXXXXXXX
# VA = [
# ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCDE"],
# {
# "1": ["2", "CODE", "V."],
# "2": ["3", "PAYS", "[A-Z]+"],
# "3": ["39", "NOM", "[A-Z]+"],
# "4": ["9", "NO", ".+"],
# "5": ["1", "CTRL", "[0-9]","4"],
# "6": ["3", "NAT", "[A-Z]+"],
# "7": ["6", "BDATE", "[0-9]+"],
# "8": ["1", "CTRL", "[0-9]", "7"],
# "9": ["1", "SEX", "[A-Z]"],
# "A": ["6", "EDATE", "[0-9]+"],
# "B": ["1", "CTRL", "[0-9]", "A"],
# "C": ["14", "FACULT", ".+"]
# },
# "Visa de type A"
# ]
VB = [
"112223333333333333333333333333333333|444444444566677777789AAAAAABCCCCCC",
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
{
"1": "2|CODE|V*",
"2": "3|PAYS|AAA",
"3": "31|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&",
"4": "9|NO|*********",
"5": "1|CTRL|4",
"6": "3|NAT|AAA",
"7": "6|BDATE|000000",
"8": "1|CTRL|7",
"9": "1|SEX|A",
"A": "6|EDATE|000000",
"B": "1|CTRL|A",
"C": "8|FACULT|********"
"1": ["2", "CODE", "V."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["8", "FACULT", ".+"]
},
"Visa de type B"
]
I__ = [
"112223333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCD",
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"],
{
"1": "2|CODE|I*",
"2": "3|PAYS|AAA",
"3": "31|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&",
"4": "9|NO|*********",
"5": "1|CTRL|4",
"6": "3|NAT|AAA",
"7": "6|BDATE|000000",
"8": "1|CTRL|7",
"9": "1|SEX|A",
"A": "6|EDATE|000000",
"B": "1|CTRL|A",
"C": "7|FACULT|*******",
"D": "1|CTRL|4578ABC"
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["7", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "4578ABC"]
},
"Pièce d'identité/de voyage"
]
ID = [
"112223333333333333333333333333444444|555566677777899999999999999AAAAAABCD",
IDFR = [
["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"],
{
"1": "2|CODE|ID",
"2": "3|PAYS|AAA",
"3": "25|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&",
"4": "6|NOINT|000***",
"5": "4|DDATE|0000",
"6": "3|NOINT2|000",
"7": "5|NOINT3|00000",
"8": "1|CTRL|567",
"9": "14|PRENOM|A",
"A": "6|BDATE|000000",
"B": "1|CTRL|A",
"C": "1|SEX|A",
"D": "1|CTRL|123456789ABC"
"1": ["2", "CODE", "ID"],
"2": ["3", "PAYS", "FRA"],
"3": ["25", "NOM", "([A-Z]|<)+"],
"4": ["6", "NOINT", ".+"],
"5": ["4", "DDATE", "[0-9]+"],
"6": ["3", "NOINT2", "[0-9]+"],
"7": ["5", "NOINT3", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "567"],
"9": ["14", "PRENOM", "[A-Z]"],
"A": ["6", "BDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["1", "SEX", "[A-Z]"],
"D": ["1", "CTRL", "[0-9]", "123456789ABC"]
},
"Pièce d'identité FR"
]
DL = [
"112223333333334555555666666667|",
["112223333333334555555666666667", ""],
{
"1": "2|CODE|D1",
"2": "3|PAYS|AAA",
"3": "9|NO|00AA00000",
"4": "1|CTRL|123",
"5": "6|EDATE|000000",
"6": "8|NOM|&&&&&&&&",
"7": "1|CTRL|123456"
"1": ["2", "CODE", "D1"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"],
"4": ["1", "CTRL", "[0-9]", "123"],
"5": ["6", "EDATE", "[0-9]+"],
"6": ["8", "NOM", "([A-Z]|<)+"],
"7": ["1", "CTRL", "[0-9]", "123456"]
},
"Permis de conduire"
]
TYPES = [ID, I__, VB, VA, AC, I_, IP, P, DL]
#TYPES = [ID, I__, VB, VA, AC, I_, IP, P, DL]
TYPES = [IDFR, I__, VB, AC, I_, IP, P, DL]
## THE ROOT OF THIS PROJECT !
def limits(line, fieldtype):
    """
    Locate the span occupied by a field marker inside a template line.

    ``line`` is a template string in which every position carries the
    one-character identifier of the field it belongs to; ``fieldtype`` is
    the identifier to look for.

    Returns a ``(start, end)`` tuple where ``start`` is the index of the
    first occurrence and ``end`` is one past the index of the last
    occurrence, so that ``line[start:end]`` covers the field.
    When ``fieldtype`` does not occur in ``line`` the result is ``(-1, 0)``.
    """
    # NOTE(review): using first/last occurrence presumes each field marker
    # forms one contiguous run in the template -- confirm for new templates.
    first = line.find(fieldtype)
    last = line.rfind(fieldtype)
    return (first, last + 1)
def docMatch(doc, strs):
    """
    Score how well the two text lines ``strs`` fit one document type.

    ``doc`` is a document description as stored in ``TYPES``:
    ``doc[0]`` holds the two template lines (one field identifier per
    character), ``doc[1]`` maps each field identifier to a list whose first
    item is the field length (as a string) and whose third item is a regex,
    and ``doc[2]`` is the document label used in the debug log.

    Returns a ``(level, nchar, bonus)`` tuple:
    - ``level``: sum, over all fields, of the number of leading characters
      of the field text accepted by the field's regex;
    - ``nchar``: sum of the declared lengths of all fields of the template;
    - ``bonus``: 100 for each matching field whose identifier is ``"1"``.
    """
    # Global handler
    logfile = logger.logCur

    level = 0
    nchar = 0
    bonus = 0

    for lineno in range(2):
        pos = 0
        # Walk the template one field at a time until its end is reached
        while pos < len(doc[0][lineno]):
            # Field identifier found under the cursor, and the span it covers
            fieldtype = doc[0][lineno][pos]
            start, stop = limits(doc[0][lineno], fieldtype)

            # Jump straight to the first character of the next field
            pos = stop

            # Isolate the part of the input sitting at the field's position
            fstr = strs[lineno][start:stop]

            # The regex is anchored at the beginning of the field text only
            spec = doc[1][fieldtype]
            matching = re.compile(spec[2]).match(fstr)

            # Retrieve the matching level
            if matching is not None:
                level += matching.end()
                if fieldtype == "1":
                    bonus += 100

            # The declared length counts whether or not the field matched
            nchar += int(spec[0])

    logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], level, nchar, bonus))
    return (level, nchar, bonus)
def allDocMatches(strs, final=False):
    """
    Return the document types of ``TYPES`` that best fit the lines ``strs``.

    Each document type is scored with ``docMatch``; its score is the
    matching level plus the bonus. When ``final`` is true, a further 100
    points go to every type whose total declared length equals the combined
    length of ``strs[0]`` and ``strs[1]``.

    Returns the list of all document descriptions sharing the highest
    score, in the order they appear in ``TYPES``.
    """
    # Global handler
    logfile = logger.logCur

    scores = []
    for doc in TYPES:
        # Get the score of the document on the strings
        level, nchar, bonus = docMatch(doc, strs)

        # Number of compatible characters + bonus given by the doc indication
        total = level + bonus

        # Same overall length as the document: extra bonus, but only when
        # we are in a final situation
        if final and len(strs[0] + strs[1]) == nchar:
            total += 100

        scores.append(total)

    # Keep every document type tied for the best score
    best = max(scores)
    candidates = [doc for doc, score in zip(TYPES, scores) if score == best]
    canditxt = [doc[2] for doc in candidates]

    logfile.printdbg("Scores : {}".format(scores))
    logfile.printdbg("Candidates : {}".format(canditxt))
    return candidates
def MRZ(code):
"""
This function computes a control sum for a range of characters
This function computes a control sum for the given characters
"""
resultat = 0
i = -1
facteur = [7, 3, 1]
for car in code:
if car == '<' or car == '|':
if car == '<' or car == '\n':
valeur = 0
i += 1
else: