1
0
mirror of https://gitlab.os-k.eu/neox/CNIRevelator.git synced 2023-08-25 14:03:10 +02:00

Working on MRZ analyzer

This commit is contained in:
Adrien Bourmault 2019-07-17 17:12:21 +02:00 committed by GitHub
parent 49777a9caa
commit 5b7e4f65e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 233 additions and 148 deletions

View File

@ -31,6 +31,7 @@ from tkinter import filedialog
from tkinter import ttk from tkinter import ttk
import threading import threading
from datetime import datetime from datetime import datetime
import re
import logger # logger.py import logger # logger.py
import mrz # mrz.py import mrz # mrz.py
@ -141,11 +142,8 @@ class mainWindow(Tk):
self.termframe.grid(column=0, row=0, sticky='EW') self.termframe.grid(column=0, row=0, sticky='EW')
self.termframe.grid_columnconfigure(0, weight=1) self.termframe.grid_columnconfigure(0, weight=1)
self.termframe.grid_rowconfigure(0, weight=1) self.termframe.grid_rowconfigure(0, weight=1)
self.termtext = Text((self.termframe), state='disabled', width=60, height=4, wrap='none', font='Terminal 17', fg='#121f38') self.termtext = Text((self.termframe), state='normal', width=60, height=4, wrap='none', font='Terminal 17', fg='#121f38')
self.termtext.grid(column=0, row=0, sticky='NEW', padx=5) self.termtext.grid(column=0, row=0, sticky='NEW', padx=5)
vcmd = (self.register(self.entryValidation), '%S', '%P', '%d')
self.termentry = Entry((self.termframe), font='Terminal 17', validate='all', validatecommand=vcmd, fg='#121f38', width=44)
self.termentry.grid(column=0, row=0, sticky='SEW', padx=5)
# The monitor that indicates some useful infos # The monitor that indicates some useful infos
self.monitor = ttk.Labelframe(self, text='Moniteur') self.monitor = ttk.Labelframe(self, text='Moniteur')
@ -198,26 +196,29 @@ class mainWindow(Tk):
self.geometry('%dx%d+%d+%d' % (w, h, x, y)) self.geometry('%dx%d+%d+%d' % (w, h, x, y))
# Some bindings # Some bindings
self.termentry.bind('<Return>', self.preentryValidation) self.termtext.bind('<Key>', self.entryValidation)
self.termtext.bind('<Return>', self.preentryValidation)
self.termentry.bind('<Escape>', self.onTabPressed)
self.update() self.update()
logfile.printdbg('mainWindow() : Initialization successful') logfile.printdbg('Initialization successful')
def preentryValidation(self, event):
"""
Function that detects the kind of identity document this is
"""
def onTabPressed(self, event): def onTabPressed(self, event):
return 'break' return 'break'
def entryValidation(self, char, entry_value, typemod): def entryValidation(self, event):
"""
On the fly validation with regex
"""
currentText = self.termtext.get("1.0", "end")
currentText = (currentText.upper()[:-1]).replace(" ", "<")
# XXX : we must recreate a proper on the fly validation with regex ! regex = re.compile("([A-Z]|[0-9]|<)*")
while not regex.fullmatch(currentText):
currentText = currentText[:-1]
return isValid self.termtext.delete("1.0", "end")
self.termtext.insert("1.0", currentText)
print(currentText)
def logOnTerm(self, text): def logOnTerm(self, text):
self.monlog['state'] = 'normal' self.monlog['state'] = 'normal'
@ -226,7 +227,7 @@ class mainWindow(Tk):
self.monlog.yview(END) self.monlog.yview(END)
def openingScan(self): def openingScan(self):
pass
# OPEN A SCAN # OPEN A SCAN
def newEntry(self): def newEntry(self):
@ -261,12 +262,11 @@ class mainWindow(Tk):
parent=self) parent=self)
def calculSigma(self, MRZtxt, numtype): def calculSigma(self, MRZtxt, numtype):
pass
# CALCUL DE TOUTES LES SOMMES DE LA CARTE CONFORMEMENT A SON TYPE # CALCUL DE TOUTES LES SOMMES DE LA CARTE CONFORMEMENT A SON TYPE
class OpenScan(ttk.Frame): class OpenScan(ttk.Frame):
def __init__(self, mainframe, fileorig, type, nframe=1, pagenum=0, file=None): def __init__(self, mainframe, fileorig, type, nframe=1, pagenum=0, file=None):
""" Initialize the main Frame """ """ Initialize the main Frame """
if file == None: if file == None:

View File

@ -24,6 +24,9 @@
******************************************************************************** ********************************************************************************
""" """
import re
import logger # logger.py
## SEX CODES ## SEX CODES
sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'} sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'}
@ -552,192 +555,274 @@ landcode3 = {
} }
## DOCUMENTS TYPES ## DOCUMENTS TYPES
P = [ P = [
"11222333333333333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCCCCCCCCDE", ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"],
{ {
"1": "2|CODE|P*", "1": ["2", "CODE", "P."],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "39|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&", "3": ["39", "NOM", "([A-Z]|<)+"],
"4": "9|NO|*********", "4": ["9", "NO", ".+"],
"5": "1|CTRL|4", "5": ["1", "CTRL", "[0-9]", "4"],
"6": "3|NAT|AAA", "6": ["3", "NAT", "[A-Z]+"],
"7": "6|BDATE|000000", "7": ["6", "BDATE", "[0-9]+"],
"8": "1|CTRL|7", "8": ["1", "CTRL", "[0-9]", "7"],
"9": "1|SEX|A", "9": ["1", "SEX", "[A-Z]"],
"A": "6|EDATE|000000", "A": ["6", "EDATE", "[0-9]+"],
"B": "1|CTRL|A", "B": ["1", "CTRL", "[0-9]", "A"],
"C": "14|FACULT|**************", "C": ["14", "FACULT", ".+"],
"D": "1|CTRLF|C", "D": ["1", "CTRLF", "[0-9]", "C"],
"E": "1|CTRL|4578ABCD" "E": ["1", "CTRL", "[0-9]", "4578ABCD"]
}, },
"Passeport" "Passeport"
] ]
IP = [ IP = [
"112223333333334555555555555555|66666678999999ABBBCCCCCCCCCCCD", ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{ {
"1": "2|CODE|IP", "1": ["2", "CODE", "IP"],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "9|NO|*********", "3": ["9", "NO", ".+"],
"4": "1|CTRL|3", "4": ["1", "CTRL", "[0-9]", "3"],
"5": "15|FACULT|***************", "5": ["15", "FACULT", ".+"],
"6": "6|BDATE|000000", "6": ["6", "BDATE", "[0-9]+"],
"7": "1|CTRL|6", "7": ["1", "CTRL", "[0-9]", "6"],
"8": "1|SEX|A", "8": ["1", "SEX", "[A-Z]"],
"9": "6|EDATE|000000", "9": ["6", "EDATE", "[0-9]+"],
"A": "1|CTRL|9", "A": ["1", "CTRL", "[0-9]", "9"],
"B": "3|NAT|AAA", "B": ["3", "NAT", "[A-Z]+"],
"C": "11|FACULT|***********", "C": ["11", "FACULT", ".+"],
"D": "1|CTRL|345679AC" "D": ["1", "CTRL", "[0-9]", "345679AC"]
}, },
"Carte-passeport" "Carte-passeport"
] ]
I_ = [ I_ = [
"112223333333334555555555555555|66666678999999ABBBCCCCCCCCCCCD", ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{ {
"1": "2|CODE|I*", "1": ["2", "CODE", "I."],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "9|NO|*********", "3": ["9", "NO", ".+"],
"4": "1|CTRL|3", "4": ["1", "CTRL", "[0-9]", "3"],
"5": "15|FACULT|***************", "5": ["15", "FACULT", ".+"],
"6": "6|BDATE|000000", "6": ["6", "BDATE", "[0-9]+"],
"7": "1|CTRL|6", "7": ["1", "CTRL", "[0-9]", "6"],
"8": "1|SEX|A", "8": ["1", "SEX", "[A-Z]"],
"9": "6|EDATE|000000", "9": ["6", "EDATE", "[0-9]+"],
"A": "1|CTRL|9", "A": ["1", "CTRL", "[0-9]", "9"],
"B": "3|NAT|AAA", "B": ["3", "NAT", "[A-Z]+"],
"C": "11|FACULT|***********", "C": ["11", "FACULT", ".+"],
"D": "1|CTRL|345679AC" "D": ["1", "CTRL", "[0-9]", "345679AC"]
}, },
"Titre d'identité/de voyage" "Titre d'identité/de voyage"
] ]
# Crew member certificate layout (document code "AC").
# Structure: [template lines, field table, label].
#   - template lines: one character per MRZ column; each run of the same
#     character marks one field and is the key into the field table.
#   - field table entry: [length, field name, regex, optional fourth item].
#     The fourth item is only present on CTRL fields; it presumably lists the
#     field keys covered by that check digit (NOTE(review): confirm against
#     the checksum code, which is not visible here).
#   - label: human-readable document name.
AC = [
    ["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
    {
        "1": ["2", "CODE", "AC"],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", ".+"],
        "4": ["1", "CTRL", "[0-9]", "3"],
        "E": ["3", "INDIC", "[A-Z]{1,2}."],
        "5": ["12", "FACULT", ".+"],
        # No trailing space in this pattern: MRZ text never contains spaces
        # (they are turned into '<' on input), so "[0-9]+ " could never match.
        "6": ["6", "BDATE", "[0-9]+"],
        "7": ["1", "CTRL", "[0-9]", "6"],
        "8": ["1", "SEX", "[A-Z]"],
        "9": ["6", "EDATE", "[0-9]+"],
        "A": ["1", "CTRL", "[0-9]", "9"],
        "B": ["3", "NAT", "[A-Z]+"],
        "C": ["11", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "345679AC"]
    },
    "Certificat de membre d'équipage"
]
VA = [ ## XXXXXXXXXXX
"11222333333333333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCCCCCCCCDE", # VA = [
{ # ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCDE"],
"1": "2|CODE|V*", # {
"2": "3|PAYS|AAA", # "1": ["2", "CODE", "V."],
"3": "39|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&", # "2": ["3", "PAYS", "[A-Z]+"],
"4": "9|NO|*********", # "3": ["39", "NOM", "[A-Z]+"],
"5": "1|CTRL|4", # "4": ["9", "NO", ".+"],
"6": "3|NAT|AAA", # "5": ["1", "CTRL", "[0-9]","4"],
"7": "6|BDATE|000000", # "6": ["3", "NAT", "[A-Z]+"],
"8": "1|CTRL|7", # "7": ["6", "BDATE", "[0-9]+"],
"9": "1|SEX|A", # "8": ["1", "CTRL", "[0-9]", "7"],
"A": "6|EDATE|000000", # "9": ["1", "SEX", "[A-Z]"],
"B": "1|CTRL|A", # "A": ["6", "EDATE", "[0-9]+"],
"C": "14|FACULT|**************" # "B": ["1", "CTRL", "[0-9]", "A"],
}, # "C": ["14", "FACULT", ".+"]
"Visa de type A" # },
] # "Visa de type A"
# ]
VB = [ VB = [
"112223333333333333333333333333333333|444444444566677777789AAAAAABCCCCCC", ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
{ {
"1": "2|CODE|V*", "1": ["2", "CODE", "V."],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "31|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&", "3": ["31", "NOM", "([A-Z]|<)+"],
"4": "9|NO|*********", "4": ["9", "NO", ".+"],
"5": "1|CTRL|4", "5": ["1", "CTRL", "[0-9]","4"],
"6": "3|NAT|AAA", "6": ["3", "NAT", "[A-Z]+"],
"7": "6|BDATE|000000", "7": ["6", "BDATE", "[0-9]+"],
"8": "1|CTRL|7", "8": ["1", "CTRL", "[0-9]", "7"],
"9": "1|SEX|A", "9": ["1", "SEX", "[A-Z]"],
"A": "6|EDATE|000000", "A": ["6", "EDATE", "[0-9]+"],
"B": "1|CTRL|A", "B": ["1", "CTRL", "[0-9]", "A"],
"C": "8|FACULT|********" "C": ["8", "FACULT", ".+"]
}, },
"Visa de type B" "Visa de type B"
] ]
I__ = [ I__ = [
"112223333333333333333333333333333333|444444444566677777789AAAAAABCCCCCCCD", ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"],
{ {
"1": "2|CODE|I*", "1": ["2", "CODE", "I."],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "31|NOM|&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&", "3": ["31", "NOM", "([A-Z]|<)+"],
"4": "9|NO|*********", "4": ["9", "NO", ".+"],
"5": "1|CTRL|4", "5": ["1", "CTRL", "[0-9]", "4"],
"6": "3|NAT|AAA", "6": ["3", "NAT", "[A-Z]+"],
"7": "6|BDATE|000000", "7": ["6", "BDATE", "[0-9]+"],
"8": "1|CTRL|7", "8": ["1", "CTRL", "[0-9]", "7"],
"9": "1|SEX|A", "9": ["1", "SEX", "[A-Z]"],
"A": "6|EDATE|000000", "A": ["6", "EDATE", "[0-9]+"],
"B": "1|CTRL|A", "B": ["1", "CTRL", "[0-9]", "A"],
"C": "7|FACULT|*******", "C": ["7", "FACULT", ".+"],
"D": "1|CTRL|4578ABC" "D": ["1", "CTRL", "[0-9]", "4578ABC"]
}, },
"Pièce d'identité/de voyage" "Pièce d'identité/de voyage"
] ]
# French national identity card layout (document code "ID", country "FRA").
# Structure: [template lines, field table, label].
#   - template lines: one character per MRZ column; each run of the same
#     character marks one field and is the key into the field table.
#   - field table entry: [length, field name, regex, optional fourth item].
#     The fourth item is only present on CTRL fields; it presumably lists the
#     field keys covered by that check digit (NOTE(review): confirm against
#     the checksum code, which is not visible here).
#   - label: human-readable document name.
IDFR = [
    ["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"],
    {
        "1": ["2", "CODE", "ID"],
        "2": ["3", "PAYS", "FRA"],
        "3": ["25", "NOM", "([A-Z]|<)+"],
        "4": ["6", "NOINT", ".+"],
        "5": ["4", "DDATE", "[0-9]+"],
        "6": ["3", "NOINT2", "[0-9]+"],
        "7": ["5", "NOINT3", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "567"],
        # Same pattern as NOM: the 14-column given-name field holds letters
        # padded with '<'. A bare "[A-Z]" would match one character only and
        # under-score this layout against look-alike 36-column layouts.
        "9": ["14", "PRENOM", "([A-Z]|<)+"],
        "A": ["6", "BDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["1", "SEX", "[A-Z]"],
        "D": ["1", "CTRL", "[0-9]", "123456789ABC"]
    },
    "Pièce d'identité FR"
]
DL = [ DL = [
"112223333333334555555666666667|", ["112223333333334555555666666667", ""],
{ {
"1": "2|CODE|D1", "1": ["2", "CODE", "D1"],
"2": "3|PAYS|AAA", "2": ["3", "PAYS", "[A-Z]+"],
"3": "9|NO|00AA00000", "3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"],
"4": "1|CTRL|123", "4": ["1", "CTRL", "[0-9]", "123"],
"5": "6|EDATE|000000", "5": ["6", "EDATE", "[0-9]+"],
"6": "8|NOM|&&&&&&&&", "6": ["8", "NOM", "([A-Z]|<)+"],
"7": "1|CTRL|123456" "7": ["1", "CTRL", "[0-9]", "123456"]
}, },
"Permis de conduire" "Permis de conduire"
] ]
TYPES = [ID, I__, VB, VA, AC, I_, IP, P, DL] #TYPES = [ID, I__, VB, VA, AC, I_, IP, P, DL]
TYPES = [IDFR, I__, VB, AC, I_, IP, P, DL]
## THE ROOT OF THIS PROJECT ! ## THE ROOT OF THIS PROJECT !
def limits(line, fieldtype):
    """
    Locate the span occupied by a field marker inside a template line.

    Returns a (start, stop) pair where start is the index of the first
    occurrence of ``fieldtype`` in ``line`` and stop is one past the index of
    its last occurrence, so ``line[start:stop]`` covers the whole run.
    When ``fieldtype`` does not occur, the result is (-1, 0).
    """
    start = line.find(fieldtype)
    stop = 1 + line.rfind(fieldtype)
    return start, stop
def docMatch(doc, strs):
    """
    Score how well the typed MRZ lines fit one document layout.

    Each of the two template lines in ``doc[0]`` is walked field by field
    (a field is the run of one template character, located with ``limits``).
    The slice of the typed line at the same columns is tested against the
    field's regex ``doc[1][key][2]``, anchored at the start of the slice.

    Parameters:
        doc  -- layout entry: [template lines, field table, label]
        strs -- typed MRZ lines, indexed like the template lines; a line
                that is missing from ``strs`` is scored as empty text

    Returns:
        (level, nchar, bonus) where
        level -- sum of the matched lengths over all fields,
        nchar -- sum of the declared lengths ``doc[1][key][0]`` of all fields,
        bonus -- 100 for each matching field whose key is "1".
    """
    # Global handler
    logfile = logger.logCur

    level = 0
    nchar = 0
    bonus = 0

    # Only the first two template lines are scored
    for i, template in enumerate(doc[0][:2]):
        # A line that has not been typed yet counts as empty instead of
        # raising IndexError
        typed = strs[i] if i < len(strs) else ""

        cursor = 0
        while cursor < len(template):
            # Getting the type of field on the cursor position
            fieldtype = template[cursor]
            start, stop = limits(template, fieldtype)

            # Ready for next field
            cursor = stop

            # Isolate the typed text sitting on this field's columns
            fstr = typed[start:stop]

            # Test the match (anchored at the beginning of the slice)
            matching = re.match(doc[1][fieldtype][2], fstr)

            # Retrieve the matching level
            if matching:
                level += matching.end()
                if fieldtype == "1":
                    bonus += 100

            # The declared length counts whether or not the field matched
            nchar += int(doc[1][fieldtype][0])

    logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], level, nchar, bonus))
    return (level, nchar, bonus)
def allDocMatches(strs, final=False):
    """
    Rank every layout in TYPES against the typed MRZ lines.

    Each layout is scored with ``docMatch`` as ``level + bonus``. When
    ``final`` is true, a layout whose declared character count equals the
    total length of the typed lines gets 100 extra points.

    Parameters:
        strs  -- typed MRZ lines (only the first two are measured for the
                 length bonus; a missing second line counts as empty)
        final -- True when the input is considered complete, enabling the
                 exact-length bonus

    Returns:
        The list of layout entries from TYPES that share the highest score,
        in TYPES order.
    """
    # Global handler
    logfile = logger.logCur

    # Total typed length, tolerant of single-line input (the original
    # expression strs[0] + strs[1] raised IndexError in that case)
    typedLength = sum(len(line) for line in strs[:2])

    SCORES = []
    for doc in TYPES:
        # Get the score of the document on the strings
        level, nchar, bonus = docMatch(doc, strs)

        # Number of compatible characters + bonus for the document code
        score = level + bonus

        # If the typed length is the same as the document's, add a bonus,
        # but only if we are in a final situation
        if final and typedLength == nchar:
            score += 100

        SCORES.append(score)

    # Keep every layout tied for the best score
    best = max(SCORES)
    candidates = [doc for doc, score in zip(TYPES, SCORES) if score == best]
    canditxt = [doc[2] for doc in candidates]

    # Return the candidates
    logfile.printdbg("Scores : {}".format(SCORES))
    logfile.printdbg("Candidates : {}".format(canditxt))
    return candidates
def MRZ(code): def MRZ(code):
""" """
This function computes a control sum for a range of characters This function computes a control sum for the given characters
""" """
resultat = 0 resultat = 0
i = -1 i = -1
facteur = [7, 3, 1] facteur = [7, 3, 1]
for car in code: for car in code:
if car == '<' or car == '|': if car == '<' or car == '\n':
valeur = 0 valeur = 0
i += 1 i += 1
else: else: