2019-08-21 14:54:05 +02:00
|
|
|
|
# -*- coding: utf8 -*-
|
2019-07-09 23:01:09 +02:00
|
|
|
|
"""
|
|
|
|
|
********************************************************************************
|
|
|
|
|
* CNIRevelator *
|
|
|
|
|
* *
|
|
|
|
|
* Desc: MRZ data dictionary for CNIRevelator analyzer and *
|
|
|
|
|
* functions to analyze these data *
|
|
|
|
|
* *
|
|
|
|
|
* Copyright © 2018-2019 Adrien Bourmault (neox95) *
|
|
|
|
|
* *
|
|
|
|
|
* This file is part of CNIRevelator. *
|
|
|
|
|
* *
|
|
|
|
|
* CNIRevelator is free software: you can redistribute it and/or modify *
|
|
|
|
|
* it under the terms of the GNU General Public License as published by *
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or *
|
|
|
|
|
* any later version. *
|
|
|
|
|
* *
|
|
|
|
|
* CNIRevelator is distributed in the hope that it will be useful, *
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
|
|
|
* GNU General Public License for more details. *
|
|
|
|
|
* *
|
|
|
|
|
* You should have received a copy of the GNU General Public License *
|
|
|
|
|
* along with CNIRevelator. If not, see <https://www.gnu.org/licenses/>. *
|
|
|
|
|
********************************************************************************
|
|
|
|
|
"""
|
|
|
|
|
|
2019-08-01 16:52:36 +02:00
|
|
|
|
import re
|
|
|
|
|
import datetime
|
2019-07-17 17:12:21 +02:00
|
|
|
|
|
2019-08-12 17:07:37 +02:00
|
|
|
|
import logger # logger.py
|
2019-08-19 17:56:23 +02:00
|
|
|
|
import globs # globs.py
|
2019-08-12 17:07:37 +02:00
|
|
|
|
import lang # lang.py
|
2019-08-20 10:14:54 +02:00
|
|
|
|
import critical # critical.py
|
2019-08-12 17:07:37 +02:00
|
|
|
|
|
2019-07-09 23:01:09 +02:00
|
|
|
|
## SEX CODES
# One-letter sex code -> French display label
sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'}

## COUNTRY CODES
# Country lookup tables read from the language pack selected by globs.CNIRlang.
# getDocInfos indexes landcode3 with 3-character values and landcode2 with the others.
landcode2 = lang.all[globs.CNIRlang]["LANDCODE2"]
landcode3 = lang.all[globs.CNIRlang]["LANDCODE3"]
|
2019-07-09 23:01:09 +02:00
|
|
|
|
|
|
|
|
|
## DOCUMENTS TYPES
|
2019-07-17 17:12:21 +02:00
|
|
|
|
|
2019-07-09 23:01:09 +02:00
|
|
|
|
# Document structure whose CODE field matches "P." (label key: machine readable passport).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL / CTRLF entries only)]
# [2] : label of the document in the current language
P = [
    ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"],
    {
        "1": ["2", "CODE", "P."],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["39", "NOM", "([A-Z]|<)+"],
        "4": ["9", "NO", ".+"],
        "5": ["1", "CTRL", "[0-9]", "4"],
        "6": ["3", "NAT", "[A-Z]+"],
        "7": ["6", "BDATE", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "7"],
        "9": ["1", "SEX", "[A-Z]"],
        "A": ["6", "EDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["14", "FACULT", ".+"],
        # CTRLF : control digit that computeAllControlSum flags as optional when it is "<"
        "D": ["1", "CTRLF", "[0-9]", "C"],
        "E": ["1", "CTRL", "[0-9]", "4578ABCD"]
    },
    lang.all[globs.CNIRlang]["Passeport lisible à la machine"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field is "IP" (label key: passport card).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
IP = [
    ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
    {
        "1": ["2", "CODE", "IP"],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", ".+"],
        "4": ["1", "CTRL", "[0-9]", "3"],
        "5": ["15", "FACULT", ".+"],
        "6": ["6", "BDATE", "[0-9]+"],
        "7": ["1", "CTRL", "[0-9]", "6"],
        "8": ["1", "SEX", "[A-Z]"],
        "9": ["6", "EDATE", "[0-9]+"],
        "A": ["1", "CTRL", "[0-9]", "9"],
        "B": ["3", "NAT", "[A-Z]+"],
        "C": ["11", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "345679AC"]
    },
    lang.all[globs.CNIRlang]["Carte-passeport"]
]
|
|
|
|
|
|
2019-08-21 10:36:57 +02:00
|
|
|
|
# Document structure whose CODE field matches "I." (label key: european identity card).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
IDEUR = [
    ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
    {
        "1": ["2", "CODE", "I."],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", ".+"],
        "4": ["1", "CTRL", "[0-9]", "3"],
        "5": ["15", "FACULT", ".+"],
        "6": ["6", "BDATE", "[0-9]+"],
        "7": ["1", "CTRL", "[0-9]", "6"],
        "8": ["1", "SEX", "[A-Z]"],
        "9": ["6", "EDATE", "[0-9]+"],
        "A": ["1", "CTRL", "[0-9]", "9"],
        "B": ["3", "NAT", "[A-Z]+"],
        "C": ["11", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "345679AC"]
    },
    lang.all[globs.CNIRlang]["Carte d’identité européenne"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field is "IR" (label key: european residence permit).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
TSEUR = [
    ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
    {
        "1": ["2", "CODE", "IR"],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", ".+"],
        "4": ["1", "CTRL", "[0-9]", "3"],
        "5": ["15", "FACULT", ".+"],
        "6": ["6", "BDATE", "[0-9]+"],
        "7": ["1", "CTRL", "[0-9]", "6"],
        "8": ["1", "SEX", "[A-Z]"],
        "9": ["6", "EDATE", "[0-9]+"],
        "A": ["1", "CTRL", "[0-9]", "9"],
        "B": ["3", "NAT", "[A-Z]+"],
        "C": ["11", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "345679AC"]
    },
    lang.all[globs.CNIRlang]["Carte de séjour européenne"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field is "AC" (label key: crew member certificate).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
AC = [
    ["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
    {
        "1": ["2", "CODE", "AC"],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", ".+"],
        "4": ["1", "CTRL", "[0-9]", "3"],
        "E": ["3", "INDIC", "[A-Z]{1,2}."],
        "5": ["12", "FACULT", ".+"],
        # digits only : a trailing space in this regex could never match the
        # 6-character slice taken from an MRZ line
        "6": ["6", "BDATE", "[0-9]+"],
        "7": ["1", "CTRL", "[0-9]", "6"],
        "8": ["1", "SEX", "[A-Z]"],
        "9": ["6", "EDATE", "[0-9]+"],
        "A": ["1", "CTRL", "[0-9]", "9"],
        "B": ["3", "NAT", "[A-Z]+"],
        "C": ["11", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "345679AC"]
    },
    lang.all[globs.CNIRlang]["Certificat de membre d'équipage"]
]
|
|
|
|
|
|
2019-07-19 17:17:30 +02:00
|
|
|
|
# Document structure whose CODE field matches "V.", with 44-character lines
# (label key: type A visa).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
VA = [
    ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCC"],
    {
        "1": ["2", "CODE", "V."],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["39", "NOM", "([A-Z]|<)+"],
        "4": ["9", "NO", ".+"],
        "5": ["1", "CTRL", "[0-9]","4"],
        "6": ["3", "NAT", "[A-Z]+"],
        "7": ["6", "BDATE", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "7"],
        "9": ["1", "SEX", "[A-Z]"],
        "A": ["6", "EDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["16", "FACULT", ".+"]
    },
    lang.all[globs.CNIRlang]["Visa de type A"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field matches "V.", with 36-character lines
# (label key: type B visa).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
VB = [
    ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCC"],
    {
        "1": ["2", "CODE", "V."],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["31", "NOM", "([A-Z]|<)+"],
        "4": ["9", "NO", ".+"],
        "5": ["1", "CTRL", "[0-9]","4"],
        "6": ["3", "NAT", "[A-Z]+"],
        "7": ["6", "BDATE", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "7"],
        "9": ["1", "SEX", "[A-Z]"],
        "A": ["6", "EDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["8", "FACULT", ".+"]
    },
    lang.all[globs.CNIRlang]["Visa de type B"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field is "TS" and PAYS field is "FRA"
# (label key: French residence permit).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position; both lines are 36 characters long, the
#       second one ending with the 8 positions declared for field C
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
TSF = [
    ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCC"],
    {
        "1": ["2", "CODE", "TS"],
        "2": ["3", "PAYS", "FRA"],
        "3": ["31", "NOM", "([A-Z]|<)+"],
        "4": ["9", "NO", ".+"],
        "5": ["1", "CTRL", "[0-9]","4"],
        "6": ["3", "NAT", "[A-Z]+"],
        "7": ["6", "BDATE", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "7"],
        "9": ["1", "SEX", "[A-Z]"],
        "A": ["6", "EDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["8", "FACULT", ".+"]
    },
    lang.all[globs.CNIRlang]["Carte de séjour FR"]
]
|
|
|
|
|
|
2019-08-21 10:36:57 +02:00
|
|
|
|
# Document structure whose CODE field matches "I.", with 36-character lines
# (label key: identity / travel document).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
TDV = [
    ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"],
    {
        "1": ["2", "CODE", "I."],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["31", "NOM", "([A-Z]|<)+"],
        "4": ["9", "NO", ".+"],
        "5": ["1", "CTRL", "[0-9]", "4"],
        "6": ["3", "NAT", "[A-Z]+"],
        "7": ["6", "BDATE", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "7"],
        "9": ["1", "SEX", "[A-Z]"],
        "A": ["6", "EDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["7", "FACULT", ".+"],
        "D": ["1", "CTRL", "[0-9]", "4578ABC"]
    },
    lang.all[globs.CNIRlang]["Titre d'identité/de voyage"]
]
|
|
|
|
|
|
2019-07-17 17:12:21 +02:00
|
|
|
|
# Document structure whose CODE field is "ID" and PAYS field is "FRA"
# (label key: French identity document).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
IDFR = [
    ["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"],
    {
        "1": ["2", "CODE", "ID"],
        "2": ["3", "PAYS", "FRA"],
        "3": ["25", "NOM", "([A-Z]|<)+"],
        # NOINT, NOINT2 and NOINT3 are merged by getDocInfos into a single "NO" entry
        "4": ["6", "NOINT", ".+"],
        "5": ["4", "DDATE", "[0-9]+"],
        "6": ["3", "NOINT2", "[0-9]+"],
        "7": ["5", "NOINT3", "[0-9]+"],
        "8": ["1", "CTRL", "[0-9]", "567"],
        # NOTE(review): this regex matches a single letter while the other name
        # fields use "([A-Z]|<)+" -- confirm whether this is intended
        "9": ["14", "PRENOM", "[A-Z]"],
        "A": ["6", "BDATE", "[0-9]+"],
        "B": ["1", "CTRL", "[0-9]", "A"],
        "C": ["1", "SEX", "[A-Z]"],
        # NOTE(review): "E" is listed among the controlled fields but no field
        # "E" exists in this layout
        "D": ["1", "CTRL", "[0-9]", "123456789ABCE"]
    },
    lang.all[globs.CNIRlang]["Pièce d'identité FR"]
]
|
|
|
|
|
|
|
|
|
|
# Document structure whose CODE field is "D1" (label key: driving licence).
# [0] : the two MRZ line templates, each character being the id of the field
#       that occupies this position; the second line is empty
# [1] : field id -> [length, field name, regex, ids of the fields covered by
#       the control digit (CTRL entries only)]
# [2] : label of the document in the current language
DL = [
    ["112223333333334555555666666667", ""],
    {
        "1": ["2", "CODE", "D1"],
        "2": ["3", "PAYS", "[A-Z]+"],
        "3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"],
        "4": ["1", "CTRL", "[0-9]", "123"],
        "5": ["6", "EDATE", "[0-9]+"],
        "6": ["8", "NOM", "([A-Z]|<)+"],
        "7": ["1", "CTRL", "[0-9]", "123456"]
    },
    lang.all[globs.CNIRlang]["Permis de conduire"]
]
|
|
|
|
|
|
2019-08-21 10:36:57 +02:00
|
|
|
|
# All the known document structures, in the order allDocMatch scores them
TYPES = [IDFR, TDV, VB, VA, AC, IDEUR, IP, P, DL, TSF, TSEUR]

# Length of the longest first-line template among all document structures
longest = max(len(structure[0][0]) for structure in TYPES)
|
|
|
|
|
|
2019-07-09 23:01:09 +02:00
|
|
|
|
## THE ROOT OF THIS PROJECT !
|
2019-07-17 17:12:21 +02:00
|
|
|
|
|
2019-07-31 17:08:15 +02:00
|
|
|
|
def getDocString(doc):
    """
    Returns the two line templates of a document structure joined in one string
    (first line followed by the second one)
    """
    templates = doc[0]
    return templates[0] + templates[1]
|
|
|
|
|
|
|
|
|
|
def getFieldLimits(doc, fieldtype):
    """
    Locates a field id in the line templates of a document structure.

    Returns (0, limits) when the field id is found in the first line template,
    otherwise (1, limits) with the limits computed on the second line template.
    The limits are the couple returned by limits().
    """
    onFirstLine = limits(doc[0][0], fieldtype)
    onSecondLine = limits(doc[0][1], fieldtype)

    # limits() yields -1 as start index when the field id is absent from the line
    if -1 not in onFirstLine:
        return 0, onFirstLine
    return 1, onSecondLine
|
2019-07-17 17:12:21 +02:00
|
|
|
|
|
|
|
|
|
def limits(line, fieldtype):
    """
    Returns the couple (start, end) delimiting a field id in a line template :
    start is the index of its first occurrence, end is the index following its
    last occurrence. Returns (-1, 0) when the field id is not in the line.
    """
    return (line.find(fieldtype), line.rfind(fieldtype) + 1)
|
|
|
|
|
|
2019-07-31 17:08:15 +02:00
|
|
|
|
def completeDocField(doc, code, position):
    """
    Returns the number of characters between the given position and the end of
    the field located at this position in the document structure, i.e. how many
    characters are needed to complete this field. The code argument is not used.
    """
    layout = getDocString(doc)
    fieldEnd = limits(layout, layout[position])[1]
    return fieldEnd - position
|
|
|
|
|
|
|
|
|
|
|
2019-07-17 17:12:21 +02:00
|
|
|
|
def docMatch(doc, strs):
    """
    Computes a regex match score for a document structure and a couple of strings.

    Returns (level, nchar, bonus) :
      level : sum of the lengths matched by the regex of each field
      nchar : sum of the declared lengths of all the fields of the document
      bonus : 100 for each matching field whose id is "1"
    """
    # Global handler
    logfile = logger.logCur

    matched = 0
    expected = 0
    extra = 0

    for lineNo in (0, 1):
        position = 0
        while position < len(doc[0][lineNo]):
            template = doc[0][lineNo]
            # Field under the cursor and its bounds in the template
            fieldId = template[position]
            start, end = limits(template, fieldId)
            # The next iteration starts right after this field
            position = end
            # Part of the input string that lies under this field
            text = strs[lineNo][start:end]
            spec = doc[1][fieldId]
            hit = re.compile(spec[2]).match(text)
            if hit:
                matched += hit.end()
                if fieldId == "1":
                    extra += 100
            expected += int(spec[0])

    logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], matched, expected, extra))
    return (matched, expected, extra)
|
|
|
|
|
|
2019-07-18 15:37:58 +02:00
|
|
|
|
def allDocMatch(strs, final=False):
    """
    Tests all the document types on the provided lines and returns the best candidates.

    strs  : couple of strings to analyze
    final : when True, a document whose declared field lengths add up to the
            total length of both strings gets 100 more points

    Returns the list of the document structures (entries of TYPES) sharing the
    best score. When only one document has the best score, the best of the
    remaining documents is added if its score is at most 20 points lower.
    """
    # Global handler
    logfile = logger.logCur

    SCORES = []
    for doc in TYPES:
        # Get the score of the document on the strings
        level, nchar, bonus = docMatch(doc, strs)
        # Number of characters compatibles + bonus with the doc indication
        score = level + bonus
        # if the len of strings is the same than document, add a bonus
        # but only if we are in a final situation
        if final and len(strs[0] + strs[1]) == nchar:
            score += 100
        SCORES.append(score)

    # Search the candidates
    best = max(SCORES)
    winners = [i for i, score in enumerate(SCORES) if score == best]
    candidates = [TYPES[i] for i in winners]
    canditxt = [TYPES[i][2] for i in winners]

    # Continue searching : look for a close runner-up when there is a single winner.
    # The search works on the original indexes, so a runner-up placed right
    # after the winner in TYPES is not missed.
    if len(candidates) < 2:
        others = [i for i in range(len(SCORES)) if i != winners[0]]
        if others:
            # max() keeps the first maximum, i.e. the lowest index on ties
            runnerUp = max(others, key=lambda i: SCORES[i])
            if SCORES[runnerUp] >= best - 20:
                candidates.append(TYPES[runnerUp])
                canditxt.append(TYPES[runnerUp][2])

    # Return the candidates
    logfile.printdbg("Scores : {}".format(SCORES))
    logfile.printdbg("Candidates : {}".format(canditxt))
    return candidates
|
|
|
|
|
|
2019-07-19 17:17:30 +02:00
|
|
|
|
def computeControlSum(code):
    """
    Computes the control sum of the given characters.

    Each character is given a value ('<' and newline : 0, digit : itself,
    capital letter : 10 for 'A' up to 35 for 'Z'), multiplied by the weights
    7, 3, 1 taken cyclically. The result is the sum modulo 10. The computation
    stops at the first character that is none of the above.
    """
    weights = (7, 3, 1)
    total = 0
    for rank, char in enumerate(code):
        if char == '<' or char == '\n':
            value = 0
        elif char in '0123456789':
            value = int(char)
        elif char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            value = ord(char) - 55
        else:
            # unknown character : the remaining characters are ignored
            break
        total += value * weights[rank % 3]

    return total % 10
|
|
|
|
|
|
|
|
|
|
def computeAllControlSum(doc, code):
    """
    Computes all the control sums of an MRZ string for a document structure.

    doc  : document structure
    code : MRZ string ; its positions are matched one to one with the positions
           of the concatenated line templates of the document. Characters
           beyond the length of the templates are ignored.

    Returns {"ctrlSumList": [(field, position, sum, facult), ...]} with one
    entry for each CTRL or CTRLF character found in code :
      field    : id of the control field
      position : position of the control character in code
      sum      : control sum computed on the characters of the covered fields
      facult   : current value of the optional flag (see below)
    """
    layout = getDocString(doc)
    fields = doc[1]
    # Sanity check : only the positions described by the document can be handled
    span = min(len(code), len(layout))

    ctrlSumList = []
    facult = False

    # iteration on each char of the given MRZ
    for charPos in range(span):
        field = layout[charPos]
        kind = fields[field][1]
        if kind != "CTRL" and kind != "CTRLF":
            continue

        # characters of all the fields this control digit covers
        covered = fields[field][3]
        codeChain = "".join(code[pos] for pos in range(span) if layout[pos] in covered)
        ctrlSum = computeControlSum(codeChain)

        # NOTE(review): once an optional control digit is found empty, the flag
        # stays True for all the following entries -- confirm this is intended
        if kind == "CTRLF" and code[charPos] == "<":
            facult = True

        ctrlSumList.append((field, charPos, ctrlSum, facult))

    return {
        "ctrlSumList" : ctrlSumList
    }
|
|
|
|
|
|
2019-07-31 17:08:15 +02:00
|
|
|
|
|
2019-08-01 16:52:36 +02:00
|
|
|
|
def _appendInfo(res, key, value):
    """
    Appends value to the text stored in res[key] and marks the entry as valid.
    The entry is created when it is missing or when it only holds the [0, 0]
    placeholder of an empty field.
    """
    entry = res.get(key)
    if entry is not None and isinstance(entry[0], str):
        entry[0] += value
        entry[1] = True
    else:
        res[key] = [value, True]


def getDocInfos(doc, code):
    """
    Extracts the informations of an MRZ string according to a document structure.

    doc  : document structure
    code : MRZ string ; its positions are matched with the positions of the
           concatenated line templates of the document

    Returns a dictionary : field name -> [value, validity]. Fields left
    untouched keep the [0, 0] placeholder. Special entries :
      "LEN"   : [length of code, True when it equals the length of the templates]
      "NO"    : concatenation of the NOINT, NOINT2, ... fields
      "INDIC" : also receives the content of the FACULT fields
    Country fields found in the country tables are stored as a tuple
    (table value, True).
    """
    layout = doc[0][0] + doc[0][1]
    # get all the types of infos that are in the document doc
    infoTypes = [ (doc[1][field][1], limits(layout, field)) for field in doc[1] ]

    res = {}

    # Length of MRZ
    length = len(code)
    res["LEN"] = [length, length == len(layout)]

    for name, bounds in infoTypes:
        # filler characters are turned into spaces, then trimmed
        value = code[ bounds[0] : bounds[1] ].replace("<", " ").strip()
        res[name] = [0,0]

        # State code
        if name == 'PAYS' or name == 'NAT':
            try:
                # 3-character values use the 3-letter table, the others the 2-letter one
                if len(value) == 3:
                    res[name] = (landcode3[value], True)
                else:
                    res[name] = (landcode2[value], True)
            except KeyError:
                res[name] = [value, False]

        # Dates
        elif name[1:] == 'DATE':
            # size adaptation : YYMMDD -> DD/MM/YY, YYMM -> 01/MM/YY
            if len(value) == 6:
                value = "{}/{}/{}".format(value[4:6], value[2:4], value[0:2])
            elif len(value) == 4:
                value = "{}/{}/{}".format("01", value[2:4], value[0:2])

            # date validation
            try:
                datetime.datetime.strptime(value,"%d/%m/%y")
            except ValueError:
                # an empty date keeps the placeholder
                if value != "":
                    res[name] = [value, False]
            else:
                res[name] = [value, True]

        # Numbers : NOINT, NOINT2, NOINT3... are gathered in "NO"
        elif name == 'NOINT' or name[:-1] == 'NOINT':
            _appendInfo(res, "NO", value)

        elif name == 'FACULT':
            _appendInfo(res, "INDIC", value)

        # Sex
        elif name == 'SEX':
            res[name] = [value, value in "MF"]

        # All other cases
        else:
            if value != "":
                res[name] = [value, True]

    return res
|
2019-07-31 17:08:15 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|