CNIRevelator/src/mrz.py

1050 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
********************************************************************************
* CNIRevelator *
* *
* Desc: MRZ data dictionnary for CNIRevelator analyzer and *
* functions to analyze these data *
* *
* Copyright © 2018-2019 Adrien Bourmault (neox95) *
* *
* This file is part of CNIRevelator. *
* *
* CNIRevelator is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* any later version. *
* *
* CNIRevelator is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY*without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with CNIRevelator. If not, see <https:*www.gnu.org/licenses/>. *
********************************************************************************
"""
import re
import logger # logger.py
import re
import datetime
## SEX CODES
sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'}
## COUNTRY CODES
landcode2 = {
'AW': 'Aruba',
'AF': 'Afghanistan',
'AO': 'Angola',
'AI': 'Anguilla',
'AL': 'Albanie',
'AD': 'Andorre',
'AE': 'Emirats arabes unis',
'AR': 'Argentine',
'AM': 'Arménie',
'AS': 'Samoa américaines',
'AQ': 'Antarctique',
'TF': 'Terres australes et antarctiques françaises',
'AG': 'Antigua-et-Barbuda',
'AU': 'Australie',
'AT': 'Autriche',
'AZ': 'Azerbaidjan',
'BI': 'Burundi',
'BE': 'Belgique',
'BJ': 'Benin',
'BQ': 'Pays-Bas caribéens',
'BF': 'Burkina Faso',
'BD': 'Bangladesh',
'BG': 'Bulgarie',
'BH': 'Bahrein',
'BS': 'Bahamas',
'BA': 'Bosnie-Herzegovine',
'BL': 'Saint-Barthélemy',
'BY': 'Bielorussie',
'BZ': 'Belize',
'BM': 'Bermudes',
'BO': 'Bolivie',
'BR': 'Brésil',
'BB': 'Barbade',
'BN': 'Brunei',
'BT': 'Bhoutan',
'BW': 'Botswana',
'CF': 'République Centrafricaine',
'CA': 'Canada',
'CC': 'Îles Cocos',
'CH': 'Suisse',
'CL': 'Chili',
'CN': 'Chine',
'CI': "Côte d'Ivoire",
'CM': 'Cameroun',
'CD': 'Congo (République démocratique)',
'CG': 'Congo (République)',
'CK': 'Îles Cook',
'CO': 'Colombie',
'KM': 'Comores',
'CV': 'Cap-Vert',
'CR': 'Costa Rica',
'CU': 'Cuba',
'CW': 'Curaçao',
'CX': 'Île Christmas',
'KY': 'Caimans',
'CY': 'Chypre',
'CZ': 'Tchéquie',
'DE': 'Allemagne',
'DJ': 'Djibouti',
'DM': 'Dominique',
'DK': 'Danemark',
'DO': 'République dominicaine',
'DZ': 'Algérie',
'EC': 'Equateur',
'EG': 'Egypte',
'ER': 'Erythrée',
'EH': 'Sahara occidental',
'ES': 'Espagne',
'EE': 'Estonie',
'ET': 'Ethiopie',
'FI': 'Finlande',
'FJ': 'Fidji',
'FK': 'Îles Malouines',
'FR': 'France',
'FO': 'Féroé',
'FM': 'Micronésie',
'GA': 'Gabon',
'GB': 'Royaume-Uni',
'GE': 'Géorgie',
'GG': 'Guernesey',
'GH': 'Ghana',
'GI': 'Gibraltar',
'GN': 'Guinée',
'GP': 'Guadeloupe',
'GM': 'Gambie',
'GW': 'Guinée-Bissau',
'GQ': 'Guinée équatoriale',
'GR': 'Grèce',
'GD': 'Grenade',
'GL': 'Groenland',
'GT': 'Guatemala',
'GF': 'Guyane',
'GU': 'Guam',
'GY': 'Guyana',
'HK': 'Hong Kong',
'HN': 'Honduras',
'HR': 'Croatie',
'HT': 'Haïti',
'HU': 'Hongrie',
'ID': 'Indonésie',
'IM': 'Île de Man',
'IN': 'Inde',
'IO': "Territoire britannique de l'océan Indien",
'IE': 'Irlande',
'IR': 'Irak',
'IQ': 'Iran',
'IS': 'Islande',
'IL': 'Israël',
'IT': 'Italie',
'JM': 'Jamaïque',
'JE': 'Jersey',
'JO': 'Jordanie',
'JP': 'Japon',
'KZ': 'Kazakhstan',
'KE': 'Kenya',
'KG': 'Kirghizistan',
'KH': 'Cambodge',
'KI': 'Kiribati',
'KN': 'Saint-Christophe-et-Niévès',
'KR': 'Corée du Sud',
'KW': 'Koweït',
'LA': 'Laos',
'LB': 'Liban',
'LR': 'Liberia',
'LY': 'Libye',
'LC': 'Sainte-Lucie',
'LI': 'Liechtenstein',
'LK': 'Sri Lanka',
'LS': 'Lesotho',
'LT': 'Lituanie',
'LU': 'Luxembourg',
'LV': 'Lettonie',
'MO': 'Macao',
'MF': 'Sint-Maarten',
'MA': 'Maroc',
'MC': 'Monaco',
'MD': 'Moldavie',
'MG': 'Madagascar',
'MV': 'Maldives',
'MX': 'Mexique',
'MH': 'Marshall',
'MK': 'Macedoine',
'ML': 'Mali',
'MT': 'Malte',
'MM': 'Birmanie',
'ME': 'Monténégro',
'MN': 'Mongolie',
'MP': 'Îles Mariannes du Nord',
'MZ': 'Mozambique',
'MR': 'Mauritanie',
'MS': 'Montserrat',
'MQ': 'Martinique',
'MU': 'Maurice',
'MW': 'Malawi',
'MY': 'Malaisie',
'YT': 'Mayotte',
'NA': 'Namibie',
'NC': 'Nouvelle-Calédonie',
'NE': 'Niger',
'NF': 'Île Norfolk',
'NG': 'Nigeria',
'NI': 'Nicaragua',
'NU': 'Niue',
'NL': 'Pays-Bas',
'NO': 'Norvège',
'NP': 'Nepal',
'NR': 'Nauru',
'NZ': 'Nouvelle-Zélande',
'OM': 'Oman',
'PK': 'Pakistan',
'PA': 'Panama',
'PN': 'Îles Pitcairn',
'PE': 'Pérou',
'PH': 'Philippines',
'PW': 'Palaos',
'PG': 'Papouasie-Nouvelle-Guinée',
'PL': 'Pologne',
'PR': 'Porto Rico',
'KP': 'Corée du Nord',
'PT': 'Portugal',
'PY': 'Paraguay',
'PS': 'Palestine',
'PF': 'Polynésie française',
'QA': 'Qatar',
'RE': 'Réunion',
'RO': 'Roumanie',
'RU': 'Russie',
'RW': 'Rwanda',
'SA': 'Arabie saoudite',
'SD': 'Soudan',
'SN': 'Sénégal',
'SG': 'Singapour',
'GS': 'Georgie du Sud-et-les iles Sandwich du Sud',
'SH': 'Sainte-Hélène, Ascension et Tristan da Cunha',
'SJ': 'Svalbard et île Jan Mayen',
'SB': 'Salomon',
'SL': 'Sierra Leone',
'SV': 'Salvador',
'SM': 'Saint-Marin',
'SO': 'Somalie',
'PM': 'Saint-Pierre-et-Miquelon',
'RS': 'Serbie',
'SS': 'Soudan du Sud',
'ST': 'Sao Tomé-et-Principe',
'SR': 'Suriname',
'SK': 'Slovaquie',
'SI': 'Slovénie',
'SE': 'Suède',
'SZ': 'eSwatani',
'SX': 'Saint-Martin ',
'SC': 'Seychelles',
'SY': 'Syrie',
'TC': 'Îles Turques-et-Caïques',
'TD': 'Tchad',
'TG': 'Togo',
'TH': 'Thaïlande',
'TJ': 'Tadjikistan',
'TK': 'Tokelau',
'TM': 'Turkmenistan',
'TL': 'Timor oriental',
'TO': 'Tonga',
'TT': 'Trinité-et-Tobago',
'TN': 'Tunisie',
'TR': 'Turquie',
'TV': 'Tuvalu',
'TW': 'Taiwan',
'TZ': 'Tanzanie',
'UG': 'Ouganda',
'UA': 'Ukraine',
'UY': 'Uruguay',
'US': 'Etats-Unis',
'UZ': 'Ouzbékistan',
'VA': 'Saint-Siège (État de la Cité du Vatican)',
'VC': 'Saint-Vincent-et-les-Grenadines',
'VE': 'Venezuela',
'VG': 'Îles Vierges britanniques',
'VI': 'Îles Vierges des États-Unis',
'VN': 'Viêt Nam',
'VU': 'Vanuatu',
'WF': 'Wallis-et-Futuna',
'WS': 'Samoa',
'XK': 'Kosovo',
'YE': 'Yémen',
'ZA': 'Afrique du Sud',
'ZM': 'Zambie',
'ZW': 'Zimbabwe'
}
landcode3 = {
'ABW': 'Aruba',
'AFG': 'Afghanistan',
'AGO': 'Angola',
'AIA': 'Anguilla',
'ALB': 'Albanie',
'AND': 'Andorre',
'ARE': 'Emirats arabes unis',
'ARG': 'Argentine',
'ARM': 'Arménie',
'ASM': 'Samoa américaines',
'ATA': 'Antarctique',
'ATF': 'Terres australes et antarctiques françaises',
'ATG': 'Antigua-et-Barbuda',
'AUS': 'Australie',
'AUT': 'Autriche',
'AZE': 'Azerbaidjan',
'BDI': 'Burundi',
'BEL': 'Belgique',
'BEN': 'Benin',
'BES': 'Pays-Bas caribéens',
'BFA': 'Burkina Faso',
'BGD': 'Bangladesh',
'BGR': 'Bulgarie',
'BHR': 'Bahrein',
'BHS': 'Bahamas',
'BIH': 'Bosnie-Herzegovine',
'BLM': 'Saint-Barthélemy',
'BLR': 'Bielorussie',
'BLZ': 'Belize',
'BMU': 'Bermudes',
'BOL': 'Bolivie',
'BRA': 'Brésil',
'BRB': 'Barbade',
'BRN': 'Brunei',
'BTN': 'Bhoutan',
'BWA': 'Botswana',
'CAF': 'République Centrafricaine',
'CAN': 'Canada',
'CCK': 'Îles Cocos',
'CHE': 'Suisse',
'CHL': 'Chili',
'CHN': 'Chine',
'CIV': "Côte d'Ivoire",
'CMR': 'Cameroun',
'COD': 'Congo (République démocratique)',
'COG': 'Congo (République)',
'COK': 'Îles Cook',
'COL': 'Colombie',
'COM': 'Comores',
'CPV': 'Cap-Vert',
'CRI': 'Costa Rica',
'CUB': 'Cuba',
'CUW': 'Curaçao',
'CXR': 'Île Christmas',
'CYM': 'Caimans',
'CYP': 'Chypre',
'CZE': 'Tchéquie',
'DEU': 'Allemagne',
'DJI': 'Djibouti',
'DMA': 'Dominique',
'DNK': 'Danemark',
'DOM': 'République dominicaine',
'DZA': 'Algérie',
'ECU': 'Equateur',
'EGY': 'Egypte',
'ERI': 'Erythrée',
'ESH': 'Sahara occidental',
'ESP': 'Espagne',
'EST': 'Estonie',
'ETH': 'Ethiopie',
'FIN': 'Finlande',
'FJI': 'Fidji',
'FLK': 'Îles Malouines',
'FRA': 'France',
'FRO': 'Féroé',
'FSM': 'Micronésie',
'GAB': 'Gabon',
'GBR': 'Royaume-Uni',
'GEO': 'Géorgie',
'GGY': 'Guernesey',
'GHA': 'Ghana',
'GIB': 'Gibraltar',
'GIN': 'Guinée',
'GLP': 'Guadeloupe',
'GMB': 'Gambie',
'GNB': 'Guinée-Bissau',
'GNQ': 'Guinée équatoriale',
'GRC': 'Grèce',
'GRD': 'Grenade',
'GRL': 'Groenland',
'GTM': 'Guatemala',
'GUF': 'Guyane',
'GUM': 'Guam',
'GUY': 'Guyana',
'HKG': 'Hong Kong',
'HND': 'Honduras',
'HRV': 'Croatie',
'HTI': 'Haïti',
'HUN': 'Hongrie',
'IDN': 'Indonésie',
'IMN': 'Île de Man',
'IND': 'Inde',
'IOT': "Territoire britannique de l'océan Indien",
'IRL': 'Irlande',
'IRN': 'Irak',
'IRQ': 'Iran',
'ISL': 'Islande',
'ISR': 'Israël',
'ITA': 'Italie',
'JAM': 'Jamaïque',
'JEY': 'Jersey',
'JOR': 'Jordanie',
'JPN': 'Japon',
'KAZ': 'Kazakhstan',
'KEN': 'Kenya',
'KGZ': 'Kirghizistan',
'KHM': 'Cambodge',
'KIR': 'Kiribati',
'KNA': 'Saint-Christophe-et-Niévès',
'KOR': 'Corée du Sud',
'KWT': 'Koweït',
'LAO': 'Laos',
'LBN': 'Liban',
'LBR': 'Liberia',
'LBY': 'Libye',
'LCA': 'Sainte-Lucie',
'LIE': 'Liechtenstein',
'LKA': 'Sri Lanka',
'LSO': 'Lesotho',
'LTU': 'Lituanie',
'LUX': 'Luxembourg',
'LVA': 'Lettonie',
'MAC': 'Macao',
'MAF': 'Sint-Maarten',
'MAR': 'Maroc',
'MCO': 'Monaco',
'MDA': 'Moldavie',
'MDG': 'Madagascar',
'MDV': 'Maldives',
'MEX': 'Mexique',
'MHL': 'Marshall',
'MKD': 'Macedoine',
'MLI': 'Mali',
'MLT': 'Malte',
'MMR': 'Birmanie',
'MNE': 'Monténégro',
'MNG': 'Mongolie',
'MNP': 'Îles Mariannes du Nord',
'MOZ': 'Mozambique',
'MRT': 'Mauritanie',
'MSR': 'Montserrat',
'MTQ': 'Martinique',
'MUS': 'Maurice',
'MWI': 'Malawi',
'MYS': 'Malaisie',
'MYT': 'Mayotte',
'NAM': 'Namibie',
'NCL': 'Nouvelle-Calédonie',
'NER': 'Niger',
'NFK': 'Île Norfolk',
'NGA': 'Nigeria',
'NIC': 'Nicaragua',
'NIU': 'Niue',
'NLD': 'Pays-Bas',
'NOR': 'Norvège',
'NPL': 'Nepal',
'NRU': 'Nauru',
'NZL': 'Nouvelle-Zélande',
'OMN': 'Oman',
'PAK': 'Pakistan',
'PAN': 'Panama',
'PCN': 'Îles Pitcairn',
'PER': 'Pérou',
'PHL': 'Philippines',
'PLW': 'Palaos',
'PNG': 'Papouasie-Nouvelle-Guinée',
'POL': 'Pologne',
'PRI': 'Porto Rico',
'PRK': 'Corée du Nord',
'PRT': 'Portugal',
'PRY': 'Paraguay',
'PSE': 'Palestine',
'PYF': 'Polynésie française',
'QAT': 'Qatar',
'REU': 'Réunion',
'ROU': 'Roumanie',
'RUS': 'Russie',
'RWA': 'Rwanda',
'SAU': 'Arabie saoudite',
'SDN': 'Soudan',
'SEN': 'Sénégal',
'SGP': 'Singapour',
'SGS': 'Georgie du Sud-et-les iles Sandwich du Sud',
'SHN': 'Sainte-Hélène, Ascension et Tristan da Cunha',
'SJM': 'Svalbard et île Jan Mayen',
'SLB': 'Salomon',
'SLE': 'Sierra Leone',
'SLV': 'Salvador',
'SMR': 'Saint-Marin',
'SOM': 'Somalie',
'SPM': 'Saint-Pierre-et-Miquelon',
'SRB': 'Serbie',
'SSD': 'Soudan du Sud',
'STP': 'Sao Tomé-et-Principe',
'SUR': 'Suriname',
'SVK': 'Slovaquie',
'SVN': 'Slovénie',
'SWE': 'Suède',
'SWZ': 'eSwatani',
'SXM': 'Saint-Martin ',
'SYC': 'Seychelles',
'SYR': 'Syrie',
'TCA': 'Îles Turques-et-Caïques',
'TCD': 'Tchad',
'TGO': 'Togo',
'THA': 'Thaïlande',
'TJK': 'Tadjikistan',
'TKL': 'Tokelau',
'TKM': 'Turkmenistan',
'TLS': 'Timor oriental',
'TON': 'Tonga',
'TTO': 'Trinité-et-Tobago',
'TUN': 'Tunisie',
'TUR': 'Turquie',
'TUV': 'Tuvalu',
'TWN': 'Taiwan',
'TZA': 'Tanzanie',
'UGA': 'Ouganda',
'UKR': 'Ukraine',
'URY': 'Uruguay',
'USA': 'Etats-Unis',
'UZB': 'Ouzbékistan',
'VAT': 'Saint-Siège (État de la Cité du Vatican)',
'VCT': 'Saint-Vincent-et-les-Grenadines',
'VEN': 'Venezuela',
'VGB': 'Îles Vierges britanniques',
'VIR': 'Îles Vierges des États-Unis',
'VNM': 'Viêt Nam',
'VUT': 'Vanuatu',
'WLF': 'Wallis-et-Futuna',
'WSM': 'Samoa',
'XKX': 'Kosovo',
'YEM': 'Yémen',
'ZAF': 'Afrique du Sud',
'ZMB': 'Zambie',
'ZWE': 'Zimbabwe',
'NTZ': 'Zone neutre',
'UNO': 'Fonctionnaire des Nations Unies',
'UNA': "Fonctionnaire d'une organisation affiliée aux Nations Unies",
'UNK': 'Représentant des Nations Unies au Kosovo',
'XXA': 'Apatride Convention 1954',
'XXB': 'Réfugié Convention 1954',
'XXC': 'Réfugié autre',
'XXX': 'Résident Légal de Nationalité Inconnue',
'D': 'Allemagne',
'EUE': 'Union Européenne',
'GBD': "Citoyen Britannique d'Outre-mer (BOTC)",
'GBN': 'British National (Overseas)',
'GBO': 'British Overseas Citizen',
'GBP': 'British Protected Person',
'GBS': 'British Subject',
'XBA': 'Banque Africaine de Développement',
'XIM': "Banque Africaine d'ExportImport",
'XCC': 'Caribbean Community or one of its emissaries',
'XCO': 'Common Market for Eastern and Southern Africa',
'XEC': 'Economic Community of West African States',
'XPO': 'International Criminal Police Organization',
'XOM': 'Sovereign Military Order of Malta',
'RKS': 'Kosovo',
'WSA': 'World Service Authority World Passport'
}
## DOCUMENTS TYPES
P = [
["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"],
{
"1": ["2", "CODE", "P."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["39", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["14", "FACULT", ".+"],
"D": ["1", "CTRLF", "[0-9]", "C"],
"E": ["1", "CTRL", "[0-9]", "4578ABCD"]
},
"Passeport"
]
IP = [
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": ["2", "CODE", "IP"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
},
"Carte-passeport"
]
I_ = [
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
},
"Titre d'identité/de voyage"
]
AC = [
["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
{
"1": ["2", "CODE", "AC"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"E": ["3", "INDIC", "[A-Z]{1,2}."],
"5": ["12", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+ "],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]","345679AC"]
},
"Certificat de membre d'équipage"
]
VA = [
["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCCC"],
{
"1": ["2", "CODE", "V."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["39", "NOM", "[A-Z]+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["16", "FACULT", ".+"]
},
"Visa de type A"
]
VB = [
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
{
"1": ["2", "CODE", "V."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["8", "FACULT", ".+"]
},
"Visa de type B"
]
TSF = [
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
{
"1": ["2", "CODE", "TS"],
"2": ["3", "PAYS", "FRA"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["8", "FACULT", ".+"]
},
"Carte de séjour"
]
I__ = [
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"],
{
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["7", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "4578ABC"]
},
"Pièce d'identité/de voyage"
]
IDFR = [
["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"],
{
"1": ["2", "CODE", "ID"],
"2": ["3", "PAYS", "FRA"],
"3": ["25", "NOM", "([A-Z]|<)+"],
"4": ["6", "NOINT", ".+"],
"5": ["4", "DDATE", "[0-9]+"],
"6": ["3", "NOINT2", "[0-9]+"],
"7": ["5", "NOINT3", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "567"],
"9": ["14", "PRENOM", "[A-Z]"],
"A": ["6", "BDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["1", "SEX", "[A-Z]"],
"D": ["1", "CTRL", "[0-9]", "123456789ABCE"]
},
"Pièce d'identité FR"
]
DL = [
["112223333333334555555666666667", ""],
{
"1": ["2", "CODE", "D1"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"],
"4": ["1", "CTRL", "[0-9]", "123"],
"5": ["6", "EDATE", "[0-9]+"],
"6": ["8", "NOM", "([A-Z]|<)+"],
"7": ["1", "CTRL", "[0-9]", "123456"]
},
"Permis de conduire"
]
TYPES = [IDFR, I__, VB, VA, AC, I_, IP, P, DL, TSF]
# longest document MRZ line
longest = max([len(x[0][0]) for x in TYPES])
## THE ROOT OF THIS PROJECT !
def getDocString(doc):
return doc[0][0] + doc[0][1]
def getFieldLimits(doc, fieldtype):
"""
This function returns the limit of a given field string id for a given document structure
"""
L1 = limits(doc[0][0], fieldtype)
L2 = limits(doc[0][1], fieldtype)
if -1 in L1:
return 1, L2
else:
return 0, L1
return
def limits(line, fieldtype):
"""
Returns the limit of a given field structure
"""
a = line.find(fieldtype)
b = line.rfind(fieldtype)
return (a,b+1)
def completeDocField(doc, code, position):
"""
Completes with '<' the document the field that is located at given position
"""
field = getDocString(doc)[position]
limit = limits(getDocString(doc), field)
res = limit[1] - position
#print("field : {}, limit : {}, number of char to complete : {}".format(field, limit, res))
return res
def docMatch(doc, strs):
"""
This function calculates a regex match score for a given document and a string couple
"""
# Global handler
logfile = logger.logCur
level = 0
nchar = 0
bonus = 0
for i in range(0,2):
cursor = 0
#print("Line : {}".format(i))
while True:
if cursor > len(doc[0][i]) - 1:
break
# Getting the type of field on the cursor position
fieldtype = doc[0][i][cursor]
lim = limits(doc[0][i], fieldtype)
# ready for next field
cursor = lim[1]
# get the current field and isolates it
field = doc[0][i][ lim[0]:lim[1] ]
fstr = strs[i][ lim[0]:lim[1] ]
# Prepare regex compilation
regex = re.compile(doc[1][fieldtype][2])
# Test the match
matching = regex.match(fstr)
# Retrieve the mathing level
if matching:
level += matching.end()
if fieldtype == "1":
bonus += 100
nchar += int(doc[1][fieldtype][0])
# Print for debug
# print("Field : {}, type = {}, on str : {}".format(field, fieldtype, fstr))
# logfile.printdbg(" REGEX : {}, match : {}".format(regex, matching))
# exit the loop
#logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], level, nchar, bonus))
return (level, nchar, bonus)
def allDocMatch(strs, final=False):
"""
This functions test all documents types on the lines provided and returns a score for each
"""
# Global handler
logfile = logger.logCur
#print(strs)
SCORES = []
for doc in TYPES:
# Get the score of the document on the strings
level, nchar, bonus = docMatch(doc, strs)
# Number of characters compatibles + bonus with the doc indication
SCORES += [ level + bonus ]
# if the len of strings is the same than document, add a bonus
# but only if we are in a final situation
if final:
if len(strs[0] + strs[1]) == nchar:
SCORES[-1] += 100
candidate = SCORES.index(max(SCORES))
candidates = []
canditxt = []
# Search the candidates
for i in range(len(SCORES)):
if SCORES[i] == SCORES[candidate]:
candidates += [TYPES[i]]
canditxt += [TYPES[i][2]]
# Return the candidates
#logfile.printdbg("Scores : {}".format(SCORES))
#logfile.printdbg("Candidates : {}".format(canditxt))
return candidates
def computeControlSum(code):
"""
This function computes a control sum for the given characters
"""
resultat = 0
i = -1
facteur = [7, 3, 1]
for car in code:
if car == '<' or car == '\n':
valeur = 0
i += 1
else:
if car in '0123456789':
valeur = int(car)
i += 1
else:
if car in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
valeur = ord(car) - 55
i += 1
else:
break
resultat += valeur * facteur[(i % 3)]
return resultat % 10
def computeAllControlSum(doc, code):
"""
This function computes all the ctrl sums on a MRZ string and returns all the results
it returns the misc infos about the document too
"""
ctrlSumList = []
facult = False
# iteration on each char of the given MRZ
for charPos in range(len(code)):
field = getDocString(doc)[charPos]
if doc[1][field][1] == "CTRL":
#print("{} is CTRL field {}".format(code[charPos], field))
codeChain = ""
# iteration on the fields to control
for pos in range(len(code)):
target = getDocString(doc)[pos]
if target in doc[1][field][3]:
#print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3]))
codeChain += code[pos]
#print("chain to control : _{}_".format(codeChain))
ctrlSum = computeControlSum(codeChain)
#print("SUM : {} vs {}".format(code[charPos], ctrlSum))
ctrlSumList += [ (field, charPos, ctrlSum, facult) ]
if doc[1][field][1] == "CTRLF":
#print("{} is CTRL field {}".format(code[charPos], field))
codeChain = ""
# iteration on the fields to control
for pos in range(len(code)):
target = getDocString(doc)[pos]
if target in doc[1][field][3]:
#print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3]))
codeChain += code[pos]
#print("chain to control : _{}_".format(codeChain))
ctrlSum = computeControlSum(codeChain)
#print("SUM : {} vs {}".format(code[charPos], ctrlSum))
if code[charPos] == "<":
facult = True
ctrlSumList += [ (field, charPos, ctrlSum, facult) ]
return {
"ctrlSumList" : ctrlSumList
}
def getDocInfos(doc, code):
# get all the types of infos that are in the document doc
infoTypes = [ (doc[1][field][1], limits(doc[0][0] + doc[0][1], field)) for field in doc[1] ]
res = {}
for field in infoTypes:
value = code[ field[1][0] : field[1][1] ].replace("<", " ").strip()
# State code
if field[0] == 'PAYS' or field[0] == 'NAT':
try:
if len(value) == 3 and value[-1] != "<":
res[field[0]] = landcode3[value]
elif len(value) == 3 and value[-1] == "<":
res[field[0]] = landcode2[value[:-1]]
else:
res[field[0]] = landcode2[value]
except KeyError:
res[field[0]] = False
# Dates
elif field[0][1:] == 'DATE':
# size adaptation
if len(value) == 6:
value = "{}/{}/{}".format(value[4:6], value[2:4], value[0:2])
elif len(value) == 4:
value = "{}/{}/{}".format("01", value[2:4], value[0:2])
# date validation
try:
datetime.datetime.strptime(value,"%d/%m/%y")
except ValueError:
#print(value)
if value != "":
res[field[0]] = False
else:
res[field[0]] = value
# Numbers
elif field[0][:-1] == 'NOINT':
try:
res["NO"] += value
except KeyError:
res["NO"] = value
elif field[0] == 'NOINT':
try:
res["NO"] += value
except KeyError:
res["NO"] = value
elif field[0] == 'FACULT':
try:
res["INDIC"] += value
except KeyError:
res["INDIC"] = value
# All other cases
else:
if value != "":
res[field[0]] = value
return res