CNIRevelator/src/mrz.py

1049 lines
28 KiB
Python
Raw Normal View History

2019-07-09 23:01:09 +02:00
"""
********************************************************************************
* CNIRevelator *
* *
* Desc: MRZ data dictionnary for CNIRevelator analyzer and *
* functions to analyze these data *
* *
* Copyright © 2018-2019 Adrien Bourmault (neox95) *
* *
* This file is part of CNIRevelator. *
* *
* CNIRevelator is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* any later version. *
* *
* CNIRevelator is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY*without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with CNIRevelator. If not, see <https:*www.gnu.org/licenses/>. *
********************************************************************************
"""
2019-07-17 17:12:21 +02:00
import re
import logger # logger.py
2019-08-01 16:52:36 +02:00
import re
import datetime
2019-07-17 17:12:21 +02:00
2019-07-09 23:01:09 +02:00
## SEX CODES
sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'}
## COUNTRY CODES
2019-07-12 10:57:03 +02:00
landcode2 = {
'AW': 'Aruba',
'AF': 'Afghanistan',
'AO': 'Angola',
'AI': 'Anguilla',
'AL': 'Albanie',
'AD': 'Andorre',
'AE': 'Emirats arabes unis',
'AR': 'Argentine',
'AM': 'Arménie',
'AS': 'Samoa américaines',
'AQ': 'Antarctique',
'TF': 'Terres australes et antarctiques françaises',
'AG': 'Antigua-et-Barbuda',
'AU': 'Australie',
'AT': 'Autriche',
'AZ': 'Azerbaidjan',
'BI': 'Burundi',
'BE': 'Belgique',
'BJ': 'Benin',
'BQ': 'Pays-Bas caribéens',
'BF': 'Burkina Faso',
'BD': 'Bangladesh',
'BG': 'Bulgarie',
'BH': 'Bahrein',
'BS': 'Bahamas',
'BA': 'Bosnie-Herzegovine',
'BL': 'Saint-Barthélemy',
'BY': 'Bielorussie',
'BZ': 'Belize',
'BM': 'Bermudes',
'BO': 'Bolivie',
'BR': 'Brésil',
'BB': 'Barbade',
'BN': 'Brunei',
'BT': 'Bhoutan',
'BW': 'Botswana',
'CF': 'République Centrafricaine',
'CA': 'Canada',
'CC': 'Îles Cocos',
'CH': 'Suisse',
'CL': 'Chili',
'CN': 'Chine',
'CI': "Côte d'Ivoire",
'CM': 'Cameroun',
'CD': 'Congo (République démocratique)',
'CG': 'Congo (République)',
'CK': 'Îles Cook',
'CO': 'Colombie',
'KM': 'Comores',
'CV': 'Cap-Vert',
'CR': 'Costa Rica',
'CU': 'Cuba',
'CW': 'Curaçao',
'CX': 'Île Christmas',
'KY': 'Caimans',
'CY': 'Chypre',
'CZ': 'Tchéquie',
'DE': 'Allemagne',
'DJ': 'Djibouti',
'DM': 'Dominique',
'DK': 'Danemark',
'DO': 'République dominicaine',
'DZ': 'Algérie',
'EC': 'Equateur',
'EG': 'Egypte',
'ER': 'Erythrée',
'EH': 'Sahara occidental',
'ES': 'Espagne',
'EE': 'Estonie',
'ET': 'Ethiopie',
'FI': 'Finlande',
'FJ': 'Fidji',
'FK': 'Îles Malouines',
'FR': 'France',
'FO': 'Féroé',
'FM': 'Micronésie',
'GA': 'Gabon',
'GB': 'Royaume-Uni',
'GE': 'Géorgie',
'GG': 'Guernesey',
'GH': 'Ghana',
'GI': 'Gibraltar',
'GN': 'Guinée',
'GP': 'Guadeloupe',
'GM': 'Gambie',
'GW': 'Guinée-Bissau',
'GQ': 'Guinée équatoriale',
'GR': 'Grèce',
'GD': 'Grenade',
'GL': 'Groenland',
'GT': 'Guatemala',
'GF': 'Guyane',
'GU': 'Guam',
'GY': 'Guyana',
'HK': 'Hong Kong',
'HN': 'Honduras',
'HR': 'Croatie',
'HT': 'Haïti',
'HU': 'Hongrie',
'ID': 'Indonésie',
'IM': 'Île de Man',
'IN': 'Inde',
'IO': "Territoire britannique de l'océan Indien",
'IE': 'Irlande',
'IR': 'Irak',
'IQ': 'Iran',
'IS': 'Islande',
'IL': 'Israël',
'IT': 'Italie',
'JM': 'Jamaïque',
'JE': 'Jersey',
'JO': 'Jordanie',
'JP': 'Japon',
'KZ': 'Kazakhstan',
'KE': 'Kenya',
'KG': 'Kirghizistan',
'KH': 'Cambodge',
'KI': 'Kiribati',
'KN': 'Saint-Christophe-et-Niévès',
'KR': 'Corée du Sud',
'KW': 'Koweït',
'LA': 'Laos',
'LB': 'Liban',
'LR': 'Liberia',
'LY': 'Libye',
'LC': 'Sainte-Lucie',
'LI': 'Liechtenstein',
'LK': 'Sri Lanka',
'LS': 'Lesotho',
'LT': 'Lituanie',
'LU': 'Luxembourg',
'LV': 'Lettonie',
'MO': 'Macao',
'MF': 'Sint-Maarten',
'MA': 'Maroc',
'MC': 'Monaco',
'MD': 'Moldavie',
'MG': 'Madagascar',
'MV': 'Maldives',
'MX': 'Mexique',
'MH': 'Marshall',
'MK': 'Macedoine',
'ML': 'Mali',
'MT': 'Malte',
'MM': 'Birmanie',
'ME': 'Monténégro',
'MN': 'Mongolie',
'MP': 'Îles Mariannes du Nord',
'MZ': 'Mozambique',
'MR': 'Mauritanie',
'MS': 'Montserrat',
'MQ': 'Martinique',
'MU': 'Maurice',
'MW': 'Malawi',
'MY': 'Malaisie',
'YT': 'Mayotte',
'NA': 'Namibie',
'NC': 'Nouvelle-Calédonie',
'NE': 'Niger',
'NF': 'Île Norfolk',
'NG': 'Nigeria',
'NI': 'Nicaragua',
'NU': 'Niue',
'NL': 'Pays-Bas',
'NO': 'Norvège',
'NP': 'Nepal',
'NR': 'Nauru',
'NZ': 'Nouvelle-Zélande',
'OM': 'Oman',
'PK': 'Pakistan',
'PA': 'Panama',
'PN': 'Îles Pitcairn',
'PE': 'Pérou',
'PH': 'Philippines',
'PW': 'Palaos',
'PG': 'Papouasie-Nouvelle-Guinée',
'PL': 'Pologne',
'PR': 'Porto Rico',
'KP': 'Corée du Nord',
'PT': 'Portugal',
'PY': 'Paraguay',
'PS': 'Palestine',
'PF': 'Polynésie française',
'QA': 'Qatar',
'RE': 'Réunion',
'RO': 'Roumanie',
'RU': 'Russie',
'RW': 'Rwanda',
'SA': 'Arabie saoudite',
'SD': 'Soudan',
'SN': 'Sénégal',
'SG': 'Singapour',
'GS': 'Georgie du Sud-et-les iles Sandwich du Sud',
'SH': 'Sainte-Hélène, Ascension et Tristan da Cunha',
'SJ': 'Svalbard et île Jan Mayen',
'SB': 'Salomon',
'SL': 'Sierra Leone',
'SV': 'Salvador',
'SM': 'Saint-Marin',
'SO': 'Somalie',
'PM': 'Saint-Pierre-et-Miquelon',
'RS': 'Serbie',
'SS': 'Soudan du Sud',
'ST': 'Sao Tomé-et-Principe',
'SR': 'Suriname',
'SK': 'Slovaquie',
'SI': 'Slovénie',
'SE': 'Suède',
'SZ': 'eSwatani',
'SX': 'Saint-Martin ',
'SC': 'Seychelles',
'SY': 'Syrie',
'TC': 'Îles Turques-et-Caïques',
'TD': 'Tchad',
'TG': 'Togo',
'TH': 'Thaïlande',
'TJ': 'Tadjikistan',
'TK': 'Tokelau',
'TM': 'Turkmenistan',
'TL': 'Timor oriental',
'TO': 'Tonga',
'TT': 'Trinité-et-Tobago',
'TN': 'Tunisie',
'TR': 'Turquie',
'TV': 'Tuvalu',
'TW': 'Taiwan',
'TZ': 'Tanzanie',
'UG': 'Ouganda',
'UA': 'Ukraine',
'UY': 'Uruguay',
'US': 'Etats-Unis',
'UZ': 'Ouzbékistan',
'VA': 'Saint-Siège (État de la Cité du Vatican)',
'VC': 'Saint-Vincent-et-les-Grenadines',
'VE': 'Venezuela',
'VG': 'Îles Vierges britanniques',
'VI': 'Îles Vierges des États-Unis',
'VN': 'Viêt Nam',
'VU': 'Vanuatu',
'WF': 'Wallis-et-Futuna',
'WS': 'Samoa',
'XK': 'Kosovo',
'YE': 'Yémen',
'ZA': 'Afrique du Sud',
'ZM': 'Zambie',
'ZW': 'Zimbabwe'
}
landcode3 = {
'ABW': 'Aruba',
'AFG': 'Afghanistan',
'AGO': 'Angola',
'AIA': 'Anguilla',
'ALB': 'Albanie',
'AND': 'Andorre',
'ARE': 'Emirats arabes unis',
'ARG': 'Argentine',
'ARM': 'Arménie',
'ASM': 'Samoa américaines',
'ATA': 'Antarctique',
'ATF': 'Terres australes et antarctiques françaises',
'ATG': 'Antigua-et-Barbuda',
'AUS': 'Australie',
'AUT': 'Autriche',
'AZE': 'Azerbaidjan',
'BDI': 'Burundi',
'BEL': 'Belgique',
'BEN': 'Benin',
'BES': 'Pays-Bas caribéens',
'BFA': 'Burkina Faso',
'BGD': 'Bangladesh',
'BGR': 'Bulgarie',
'BHR': 'Bahrein',
'BHS': 'Bahamas',
'BIH': 'Bosnie-Herzegovine',
'BLM': 'Saint-Barthélemy',
'BLR': 'Bielorussie',
'BLZ': 'Belize',
'BMU': 'Bermudes',
'BOL': 'Bolivie',
'BRA': 'Brésil',
'BRB': 'Barbade',
'BRN': 'Brunei',
'BTN': 'Bhoutan',
'BWA': 'Botswana',
'CAF': 'République Centrafricaine',
'CAN': 'Canada',
'CCK': 'Îles Cocos',
'CHE': 'Suisse',
'CHL': 'Chili',
'CHN': 'Chine',
'CIV': "Côte d'Ivoire",
'CMR': 'Cameroun',
'COD': 'Congo (République démocratique)',
'COG': 'Congo (République)',
'COK': 'Îles Cook',
'COL': 'Colombie',
'COM': 'Comores',
'CPV': 'Cap-Vert',
'CRI': 'Costa Rica',
'CUB': 'Cuba',
'CUW': 'Curaçao',
'CXR': 'Île Christmas',
'CYM': 'Caimans',
'CYP': 'Chypre',
'CZE': 'Tchéquie',
'DEU': 'Allemagne',
'DJI': 'Djibouti',
'DMA': 'Dominique',
'DNK': 'Danemark',
'DOM': 'République dominicaine',
'DZA': 'Algérie',
'ECU': 'Equateur',
'EGY': 'Egypte',
'ERI': 'Erythrée',
'ESH': 'Sahara occidental',
'ESP': 'Espagne',
'EST': 'Estonie',
'ETH': 'Ethiopie',
'FIN': 'Finlande',
'FJI': 'Fidji',
'FLK': 'Îles Malouines',
'FRA': 'France',
'FRO': 'Féroé',
'FSM': 'Micronésie',
'GAB': 'Gabon',
'GBR': 'Royaume-Uni',
'GEO': 'Géorgie',
'GGY': 'Guernesey',
'GHA': 'Ghana',
'GIB': 'Gibraltar',
'GIN': 'Guinée',
'GLP': 'Guadeloupe',
'GMB': 'Gambie',
'GNB': 'Guinée-Bissau',
'GNQ': 'Guinée équatoriale',
'GRC': 'Grèce',
'GRD': 'Grenade',
'GRL': 'Groenland',
'GTM': 'Guatemala',
'GUF': 'Guyane',
'GUM': 'Guam',
'GUY': 'Guyana',
'HKG': 'Hong Kong',
'HND': 'Honduras',
'HRV': 'Croatie',
'HTI': 'Haïti',
'HUN': 'Hongrie',
'IDN': 'Indonésie',
'IMN': 'Île de Man',
'IND': 'Inde',
'IOT': "Territoire britannique de l'océan Indien",
'IRL': 'Irlande',
'IRN': 'Irak',
'IRQ': 'Iran',
'ISL': 'Islande',
'ISR': 'Israël',
'ITA': 'Italie',
'JAM': 'Jamaïque',
'JEY': 'Jersey',
'JOR': 'Jordanie',
'JPN': 'Japon',
'KAZ': 'Kazakhstan',
'KEN': 'Kenya',
'KGZ': 'Kirghizistan',
'KHM': 'Cambodge',
'KIR': 'Kiribati',
'KNA': 'Saint-Christophe-et-Niévès',
'KOR': 'Corée du Sud',
'KWT': 'Koweït',
'LAO': 'Laos',
'LBN': 'Liban',
'LBR': 'Liberia',
'LBY': 'Libye',
'LCA': 'Sainte-Lucie',
'LIE': 'Liechtenstein',
'LKA': 'Sri Lanka',
'LSO': 'Lesotho',
'LTU': 'Lituanie',
'LUX': 'Luxembourg',
'LVA': 'Lettonie',
'MAC': 'Macao',
'MAF': 'Sint-Maarten',
'MAR': 'Maroc',
'MCO': 'Monaco',
'MDA': 'Moldavie',
'MDG': 'Madagascar',
'MDV': 'Maldives',
'MEX': 'Mexique',
'MHL': 'Marshall',
'MKD': 'Macedoine',
'MLI': 'Mali',
'MLT': 'Malte',
'MMR': 'Birmanie',
'MNE': 'Monténégro',
'MNG': 'Mongolie',
'MNP': 'Îles Mariannes du Nord',
'MOZ': 'Mozambique',
'MRT': 'Mauritanie',
'MSR': 'Montserrat',
'MTQ': 'Martinique',
'MUS': 'Maurice',
'MWI': 'Malawi',
'MYS': 'Malaisie',
'MYT': 'Mayotte',
'NAM': 'Namibie',
'NCL': 'Nouvelle-Calédonie',
'NER': 'Niger',
'NFK': 'Île Norfolk',
'NGA': 'Nigeria',
'NIC': 'Nicaragua',
'NIU': 'Niue',
'NLD': 'Pays-Bas',
'NOR': 'Norvège',
'NPL': 'Nepal',
'NRU': 'Nauru',
'NZL': 'Nouvelle-Zélande',
'OMN': 'Oman',
'PAK': 'Pakistan',
'PAN': 'Panama',
'PCN': 'Îles Pitcairn',
'PER': 'Pérou',
'PHL': 'Philippines',
'PLW': 'Palaos',
'PNG': 'Papouasie-Nouvelle-Guinée',
'POL': 'Pologne',
'PRI': 'Porto Rico',
'PRK': 'Corée du Nord',
'PRT': 'Portugal',
'PRY': 'Paraguay',
'PSE': 'Palestine',
'PYF': 'Polynésie française',
'QAT': 'Qatar',
'REU': 'Réunion',
'ROU': 'Roumanie',
'RUS': 'Russie',
'RWA': 'Rwanda',
'SAU': 'Arabie saoudite',
'SDN': 'Soudan',
'SEN': 'Sénégal',
'SGP': 'Singapour',
'SGS': 'Georgie du Sud-et-les iles Sandwich du Sud',
'SHN': 'Sainte-Hélène, Ascension et Tristan da Cunha',
'SJM': 'Svalbard et île Jan Mayen',
'SLB': 'Salomon',
'SLE': 'Sierra Leone',
'SLV': 'Salvador',
'SMR': 'Saint-Marin',
'SOM': 'Somalie',
'SPM': 'Saint-Pierre-et-Miquelon',
'SRB': 'Serbie',
'SSD': 'Soudan du Sud',
'STP': 'Sao Tomé-et-Principe',
'SUR': 'Suriname',
'SVK': 'Slovaquie',
'SVN': 'Slovénie',
'SWE': 'Suède',
'SWZ': 'eSwatani',
'SXM': 'Saint-Martin ',
'SYC': 'Seychelles',
'SYR': 'Syrie',
'TCA': 'Îles Turques-et-Caïques',
'TCD': 'Tchad',
'TGO': 'Togo',
'THA': 'Thaïlande',
'TJK': 'Tadjikistan',
'TKL': 'Tokelau',
'TKM': 'Turkmenistan',
'TLS': 'Timor oriental',
'TON': 'Tonga',
'TTO': 'Trinité-et-Tobago',
'TUN': 'Tunisie',
'TUR': 'Turquie',
'TUV': 'Tuvalu',
'TWN': 'Taiwan',
'TZA': 'Tanzanie',
'UGA': 'Ouganda',
'UKR': 'Ukraine',
'URY': 'Uruguay',
'USA': 'Etats-Unis',
'UZB': 'Ouzbékistan',
'VAT': 'Saint-Siège (État de la Cité du Vatican)',
'VCT': 'Saint-Vincent-et-les-Grenadines',
'VEN': 'Venezuela',
'VGB': 'Îles Vierges britanniques',
'VIR': 'Îles Vierges des États-Unis',
'VNM': 'Viêt Nam',
'VUT': 'Vanuatu',
'WLF': 'Wallis-et-Futuna',
'WSM': 'Samoa',
'XKX': 'Kosovo',
'YEM': 'Yémen',
'ZAF': 'Afrique du Sud',
'ZMB': 'Zambie',
'ZWE': 'Zimbabwe',
'NTZ': 'Zone neutre',
'UNO': 'Fonctionnaire des Nations Unies',
'UNA': "Fonctionnaire d'une organisation affiliée aux Nations Unies",
'UNK': 'Représentant des Nations Unies au Kosovo',
'XXA': 'Apatride Convention 1954',
'XXB': 'Réfugié Convention 1954',
'XXC': 'Réfugié autre',
'XXX': 'Résident Légal de Nationalité Inconnue',
'D': 'Allemagne',
'EUE': 'Union Européenne',
'GBD': "Citoyen Britannique d'Outre-mer (BOTC)",
'GBN': 'British National (Overseas)',
'GBO': 'British Overseas Citizen',
'GBP': 'British Protected Person',
'GBS': 'British Subject',
'XBA': 'Banque Africaine de Développement',
'XIM': "Banque Africaine d'ExportImport",
'XCC': 'Caribbean Community or one of its emissaries',
'XCO': 'Common Market for Eastern and Southern Africa',
'XEC': 'Economic Community of West African States',
'XPO': 'International Criminal Police Organization',
'XOM': 'Sovereign Military Order of Malta',
'RKS': 'Kosovo',
'WSA': 'World Service Authority World Passport'
}
2019-07-09 23:01:09 +02:00
## DOCUMENTS TYPES
2019-07-17 17:12:21 +02:00
2019-07-09 23:01:09 +02:00
P = [
2019-07-17 17:12:21 +02:00
["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "P."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["39", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["14", "FACULT", ".+"],
"D": ["1", "CTRLF", "[0-9]", "C"],
"E": ["1", "CTRL", "[0-9]", "4578ABCD"]
2019-07-09 23:01:09 +02:00
},
"Passeport"
]
IP = [
2019-07-17 17:12:21 +02:00
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "IP"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
2019-07-09 23:01:09 +02:00
},
"Carte-passeport"
]
I_ = [
2019-07-17 17:12:21 +02:00
["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"5": ["15", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+"],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "345679AC"]
2019-07-09 23:01:09 +02:00
},
"Titre d'identité/de voyage"
]
AC = [
2019-07-17 17:12:21 +02:00
["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "AC"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", ".+"],
"4": ["1", "CTRL", "[0-9]", "3"],
"E": ["3", "INDIC", "[A-Z]{1,2}."],
"5": ["12", "FACULT", ".+"],
"6": ["6", "BDATE", "[0-9]+ "],
"7": ["1", "CTRL", "[0-9]", "6"],
"8": ["1", "SEX", "[A-Z]"],
"9": ["6", "EDATE", "[0-9]+"],
"A": ["1", "CTRL", "[0-9]", "9"],
"B": ["3", "NAT", "[A-Z]+"],
"C": ["11", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]","345679AC"]
2019-07-09 23:01:09 +02:00
},
"Certificat de membre d'équipage"
]
2019-07-17 17:12:21 +02:00
## XXXXXXXXXXX
2019-07-19 17:17:30 +02:00
# VB = [
2019-07-17 17:12:21 +02:00
# ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCDE"],
# {
# "1": ["2", "CODE", "V."],
# "2": ["3", "PAYS", "[A-Z]+"],
# "3": ["39", "NOM", "[A-Z]+"],
# "4": ["9", "NO", ".+"],
# "5": ["1", "CTRL", "[0-9]","4"],
# "6": ["3", "NAT", "[A-Z]+"],
# "7": ["6", "BDATE", "[0-9]+"],
# "8": ["1", "CTRL", "[0-9]", "7"],
# "9": ["1", "SEX", "[A-Z]"],
# "A": ["6", "EDATE", "[0-9]+"],
# "B": ["1", "CTRL", "[0-9]", "A"],
# "C": ["14", "FACULT", ".+"]
# },
2019-07-19 17:17:30 +02:00
# "Visa de type B"
2019-07-17 17:12:21 +02:00
# ]
2019-07-09 23:01:09 +02:00
2019-07-19 17:17:30 +02:00
VA = [
2019-07-17 17:12:21 +02:00
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "V."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["8", "FACULT", ".+"]
2019-07-09 23:01:09 +02:00
},
2019-07-19 17:17:30 +02:00
"Visa de type A"
]
TSF = [
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"],
{
"1": ["2", "CODE", "TS"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]","4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["8", "FACULT", ".+"]
},
"Titre de séjour"
2019-07-09 23:01:09 +02:00
]
I__ = [
2019-07-17 17:12:21 +02:00
["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "I."],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["31", "NOM", "([A-Z]|<)+"],
"4": ["9", "NO", ".+"],
"5": ["1", "CTRL", "[0-9]", "4"],
"6": ["3", "NAT", "[A-Z]+"],
"7": ["6", "BDATE", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "7"],
"9": ["1", "SEX", "[A-Z]"],
"A": ["6", "EDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["7", "FACULT", ".+"],
"D": ["1", "CTRL", "[0-9]", "4578ABC"]
2019-07-09 23:01:09 +02:00
},
"Pièce d'identité/de voyage"
]
2019-07-17 17:12:21 +02:00
IDFR = [
2019-08-01 16:52:36 +02:00
["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "ID"],
"2": ["3", "PAYS", "FRA"],
"3": ["25", "NOM", "([A-Z]|<)+"],
"4": ["6", "NOINT", ".+"],
"5": ["4", "DDATE", "[0-9]+"],
"6": ["3", "NOINT2", "[0-9]+"],
"7": ["5", "NOINT3", "[0-9]+"],
"8": ["1", "CTRL", "[0-9]", "567"],
"9": ["14", "PRENOM", "[A-Z]"],
"A": ["6", "BDATE", "[0-9]+"],
"B": ["1", "CTRL", "[0-9]", "A"],
"C": ["1", "SEX", "[A-Z]"],
"D": ["1", "CTRL", "[0-9]", "123456789ABCE"]
2019-07-09 23:01:09 +02:00
},
"Pièce d'identité FR"
]
DL = [
2019-07-17 17:12:21 +02:00
["112223333333334555555666666667", ""],
2019-07-09 23:01:09 +02:00
{
2019-07-17 17:12:21 +02:00
"1": ["2", "CODE", "D1"],
"2": ["3", "PAYS", "[A-Z]+"],
"3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"],
"4": ["1", "CTRL", "[0-9]", "123"],
"5": ["6", "EDATE", "[0-9]+"],
"6": ["8", "NOM", "([A-Z]|<)+"],
"7": ["1", "CTRL", "[0-9]", "123456"]
2019-07-09 23:01:09 +02:00
},
"Permis de conduire"
]
2019-07-17 17:12:21 +02:00
#TYPES = [ID, I__, VB, VA, AC, I_, IP, P, DL]
2019-07-19 17:17:30 +02:00
TYPES = [IDFR, I__, VA, AC, I_, IP, P, DL, TSF]
2019-07-09 23:01:09 +02:00
2019-07-18 15:37:58 +02:00
# longest document MRZ line
longest = max([len(x[0][0]) for x in TYPES])
2019-07-09 23:01:09 +02:00
## THE ROOT OF THIS PROJECT !
2019-07-17 17:12:21 +02:00
def getDocString(doc):
return doc[0][0] + doc[0][1]
def getFieldLimits(doc, fieldtype):
"""
This function returns the limit of a given field string id for a given document structure
"""
L1 = limits(doc[0][0], fieldtype)
L2 = limits(doc[0][1], fieldtype)
if -1 in L1:
return 1, L2
else:
return 0, L1
return
2019-07-17 17:12:21 +02:00
def limits(line, fieldtype):
"""
Returns the limit of a given field structure
"""
2019-07-17 17:12:21 +02:00
a = line.find(fieldtype)
b = line.rfind(fieldtype)
return (a,b+1)
def completeDocField(doc, code, position):
"""
Completes with '<' the document the field that is located at given position
"""
field = getDocString(doc)[position]
limit = limits(getDocString(doc), field)
res = limit[1] - position
#print("field : {}, limit : {}, number of char to complete : {}".format(field, limit, res))
return res
2019-07-17 17:12:21 +02:00
def docMatch(doc, strs):
"""
This function calculates a regex match score for a given document and a string couple
"""
2019-07-17 17:12:21 +02:00
# Global handler
logfile = logger.logCur
level = 0
nchar = 0
bonus = 0
for i in range(0,2):
cursor = 0
#print("Line : {}".format(i))
while True:
if cursor > len(doc[0][i]) - 1:
break
# Getting the type of field on the cursor position
fieldtype = doc[0][i][cursor]
lim = limits(doc[0][i], fieldtype)
# ready for next field
cursor = lim[1]
# get the current field and isolates it
field = doc[0][i][ lim[0]:lim[1] ]
fstr = strs[i][ lim[0]:lim[1] ]
# Prepare regex compilation
regex = re.compile(doc[1][fieldtype][2])
# Test the match
matching = regex.match(fstr)
# Retrieve the mathing level
if matching:
level += matching.end()
if fieldtype == "1":
bonus += 100
nchar += int(doc[1][fieldtype][0])
# Print for debug
2019-07-19 17:17:30 +02:00
# print("Field : {}, type = {}, on str : {}".format(field, fieldtype, fstr))
# logfile.printdbg(" REGEX : {}, match : {}".format(regex, matching))
2019-07-17 17:12:21 +02:00
# exit the loop
logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], level, nchar, bonus))
return (level, nchar, bonus)
2019-07-18 15:37:58 +02:00
def allDocMatch(strs, final=False):
"""
This functions test all documents types on the lines provided and returns a score for each
"""
2019-07-17 17:12:21 +02:00
# Global handler
logfile = logger.logCur
2019-07-19 17:17:30 +02:00
#print(strs)
2019-07-18 15:37:58 +02:00
2019-07-17 17:12:21 +02:00
SCORES = []
for doc in TYPES:
# Get the score of the document on the strings
level, nchar, bonus = docMatch(doc, strs)
# Number of characters compatibles + bonus with the doc indication
SCORES += [ level + bonus ]
# if the len of strings is the same than document, add a bonus
# but only if we are in a final situation
if final:
if len(strs[0] + strs[1]) == nchar:
SCORES[-1] += 100
candidate = SCORES.index(max(SCORES))
candidates = []
canditxt = []
# Search the candidates
for i in range(len(SCORES)):
if SCORES[i] == SCORES[candidate]:
candidates += [TYPES[i]]
canditxt += [TYPES[i][2]]
# Return the candidates
2019-07-19 17:17:30 +02:00
#logfile.printdbg("Scores : {}".format(SCORES))
#logfile.printdbg("Candidates : {}".format(canditxt))
2019-07-17 17:12:21 +02:00
return candidates
2019-07-19 17:17:30 +02:00
def computeControlSum(code):
2019-07-09 23:01:09 +02:00
"""
2019-07-17 17:12:21 +02:00
This function computes a control sum for the given characters
2019-07-09 23:01:09 +02:00
"""
resultat = 0
i = -1
facteur = [7, 3, 1]
for car in code:
2019-07-17 17:12:21 +02:00
if car == '<' or car == '\n':
2019-07-09 23:01:09 +02:00
valeur = 0
i += 1
else:
if car in '0123456789':
valeur = int(car)
i += 1
else:
if car in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
valeur = ord(car) - 55
i += 1
else:
break
resultat += valeur * facteur[(i % 3)]
return resultat % 10
def computeAllControlSum(doc, code):
"""
This function computes all the ctrl sums on a MRZ string and returns all the results
2019-08-01 16:52:36 +02:00
it returns the misc infos about the document too
"""
ctrlSumList = []
2019-08-01 16:52:36 +02:00
facult = False
# iteration on each char of the given MRZ
for charPos in range(len(code)):
field = getDocString(doc)[charPos]
if doc[1][field][1] == "CTRL":
#print("{} is CTRL field {}".format(code[charPos], field))
codeChain = ""
# iteration on the fields to control
for pos in range(len(code)):
target = getDocString(doc)[pos]
if target in doc[1][field][3]:
#print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3]))
codeChain += code[pos]
#print("chain to control : _{}_".format(codeChain))
ctrlSum = computeControlSum(codeChain)
#print("SUM : {} vs {}".format(code[charPos], ctrlSum))
2019-08-01 16:52:36 +02:00
ctrlSumList += [ (field, charPos, ctrlSum, facult) ]
if doc[1][field][1] == "CTRLF":
#print("{} is CTRL field {}".format(code[charPos], field))
codeChain = ""
# iteration on the fields to control
for pos in range(len(code)):
target = getDocString(doc)[pos]
if target in doc[1][field][3]:
#print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3]))
codeChain += code[pos]
#print("chain to control : _{}_".format(codeChain))
ctrlSum = computeControlSum(codeChain)
#print("SUM : {} vs {}".format(code[charPos], ctrlSum))
if code[charPos] == "<":
facult = True
ctrlSumList += [ (field, charPos, ctrlSum, facult) ]
return {
"ctrlSumList" : ctrlSumList
}
2019-08-01 16:52:36 +02:00
def getDocInfos(doc, code):
# get all the types of infos that are in the document doc
infoTypes = [ (doc[1][field][1], limits(doc[0][0] + doc[0][1], field)) for field in doc[1] ]
2019-08-01 16:52:36 +02:00
res = {}
2019-08-01 16:52:36 +02:00
for field in infoTypes:
2019-08-01 16:52:36 +02:00
value = code[ field[1][0] : field[1][1] ]
# State code
if field[0] == 'PAYS' or field[0] == 'NAT':
try:
if len(value) == 3 and value[-1] != "<":
res[field[0]] = landcode3[value]
elif len(value) == 3 and value[-1] == "<":
res[field[0]] = landcode2[value[:-1]]
else:
res[field[0]] = landcode2[value]
except KeyError:
res[field[0]] = False
# Dates
elif field[0][1:] == 'DATE':
# size adaptation
if len(value) == 6:
value = "{}/{}/{}".format(value[4:6], value[2:4], value[0:2])
elif len(value) == 4:
value = "{}/{}/{}".format("01", value[2:4], value[0:2])
# date validation
try:
datetime.datetime.strptime(value,"%d/%m/%y")
except ValueError:
print(value)
if value != "":
res[field[0]] = False
else:
res[field[0]] = value
# Numbers
elif field[0][:-1] == 'NOINT':
try:
res["NO"] += value
except KeyError:
res["NO"] = value
elif field[0] == 'NOINT':
try:
res["NO"] += value
except KeyError:
res["NO"] = value
elif field[0] == 'FACULT':
res["INDIC"] += value
# All other cases
else:
if value != "":
res[field[0]] = value.replace("<", " ").strip()
return res