# -*- coding: utf8 -*- """ ******************************************************************************** * CNIRevelator * * * * Desc: MRZ data dictionnary for CNIRevelator analyzer and * * functions to analyze these data * * * * Copyright © 2018-2019 Adrien Bourmault (neox95) * * * * This file is part of CNIRevelator. * * * * CNIRevelator is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * any later version. * * * * CNIRevelator is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY*without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with CNIRevelator. If not, see . * ******************************************************************************** """ import re import datetime import logger # logger.py import globs # globs.py import lang # lang.py import critical # critical.py ## SEX CODES sexcode = {'M':'Homme', 'F':'Femme', 'X':'Non spécifié'} ## COUNTRY CODES landcode2 = lang.all[globs.CNIRlang]["LANDCODE2"] landcode3 = lang.all[globs.CNIRlang]["LANDCODE3"] ## DOCUMENTS TYPES P = [ ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCDE"], { "1": ["2", "CODE", "P."], "2": ["3", "PAYS", "[A-Z]+"], "3": ["39", "NOM", "([A-Z]|<)+"], "4": ["9", "NO", ".+"], "5": ["1", "CTRL", "[0-9]", "4"], "6": ["3", "NAT", "[A-Z]+"], "7": ["6", "BDATE", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "7"], "9": ["1", "SEX", "[A-Z]"], "A": ["6", "EDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["14", "FACULT", ".+"], "D": ["1", "CTRLF", "[0-9]", "C"], "E": ["1", "CTRL", "[0-9]", "4578ABCD"] }, lang.all[globs.CNIRlang]["Passeport lisible à la machine"] ] IP = [ ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"], { "1": ["2", "CODE", "IP"], "2": ["3", "PAYS", "[A-Z]+"], "3": ["9", "NO", ".+"], "4": ["1", "CTRL", "[0-9]", "3"], "5": ["15", "FACULT", ".+"], "6": ["6", "BDATE", "[0-9]+"], "7": ["1", "CTRL", "[0-9]", "6"], "8": ["1", "SEX", "[A-Z]"], "9": ["6", "EDATE", "[0-9]+"], "A": ["1", "CTRL", "[0-9]", "9"], "B": ["3", "NAT", "[A-Z]+"], "C": ["11", "FACULT", ".+"], "D": ["1", "CTRL", "[0-9]", "345679AC"] }, lang.all[globs.CNIRlang]["Carte-passeport"] ] IDEUR = [ ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"], { "1": ["2", "CODE", "I."], "2": ["3", "PAYS", "[A-Z]+"], "3": ["9", "NO", ".+"], "4": ["1", "CTRL", "[0-9]", "3"], "5": ["15", "FACULT", ".+"], "6": ["6", "BDATE", "[0-9]+"], "7": ["1", "CTRL", "[0-9]", "6"], "8": ["1", "SEX", "[A-Z]"], "9": ["6", "EDATE", "[0-9]+"], "A": ["1", "CTRL", "[0-9]", "9"], "B": ["3", "NAT", "[A-Z]+"], "C": ["11", "FACULT", ".+"], "D": ["1", "CTRL", "[0-9]", "345679AC"] }, lang.all[globs.CNIRlang]["Carte d’identité européenne"] ] TSEUR = [ ["112223333333334555555555555555", "66666678999999ABBBCCCCCCCCCCCD"], { "1": ["2", "CODE", "IR"], "2": ["3", "PAYS", "[A-Z]+"], "3": ["9", "NO", ".+"], "4": ["1", "CTRL", "[0-9]", "3"], "5": ["15", "FACULT", ".+"], "6": ["6", "BDATE", "[0-9]+"], "7": ["1", "CTRL", "[0-9]", "6"], "8": ["1", "SEX", "[A-Z]"], "9": ["6", "EDATE", "[0-9]+"], "A": ["1", "CTRL", "[0-9]", "9"], "B": ["3", "NAT", "[A-Z]+"], "C": ["11", "FACULT", ".+"], "D": ["1", "CTRL", "[0-9]", "345679AC"] }, lang.all[globs.CNIRlang]["Carte de séjour européenne"] ] AC = [ ["112223333333334EEE555555555555", "66666678999999ABBBCCCCCCCCCCCD"], { "1": ["2", "CODE", "AC"], "2": ["3", "PAYS", "[A-Z]+"], "3": ["9", "NO", ".+"], "4": ["1", "CTRL", "[0-9]", "3"], "E": ["3", "INDIC", "[A-Z]{1,2}."], "5": ["12", "FACULT", ".+"], "6": ["6", "BDATE", "[0-9]+ "], "7": ["1", "CTRL", "[0-9]", "6"], "8": ["1", "SEX", "[A-Z]"], "9": ["6", "EDATE", "[0-9]+"], "A": ["1", "CTRL", "[0-9]", "9"], "B": ["3", "NAT", "[A-Z]+"], "C": ["11", "FACULT", ".+"], "D": ["1", "CTRL", "[0-9]","345679AC"] }, lang.all[globs.CNIRlang]["Certificat de membre d'équipage"] ] VA = [ ["11222333333333333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCCCCCCCCCC"], { "1": ["2", "CODE", "V."], "2": ["3", "PAYS", "[A-Z]+"], "3": ["39", "NOM", "([A-Z]|<)+"], "4": ["9", "NO", ".+"], "5": ["1", "CTRL", "[0-9]","4"], "6": ["3", "NAT", "[A-Z]+"], "7": ["6", "BDATE", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "7"], "9": ["1", "SEX", "[A-Z]"], "A": ["6", "EDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["16", "FACULT", ".+"] }, lang.all[globs.CNIRlang]["Visa de type A"] ] VB = [ ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCC"], { "1": ["2", "CODE", "V."], "2": ["3", "PAYS", "[A-Z]+"], "3": ["31", "NOM", "([A-Z]|<)+"], "4": ["9", "NO", ".+"], "5": ["1", "CTRL", "[0-9]","4"], "6": ["3", "NAT", "[A-Z]+"], "7": ["6", "BDATE", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "7"], "9": ["1", "SEX", "[A-Z]"], "A": ["6", "EDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["8", "FACULT", ".+"] }, lang.all[globs.CNIRlang]["Visa de type B"] ] TSF = [ ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCC"], { "1": ["2", "CODE", "TS"], "2": ["3", "PAYS", "FRA"], "3": ["31", "NOM", "([A-Z]|<)+"], "4": ["9", "NO", ".+"], "5": ["1", "CTRL", "[0-9]","4"], "6": ["3", "NAT", "[A-Z]+"], "7": ["6", "BDATE", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "7"], "9": ["1", "SEX", "[A-Z]"], "A": ["6", "EDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["8", "FACULT", ".+"] }, lang.all[globs.CNIRlang]["Carte de séjour FR"] ] TDV = [ ["112223333333333333333333333333333333", "444444444566677777789AAAAAABCCCCCCCD"], { "1": ["2", "CODE", "I."], "2": ["3", "PAYS", "[A-Z]+"], "3": ["31", "NOM", "([A-Z]|<)+"], "4": ["9", "NO", ".+"], "5": ["1", "CTRL", "[0-9]", "4"], "6": ["3", "NAT", "[A-Z]+"], "7": ["6", "BDATE", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "7"], "9": ["1", "SEX", "[A-Z]"], "A": ["6", "EDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["7", "FACULT", ".+"], "D": ["1", "CTRL", "[0-9]", "4578ABC"] }, lang.all[globs.CNIRlang]["Titre d'identité/de voyage"] ] IDFR = [ ["112223333333333333333333333333444444", "555566677777899999999999999AAAAAABCD"], { "1": ["2", "CODE", "ID"], "2": ["3", "PAYS", "FRA"], "3": ["25", "NOM", "([A-Z]|<)+"], "4": ["6", "NOINT", ".+"], "5": ["4", "DDATE", "[0-9]+"], "6": ["3", "NOINT2", "[0-9]+"], "7": ["5", "NOINT3", "[0-9]+"], "8": ["1", "CTRL", "[0-9]", "567"], "9": ["14", "PRENOM", "[A-Z]"], "A": ["6", "BDATE", "[0-9]+"], "B": ["1", "CTRL", "[0-9]", "A"], "C": ["1", "SEX", "[A-Z]"], "D": ["1", "CTRL", "[0-9]", "123456789ABCE"] }, lang.all[globs.CNIRlang]["Pièce d'identité FR"] ] DL = [ ["112223333333334555555666666667", ""], { "1": ["2", "CODE", "D1"], "2": ["3", "PAYS", "[A-Z]+"], "3": ["9", "NO", "[0-9]{2}[A-Z]{2}[0-9]{5}"], "4": ["1", "CTRL", "[0-9]", "123"], "5": ["6", "EDATE", "[0-9]+"], "6": ["8", "NOM", "([A-Z]|<)+"], "7": ["1", "CTRL", "[0-9]", "123456"] }, lang.all[globs.CNIRlang]["Permis de conduire"] ] TYPES = [IDFR, TDV, VB, VA, AC, IDEUR, IP, P, DL, TSF, TSEUR] # longest document MRZ line longest = max([len(x[0][0]) for x in TYPES]) ## THE ROOT OF THIS PROJECT ! def getDocString(doc): return doc[0][0] + doc[0][1] def getFieldLimits(doc, fieldtype): """ This function returns the limit of a given field string id for a given document structure """ L1 = limits(doc[0][0], fieldtype) L2 = limits(doc[0][1], fieldtype) if -1 in L1: return 1, L2 else: return 0, L1 return def limits(line, fieldtype): """ Returns the limit of a given field structure """ a = line.find(fieldtype) b = line.rfind(fieldtype) return (a,b+1) def completeDocField(doc, code, position): """ Completes with '<' the document the field that is located at given position """ field = getDocString(doc)[position] limit = limits(getDocString(doc), field) res = limit[1] - position #print("field : {}, limit : {}, number of char to complete : {}".format(field, limit, res)) return res def docMatch(doc, strs): """ This function calculates a regex match score for a given document and a string couple """ # Global handler logfile = logger.logCur level = 0 nchar = 0 bonus = 0 for i in range(0,2): cursor = 0 #print("Line : {}".format(i)) while True: if cursor > len(doc[0][i]) - 1: break # Getting the type of field on the cursor position fieldtype = doc[0][i][cursor] lim = limits(doc[0][i], fieldtype) # ready for next field cursor = lim[1] # get the current field and isolates it field = doc[0][i][ lim[0]:lim[1] ] fstr = strs[i][ lim[0]:lim[1] ] # Prepare regex compilation regex = re.compile(doc[1][fieldtype][2]) # Test the match matching = regex.match(fstr) # Retrieve the mathing level if matching: level += matching.end() if fieldtype == "1": bonus += 100 nchar += int(doc[1][fieldtype][0]) # Print for debug # print("Field : {}, type = {}, on str : {}".format(field, fieldtype, fstr)) # logfile.printdbg(" REGEX : {}, match : {}".format(regex, matching)) # exit the loop logfile.printdbg("{} level : {}/{} (+{})".format(doc[2], level, nchar, bonus)) return (level, nchar, bonus) def allDocMatch(strs, final=False): """ This functions test all documents types on the lines provided and returns a score for each """ # Global handler logfile = logger.logCur #print(strs) SCORES = [] for doc in TYPES: # Get the score of the document on the strings level, nchar, bonus = docMatch(doc, strs) # Number of characters compatibles + bonus with the doc indication SCORES += [ level + bonus ] # if the len of strings is the same than document, add a bonus # but only if we are in a final situation if final: if len(strs[0] + strs[1]) == nchar: SCORES[-1] += 100 candidate = SCORES.index(max(SCORES)) candidates = [] canditxt = [] # Search the candidates for i in range(len(SCORES)): if SCORES[i] == SCORES[candidate]: candidates += [TYPES[i]] canditxt += [TYPES[i][2]] # Continue searching if len(candidates) < 2: tempRemovedCandidate = SCORES.pop(candidate) if (SCORES.index(max(SCORES)) != candidate) and (max(SCORES) >= tempRemovedCandidate - 20): if SCORES.index(max(SCORES)) < candidate: candidates += [ TYPES[SCORES.index(max(SCORES))] ] else: candidates += [ TYPES[SCORES.index(max(SCORES)) + 1] ] SCORES.insert(candidate, tempRemovedCandidate) # Return the candidates logfile.printdbg("Scores : {}".format(SCORES)) logfile.printdbg("Candidates : {}".format(canditxt)) return candidates def computeControlSum(code): """ This function computes a control sum for the given characters """ resultat = 0 i = -1 facteur = [7, 3, 1] for car in code: if car == '<' or car == '\n': valeur = 0 i += 1 else: if car in '0123456789': valeur = int(car) i += 1 else: if car in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ': valeur = ord(car) - 55 i += 1 else: break resultat += valeur * facteur[(i % 3)] return resultat % 10 def computeAllControlSum(doc, code): """ This function computes all the ctrl sums on a MRZ string and returns all the results it returns the misc infos about the document too """ ctrlSumList = [] facult = False # iteration on each char of the given MRZ for charPos in range(len(code)): # Sanity check if len(getDocString(doc)) <= charPos: break field = getDocString(doc)[charPos] if doc[1][field][1] == "CTRL": #print("{} is CTRL field {}".format(code[charPos], field)) codeChain = "" # iteration on the fields to control for pos in range(len(code)): #print("Len : {}, pos : {}".format(len(getDocString(doc)), pos)) # Sanity check if len(getDocString(doc)) <= pos: break target = getDocString(doc)[pos] if target in doc[1][field][3]: #print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3])) codeChain += code[pos] #print("chain to control : _{}_".format(codeChain)) ctrlSum = computeControlSum(codeChain) #print("SUM : {} vs {}".format(code[charPos], ctrlSum)) ctrlSumList += [ (field, charPos, ctrlSum, facult) ] if doc[1][field][1] == "CTRLF": #print("{} is CTRL field {}".format(code[charPos], field)) codeChain = "" # iteration on the fields to control for pos in range(len(code)): target = getDocString(doc)[pos] if target in doc[1][field][3]: #print("__field : {} {} {} {}".format(target, pos, field, doc[1][field][3])) codeChain += code[pos] #print("chain to control : _{}_".format(codeChain)) ctrlSum = computeControlSum(codeChain) #print("SUM : {} vs {}".format(code[charPos], ctrlSum)) if code[charPos] == "<": facult = True ctrlSumList += [ (field, charPos, ctrlSum, facult) ] return { "ctrlSumList" : ctrlSumList } def getDocInfos(doc, code): # get all the types of infos that are in the document doc infoTypes = [ (doc[1][field][1], limits(doc[0][0] + doc[0][1], field)) for field in doc[1] ] res = {} # Length of MRZ length = len(code) if length == len(doc[0][0]+doc[0][1]): res["LEN"] = [length, True] else: res["LEN"] = [length, False] for field in infoTypes: value = code[ field[1][0] : field[1][1] ].replace("<", " ").strip() res[field[0]] = [0,0] # State code if field[0] == 'PAYS' or field[0] == 'NAT': try: if len(value) == 3 and value[-1] != "<": res[field[0]] = (landcode3[value], True) elif len(value) == 3 and value[-1] == "<": res[field[0]] = (landcode2[value[:-1]], True) else: res[field[0]] = (landcode2[value], True) except KeyError: res[field[0]] = [value, False] # Dates elif field[0][1:] == 'DATE': # size adaptation if len(value) == 6: value = "{}/{}/{}".format(value[4:6], value[2:4], value[0:2]) elif len(value) == 4: value = "{}/{}/{}".format("01", value[2:4], value[0:2]) # date validation try: datetime.datetime.strptime(value,"%d/%m/%y") except ValueError: #print(value) if value != "": res[field[0]] = [value, False] else: res[field[0]] = [value, True] # Numbers elif field[0][:-1] == 'NOINT': try: res["NO"][0] += value res["NO"][1] = True except KeyError: res["NO"] = [value, True] elif field[0] == 'NOINT': try: res["NO"][0] += value res["NO"][1] = True except KeyError: res["NO"] = [value, True] elif field[0] == 'FACULT': try: res["INDIC"][0] += value res["INDIC"][1] = True except KeyError: res["INDIC"] = [value, True] # Sex elif field[0] == 'SEX': if not value in "MF": res[field[0]] = [value, False] else: res[field[0]] = [value, True] # All other cases else: if value != "": res[field[0]] = [value, True] return res