70 lines
2.9 KiB
Python
70 lines
2.9 KiB
Python
#!/usr/bin/python3.10
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""************************
|
|
AnkIdentification - Supplementary code 3-2 : Attributing its vernacular name(s) (VN) to each species.
|
|
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
|
|
************************"""
|
|
|
|
# Importation and initialisation
|
|
import xml.etree.ElementTree as ET
|
|
import sys, json, lxml.etree, lxml.builder
|
|
# Load the species dictionary; its keys are later compared with the page titles.
with open("_dict_species_raw.json", "r") as fpi:
    common_names = json.load(fpi)
|
|
|
|
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
|
|
def progressbar(it, prefix = "", size = 60, file = sys.stdout):
    """Yield the items of ``it`` while drawing a textual progress bar on ``file``.

    Args:
        it: Any iterable, including one-shot iterators. It is materialised
            into a list first so that the total number of items is known.
        prefix: Text written in front of the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar (standard output by default).

    Yields:
        Each item of ``it``, in order. The bar is redrawn after each item has
        been consumed by the caller, and a newline is written at the end.
    """
    # The total comes from the iterable itself rather than from outside state,
    # so the bar is scaled on what is really iterated.
    items = list(it)
    count = len(items)

    def show(j):
        if count:
            x = int(size * j / count)
            percent = round(j / count * 100, 2)
        else:
            # Nothing to iterate: draw a full bar instead of dividing by zero.
            x, percent = size, 100.0
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "█" * x, "." * (size - x), j, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(items):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
|
|
|
|
# Processing of the numbered fragments of the original file (it was split because it is a very huge one)
# NOTE(review): this comment used to announce 7 fragments, but range(1, 7) only
# covers 1.xml to 6.xml - confirm whether a 7.xml should be processed as well.


def _vn_field(template, lang):
    """Return the value following ``<lang>=`` in a VN template body, or None if absent."""
    pieces = template.split(lang + "=")
    if len(pieces) < 2:
        return None
    # The value ends at the next "|"; leftover closing braces and spaces are trimmed.
    return pieces[1].split("|")[0].strip("}}").strip(" ")


def _vernacular_names(wikitext):
    """Build the vernacular-name entry from the first ``{{VN ...}}`` template of a page.

    Returns a dict with a "fr" key (the French name, or "en " followed by the
    English name when no French field exists) and, when a non-empty English
    name exists, an "en" key. Returns None when nothing can be extracted.
    """
    pieces = wikitext.split("{{VN")
    if len(pieces) < 2:
        return None
    template = pieces[1].split("}}")[0].replace("\n", " ")

    french = _vn_field(template, "fr")
    english = _vn_field(template, "en")

    if french is not None:
        # A French field is stored as-is, even when it is empty.
        entry = {"fr": french}
    elif english:
        # Otherwise, fall back on the English name, flagged with an "en " prefix.
        entry = {"fr": "en " + english}
    else:
        return None

    # Also storing of English vernacular names
    if english:
        entry["en"] = english
    return entry


def _page_wikitext(page):
    """Return the text of the first <text> child of the first <revision> of ``page``, or None."""
    # Elements are compared with None explicitly: an Element without children is falsy.
    revision = page.find("revision")
    if revision is None:
        return None
    text_element = revision.find("text")
    if text_element is None:
        return None
    return text_element.text


for file_number in range(1, 7):
    file_number = str(file_number)
    print("File number ", file_number)

    # Parsing of the xml file
    mytree = ET.parse(file_number + '.xml')
    myroot = mytree.getroot()

    # The result of each fragment stays reachable as the module-level name common_names_<n>.
    found_names = {}
    globals()[f"common_names_{file_number}"] = found_names

    for page in progressbar(myroot.iter("page"), "Searching plant species", 40):
        specie = page.find("title").text
        if specie not in common_names:
            continue

        wikitext = _page_wikitext(page)
        if wikitext is None:
            continue

        entry = _vernacular_names(wikitext)
        if entry is not None:
            found_names[specie] = entry

    # Creation of a json file for each fragment
    print("Storing")
    with open("_dict_species_" + file_number + ".json", "w") as fpo:
        json.dump(found_names, fpo)
    print(found_names)
|