Upload supplementary files
This commit is contained in:
parent
1b6ed30926
commit
e5dc703cfc
15747
Supplementary_files/0229588-220831081235567.csv
Normal file
15747
Supplementary_files/0229588-220831081235567.csv
Normal file
File diff suppressed because it is too large
Load Diff
35
Supplementary_files/1-Order_species_by_rarety.py
Normal file
35
Supplementary_files/1-Order_species_by_rarety.py
Normal file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/python3.10
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""************************
|
||||
AnkIdentification - Supplementary code 1 : Order species by rarity.
|
||||
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
|
||||
************************"""
|
||||
# Importation and initialisation
|
||||
import json, sys
|
||||
dict_plants = {}
|
||||
|
||||
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
|
||||
def progressbar(it, prefix = "", size = 60, file = sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar.

    The bar is rewritten in place (each frame ends with a carriage return)
    before the first item and after each yielded item; a newline is written
    once the iteration is over.

    Args:
        it: Sized iterable (``len(it)`` is called once, up front).
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar.

    Yields:
        The items of ``it``, unchanged and in order.
    """
    count = len(it)

    def show(j):
        # An empty input is drawn as a completed bar instead of dividing by zero.
        x = int(size*j/count) if count else size
        percent = round(j / count * 100, 2) if count else 100.0
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "█"*x, "."*(size-x), j, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
|
||||
|
||||
# Counting of the occurrences of each species
with open("multimedia.txt", encoding="utf-8") as fpi:
    fpi.readline()  # the first line is read and discarded
    for line in progressbar(fpi.readlines(), "Counting", 40):
        # The key is made of the first two space-separated words of the
        # tab-separated field of index 5 (the line is split only once).
        name_words = line.split("\t")[5].split(" ")
        species = name_words[0] + " " + name_words[1]
        # dict.get replaces the former bare "except:", which would also have
        # hidden unrelated errors (e.g. a keyboard interruption).
        dict_plants[species] = dict_plants.get(species, 0) + 1

# Sorting of the species by decreasing number of occurrences
dict_plants_sorted = dict(sorted(dict_plants.items(), key=lambda item: item[1], reverse=True))

# Generation of the json file
with open("_dict_species_by_rarety.json", "w") as fpo:
    json.dump(dict_plants_sorted, fpo)
|
34
Supplementary_files/2-Attributing_families.py
Normal file
34
Supplementary_files/2-Attributing_families.py
Normal file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/python3.10
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""************************
|
||||
AnkIdentification - Supplementary code 2 : Attributing its family to each species.
|
||||
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
|
||||
************************"""
|
||||
|
||||
# Importation and initialisation
|
||||
import json, sys
|
||||
dict_classification = {}
|
||||
|
||||
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
|
||||
def progressbar(it, prefix = "", size = 60, file = sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar.

    The bar is rewritten in place (each frame ends with a carriage return)
    before the first item and after each yielded item; a newline is written
    once the iteration is over.

    Args:
        it: Sized iterable (``len(it)`` is called once, up front).
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar.

    Yields:
        The items of ``it``, unchanged and in order.
    """
    count = len(it)

    def show(j):
        # An empty input is drawn as a completed bar instead of dividing by zero.
        x = int(size*j/count) if count else size
        percent = round(j / count * 100, 2) if count else 100.0
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "█"*x, "."*(size-x), j, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
|
||||
|
||||
# Building of the dictionary, then writing of the corresponding json file
with open("classification.txt", encoding="utf-8") as classification:
    classification.readline()  # the first line is read and discarded
    remaining_lines = classification.readlines()
    for line in progressbar(remaining_lines, "Computing", 40):
        columns = line.split("\t")
        # Only the rows whose field of index 4 is exactly "SPECIES" are kept
        if columns[4] != "SPECIES":
            continue
        # Key: first two space-separated words of the field of index 3;
        # value: field of index 7 (presumably the family - confirm against the data file)
        name_words = columns[3].split(" ")
        binomial_name = name_words[0] + " " + name_words[1]
        dict_classification[binomial_name] = columns[7]

with open("_dict_classification.json", "w") as fpo:
    json.dump(dict_classification, fpo)
|
42
Supplementary_files/3-1-Get_the_list_of_all_plants.py
Normal file
42
Supplementary_files/3-1-Get_the_list_of_all_plants.py
Normal file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/python3.10
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""************************
|
||||
AnkIdentification - Supplementary code 3-1 : Creating the list of all plant species.
|
||||
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
|
||||
************************"""
|
||||
|
||||
# Importation and initialisation
|
||||
import json, sys
|
||||
dict_common_names = {}
|
||||
|
||||
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
|
||||
def progressbar(it, prefix="", size=60, file=sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar.

    The bar is rewritten in place (each frame ends with a carriage return)
    before the first item and after each yielded item; a newline is written
    once the iteration is over.

    Args:
        it: Sized iterable (``len(it)`` is called once, up front).
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar.

    Yields:
        The items of ``it``, unchanged and in order.
    """
    count = len(it)

    def show(j):
        # An empty input is drawn as a completed bar instead of dividing by zero.
        x = int(size*j/count) if count else size
        percent = round(j/count * 100, 2) if count else 100.0
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "█"*x, "."*(size-x), j, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    file.write("\n")
    file.flush()
|
||||
|
||||
# Creation of the list
with open("0229588-220831081235567.csv", encoding="utf-8") as fpi:
    fpi.readline()  # the first line is read and discarded
    for line in progressbar(fpi.readlines(), "Processing species", 40):
        columns = line.split("\t")
        try:
            # First two space-separated words of the field of index 1
            name_words = columns[1].split(" ")
            specie = name_words[0] + " " + name_words[1]
            dict_common_names[specie] = ""
            # Looking for another scientific name accepted (field of index 3).
            # Storing it unconditionally is harmless: when it equals "specie",
            # the same key simply receives the same empty value again.
            accepted_words = columns[3].split(" ")
            specie2 = accepted_words[0] + " " + accepted_words[1]
            dict_common_names[specie2] = ""
        except IndexError:
            # Lines with too few fields or a one-word name are skipped on
            # purpose; any other error is no longer silently swallowed.
            continue

# Generation of the json file
with open("_dict_species_raw.json", "w") as fpo:
    json.dump(dict_common_names, fpo)

print(dict_common_names)
|
69
Supplementary_files/3-2-Attributing_vernacular_names.py
Normal file
69
Supplementary_files/3-2-Attributing_vernacular_names.py
Normal file
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/python3.10
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""************************
|
||||
AnkIdentification - Supplementary code 3-2 : Attributing its vernacular name(s) (VN) to each species.
|
||||
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
|
||||
************************"""
|
||||
|
||||
# Importation and initialisation
|
||||
import xml.etree.ElementTree as ET
|
||||
import sys, json, lxml.etree, lxml.builder
|
||||
with open("_dict_species_raw.json", "r") as fpi: common_names = json.load(fpi)
|
||||
|
||||
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
|
||||
def progressbar(it, prefix = "", size = 60, file = sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar.

    The bar is rewritten in place (each frame ends with a carriage return)
    before the first item and after each yielded item; a newline is written
    once the iteration is over.

    The total is taken from ``it`` itself rather than from a module-level
    variable, so the function works with any iterable and the bar reaches
    exactly ``count/count``.

    Args:
        it: Iterable to walk through. If it has no length (iterator or
            generator), it is first materialised into a list to be counted.
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar.

    Yields:
        The items of ``it``, unchanged and in order.
    """
    try:
        count = len(it)
    except TypeError:
        # Iterators have no len(): collect the items once to know the total.
        it = list(it)
        count = len(it)

    def show(j):
        # An empty input is drawn as a completed bar instead of dividing by zero.
        x = int(size*j/count) if count else size
        percent = round(j / count * 100, 2) if count else 100.0
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "█"*x, "."*(size-x), j, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
|
||||
|
||||
# Processing of the fragments of the original file (it is too large to be handled at once)
# NOTE(review): the original comment announced 7 fragments, but range(1, 7)
# only visits 1.xml to 6.xml - confirm whether a 7.xml should be processed too.


def _vn_template(page):
    """Return the content of the first "{{VN ... }}" template of a page, or None.

    Newlines inside the template are replaced by spaces. None is returned when
    the page has no revision/text element, no text, or no "{{VN" marker.
    """
    revision = page.find("revision")
    text_node = revision.find("text") if revision is not None else None
    wikitext = text_node.text if text_node is not None else None
    if not wikitext:
        return None
    pieces = wikitext.split("{{VN")
    if len(pieces) < 2:
        return None
    return pieces[1].split("}}")[0].replace("\n", " ")


def _vn_value(template, language):
    """Return the text following "<language>=" in a VN template, or None if absent.

    The value stops at the next "|"; surrounding "}" characters, then spaces,
    are stripped. An empty string is returned when the field exists but is empty.
    """
    pieces = template.split(language + "=")
    if len(pieces) < 2:
        return None
    return pieces[1].split("|")[0].strip("}}").strip(" ")


for file_number in range(1, 7):
    file_number = str(file_number)
    print("File number ", file_number)

    # Parsing of the xml file
    mytree = ET.parse(file_number + '.xml')
    myroot = mytree.getroot()

    # The per-fragment dictionary is still published as a module-level
    # "common_names_<n>" name, as before, but handled through a plain alias.
    fragment_names = {}
    globals()[f"common_names_{file_number}"] = fragment_names

    for page in progressbar(myroot.iter("page"), "Searching plant species", 40):
        specie = page.find("title").text
        if specie not in common_names:
            continue

        VN = _vn_template(page)
        if VN is None:
            continue

        french = _vn_value(VN, "fr")
        english = _vn_value(VN, "en")

        if french is not None:
            # Checking of French names (stored even when the field is empty)
            names = {"fr": french}
        elif english:
            # Otherwise, checking for an English one, flagged with an "en " prefix
            names = {"fr": "en " + english}
        else:
            continue

        # Also storing of English vernacular names
        if english:
            names["en"] = english

        fragment_names[specie] = names

    # Creation of a json file for each fragment
    print("Storing")
    with open("_dict_species_" + file_number + ".json", "w") as fpo:
        json.dump(fragment_names, fpo)
    print(fragment_names)
|
Loading…
Reference in New Issue
Block a user