AnkIdentification/Supplementary_files/1-Order_species_by_rarety.py

35 lines
1.3 KiB
Python

#!/usr/bin/python3.10
# -*- coding: utf-8 -*-
"""************************
AnkIdentification - Supplementary code 1: Order species by rarity.
Aurélien VALENTIN - from 06/11/2022 to 29/12/2022
************************"""
# Imports and initialisation
import json, sys
dict_plants = {}
# Definition of the progressbar function, from https://gist.github.com/ChesterChowWOV/2b35c551b339adbf459363322aac5b4b
def progressbar(it, prefix = "", size = 60, file = sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar on ``file``.

    The bar is redrawn in place (each update ends with a carriage return)
    once before the first item is yielded and once after the caller has
    finished with each item. A newline is written when the iteration is
    exhausted.

    Args:
        it: Sized iterable; ``len(it)`` is called when iteration starts.
        prefix: Text written in front of the bar.
        size: Total width of the bar, in characters.
        file: Writable text stream receiving the bar (``sys.stdout`` by default).

    Yields:
        Every item of ``it``, in order.
    """
    count = len(it)

    def show(done):
        if count:
            filled = int(size * done / count)
            percent = round(done / count * 100, 2)
        else:
            # Nothing to iterate over: draw a complete bar instead of dividing by zero.
            filled, percent = size, 100.0
        # "#" marks the completed part and "." the remaining part, so the bar
        # always spans exactly `size` characters.
        file.write("{}[{}{}] {}/{} {}%\r".format(prefix, "#" * filled, "." * (size - filled), done, count, percent))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
# Count how many lines of the tab-separated "multimedia.txt" file mention each species.
with open("multimedia.txt", encoding="utf-8") as fpi:
    fpi.readline()  # Skip the first line (presumably the column header row).
    # All remaining lines are loaded at once because progressbar() needs len().
    for line in progressbar(fpi.readlines(), "Counting", 40):
        # The 6th tab-separated column holds the name; only its first two
        # space-separated words are kept (presumably genus + species epithet).
        name_words = line.split("\t")[5].split(" ")
        species = name_words[0] + " " + name_words[1]
        # dict.get avoids a bare `except:` that would also hide unrelated errors.
        dict_plants[species] = dict_plants.get(species, 0) + 1
# Order the species from the highest to the lowest number of occurrences.
dict_plants_sorted = dict(sorted(dict_plants.items(), key=lambda item: item[1], reverse=True))
# json.dump escapes non-ASCII characters by default, so the output is plain ASCII.
with open("_dict_species_by_rarety.json", "w", encoding="utf-8") as fpo:
    json.dump(dict_plants_sorted, fpo)