email report in eml files

This commit is contained in:
Tykayn 2024-06-11 09:50:36 +02:00 committed by tykayn
parent 37a86d4297
commit 1161ce6510
3 changed files with 493 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# Filtrage d'emails .eml
Permet d'exporter une liste d'emails contenant un certain terme de recherche et en excluant un autre.
Ceci permet de faire un rapport html pour un suivi d'évènements ou de projet.
Paquets python requis:
```bash
pip install pandas jinja2 matplotlib
```
Modifier le contenu de index.py, pour changer les critères de filtres, puis lancer la commande.
```bash
python index.py
```
Ceci lit les fichiers eml du dossier donné en configuration, extrait l'expéditeur, le sujet, le destinataire, la date et le corps de chaque email, puis les envoie au template `email_template.html`, rendu avec jinja2 dans un fichier html nommé d'après les filtres donnés.
Les emails concernés par le filtre sont copiés dans le dossier `mon_filtre_selection` du répertoire courant.

View File

@ -0,0 +1,275 @@
<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Emails Filtrés </title>
<style>
/* ----------------------------- */
/* ==reset */
/* ----------------------------- */
/* base font-size corresponds to 10px and is adapted to rem unit */
html {
font-size: 62.5%;
}
body {
background-color: #fff;
color: #000;
font-family: "Century Gothic", helvetica, arial, sans-serif;
font-size: 1.4em; /* equiv 14px */
line-height: 1.5; /* adapt to your design */
}
/* font-sizing for content */
/* preserve vertical-rythm, thanks to http://soqr.fr/vertical-rhythm/ */
p,
ul,
ol,
dl,
blockquote,
pre,
td,
th,
label,
textarea,
caption,
details,
figure,
hgroup {
font-size: 1em; /* equiv 14px */
line-height: 1.5;
margin: 1.5em 0 0;
}
h1, .h1-like {
font-size: 1.8571em; /* equiv 26px */
font-weight: normal;
line-height: 1.6154em;
margin: .8077em 0 0 0;
}
h2, .h2-like {
font-size: 1.7143em; /* equiv 24px */
font-weight: normal;
line-height: 1.75em;
margin: .875em 0 0 0;
}
h3, .h3-like {
font-size: 1.5714em; /* equiv 22px */
font-weight: normal;
line-height: 1.909em;
margin: .9545em 0 0 0;
}
h4, .h4-like {
font-size: 1.4286em; /* equiv 20px */
font-weight: normal;
line-height: 1.05em;
margin: 1.05em 0 0 0;
}
h5, .h5-like {
font-size: 1.2857em; /* equiv 18px */
font-weight: normal;
line-height: 1.1667em;
margin: 1.1667em 0 0 0;
}
h6, .h6-like {
font-size: 1.1429em; /* equiv 16px */
font-weight: normal;
line-height: 1.3125em;
margin: 1.3125em 0 0 0;
}
/* alternate font-sizing */
.smaller {
font-size: .7143em; /* equiv 10px */
line-height: 2.1em;
}
.small {
font-size: .8571em; /* equiv 12px */
line-height: 1.75em;
}
.big {
font-size: 1.1429em; /* equiv 16px */
line-height: 1.3125em;
}
.bigger {
font-size: 1.2857em; /* equiv 18px */
line-height: 1.1667em;
}
.biggest {
font-size: 1.4286em; /* equiv 20px */
line-height: 1.05em;
}
/* soft reset */
html,
body,
textarea,
figure,
label {
margin: 0;
padding: 0;
}
ul,
ol {
padding-left: 2em;
}
code,
pre,
samp {
white-space: pre-wrap;
font-family: consolas, "DejaVu Sans Mono", courier, monospace;
}
code {
line-height: 1em;
}
table {
margin-bottom: 1.5em;
}
/* avoid top margins on first content element */
p:first-child,
ul:first-child,
ol:first-child,
dl:first-child,
blockquote:first-child,
pre:first-child,
h1:first-child,
h2:first-child,
h3:first-child,
h4:first-child,
h5:first-child,
h6:first-child {
margin-top: 0;
}
/* avoid margins on nested elements */
li p,
li ul,
li ol {
margin-top: 0;
margin-bottom: 0;
}
/* HTML5 tags */
article, aside, details, figcaption, figure, footer, header, hgroup, nav, section {
display: block;
}
/* max values */
img, table, td, blockquote, code, pre, textarea, input, video {
max-width: 100%;
}
/* you shall not pass */
div, textarea, table, td, th, code, pre, samp {
word-wrap: break-word;
-webkit-hyphens: auto;
-moz-hyphens: auto;
-ms-hyphens: auto;
-o-hyphens: auto;
hyphens: auto;
}
/* pictures */
img {
width: auto;
height: auto;
vertical-align: middle;
}
a img {
border: 0;
}
/* scripts */
body > script {
display: none !important;
}
/* skip-links */
.skip-links {
position: absolute;
}
.skip-links a {
position: absolute;
left: -9999px;
padding: 0.5em;
background: #000;
color: #fff;
text-decoration: none;
}
.skip-links a:focus {
position: static;
}
/* ----------------------------- */
/* ==report-specific rules (override the reset above) */
/* ----------------------------- */
/* page layout: horizontally centered column, at most 80 characters wide */
body {
margin: 4rem auto !important;
padding: 2rem;
max-width: 80ch;
}
/* one bordered card per exported email (div.email in the template loop) */
.email {
margin-bottom: 20px;
padding: 2em 4em;
border: 1px solid #ccc;
}
/* <strong> is not rendered bold anywhere in the report */
strong {
font-weight: normal;
}
/* message text: indented and slightly larger than the header lines */
.email-body-content {
margin-top: 1rem;
margin-left: 2rem;
font-size: 1.25rem;
}
/* metadata lines (number, date, subject, sender, recipient) */
.email-header {
font-weight: bold;
font-size: 0.8rem;
}
/* hide tables, nested blockquotes and any blockquote inside an email card; */
/* the .protonmail_* / .gmail_* classes are presumably the quote and signature */
/* wrappers added by those mail clients - TODO confirm */
table, tbody, thead, .protonmail_quote, .gmail_signature, .gmail_quote, .gmail *, blockquote blockquote, .email blockquote {
display: none !important;
}
</style>
</head>
<body>
<h1>Emails Filtrés </h1>
<p>Export d'emails</p>
{# One card per retained email. `emails` is the list of records passed to
   template.render(); each record exposes count, date, subject, from, to, body. #}
{% for email in emails %}
<div class="email">
  <div class="email-header">N°: {{ email.count }}</div>
  <div class="email-header">Date: {{ email.date }}</div>
  <!-- <div class="email-header">Jours depuis le premier email: {{ email.days_since_ref_date }} j</div>-->
  {# Header values come straight from the mail and may contain "<" or "&":
     escape all of them, the subject included. #}
  <div class="email-header">Sujet: {{ email.subject|escape }}</div>
  <div class="email-header">De: {{ email.from|escape }}</div>
  <div class="email-header">À: {{ email.to|escape }}</div>
  <div class="email-body">
    <!-- Message:-->
    <div class="email-body-content">
      {# NOTE(review): the body is rendered unescaped so that HTML mails keep
         their markup (the stylesheet hides quotes and signatures inside it).
         Any <script> or remote content in a mail therefore ends up in the
         report: only open reports built from mailboxes you trust. #}
      {{ email.body }}
    </div>
  </div>
</div>
{% endfor %}
</body>
</html>

View File

@ -0,0 +1,199 @@
import datetime
import email
import os
import re
import shutil
from datetime import datetime
from email import policy
from email.utils import parsedate_to_datetime
from html import unescape

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
from jinja2 import Environment, FileSystemLoader
# Folder scanned for .eml files. By default this is the same directory as the
# output folder used further down, so retained files are already in place.
# dossier_eml = "/home/tykayn/Téléchargements/expot_emails/"
dossier_eml = "./mon_filtre_selection/"
# Term an email must contain to be retained
mot_filtre = "bidule"
# Term that discards an email when it is present
mot_exclure = "riendutout"
# Replacement table for quoted-printable residue left in headers and bodies.
# Entries are applied in insertion order with plain (non-regex) substring
# replacement. Some mappings are deliberate simplifications rather than exact
# decodings (the en dash is dropped, the dot operator becomes ":").
quoted_printable_replace = {
    "3D&quot;": "",
    "=C2=A0": "&nbsp;",
    '"3D=': "",
    "=C5=93": "œ",
    "=E2=8B=85": ":",
    "=E2=80=93": "",
    "=E2=80=99": "'",
    # "é" whose two bytes were separated by a space
    "=C3= =A9": "é",
    # Latin-1 supplement, lower case (UTF-8 lead byte C3, A0-BF)
    "=C3=A0": "à",
    "=C3=A1": "á",
    "=C3=A2": "â",
    "=C3=A3": "ã",
    "=C3=A4": "ä",
    "=C3=A5": "å",
    "=C3=A6": "æ",
    "=C3=A7": "ç",
    "=C3=A8": "è",
    "=C3=A9": "é",
    "=C3=AA": "ê",
    "=C3=AB": "ë",
    "=C3=AC": "ì",
    "=C3=AD": "í",
    "=C3=AE": "î",
    "=C3=AF": "ï",
    "=C3=B0": "ð",
    "=C3=B1": "ñ",
    "=C3=B2": "ò",
    "=C3=B3": "ó",
    "=C3=B4": "ô",
    "=C3=B5": "õ",
    "=C3=B6": "ö",
    "=C3=B7": "÷",
    "=C3=B8": "ø",
    "=C3=B9": "ù",
    "=C3=BA": "ú",
    "=C3=BB": "û",
    "=C3=BC": "ü",
    "=C3=BD": "ý",
    "=C3=BE": "þ",
    "=C3=BF": "ÿ",
    # Latin-1 supplement, upper case (UTF-8 lead byte C3, 80-9F)
    "=C3=80": "À",
    "=C3=81": "Á",
    "=C3=82": "Â",
    "=C3=83": "Ã",
    "=C3=84": "Ä",
    "=C3=85": "Å",
    "=C3=86": "Æ",
    "=C3=87": "Ç",
    "=C3=88": "È",
    "=C3=89": "É",
    "=C3=8A": "Ê",
    "=C3=8B": "Ë",
    "=C3=8C": "Ì",
    "=C3=8D": "Í",
    "=C3=8E": "Î",
    "=C3=8F": "Ï",
    "=C3=90": "Ð",
    "=C3=91": "Ñ",
    "=C3=92": "Ò",
    "=C3=93": "Ó",
    "=C3=94": "Ô",
    "=C3=95": "Õ",
    "=C3=96": "Ö",
    "=C3=97": "×",
    "=C3=98": "Ø",
    "=C3=99": "Ù",
    "=C3=9A": "Ú",
    "=C3=9B": "Û",
    "=C3=9C": "Ü",
    "=C3=9D": "Ý",
    "=C3=9E": "Þ",
    "=C3=9F": "ß",
    "= ;": "",
    # Soft line breaks
    "=\n": "",
    "=\r": "",
    # NOTE(review): '3D"' is applied before '=3D"', so the longer key can never
    # match and an encoded '="' ends up as a bare '='. Order kept as it was.
    '3D"': "",
    '=3D"': "",
    # RFC 2047 encoded-word delimiters found in undecoded Subject headers
    "=?utf-8?q?": "",
    "?=": "",
}

# A multi-byte UTF-8 character written as quoted-printable: one lead byte
# (C0-FF) followed by one or more continuation bytes (80-BF).
_UTF8_QP_RUN = re.compile(r"=[C-F][0-9A-F](?:=[89AB][0-9A-F])+")


def _decode_utf8_run(match):
    """Decode one ``=XX=YY...`` run as UTF-8; return it unchanged if invalid."""
    raw = bytes.fromhex(match.group(0).replace("=", ""))
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return match.group(0)


def clean_text(my_text):
    """Remove quoted-printable residue from *my_text*.

    The replacement table is applied first, in order. Any multi-byte UTF-8
    sequence still present afterwards (for example ``=E2=82=AC``) is decoded
    generically; this also covers sequences that only become contiguous once
    the soft line breaks have been removed.

    Args:
        my_text: text to clean; ``None`` (missing header) is accepted.

    Returns:
        The cleaned string, ``""`` when *my_text* is ``None`` or empty.
    """
    if not my_text:
        return ""
    cleaned = str(my_text)
    for encoded, replacement in quoted_printable_replace.items():
        cleaned = cleaned.replace(encoded, replacement)
    return _UTF8_QP_RUN.sub(_decode_utf8_run, cleaned)
# Directory containing the .eml files
dossier_emails = dossier_eml
# Emails retained by the filters, one dict per email
emails_filtres = []
# Number of emails retained so far; also used as the 1-based email id
count_emails = 0
# Maximum number of emails to retain
limit = 2000
# Folder receiving a copy of every retained email
output_folder = "mon_filtre_selection"
os.makedirs(output_folder, exist_ok=True)


def _header_text(message, name):
    """Return header *name* as a plain ``str``, or ``None`` when it is absent."""
    value = message[name]
    return None if value is None else str(value)


def _decoded_text_parts(message):
    """Return the decoded text of every text/plain and text/html part.

    The transfer encoding (base64, quoted-printable) is undone and the bytes
    are decoded with the charset declared by the part, falling back to UTF-8.
    """
    texts = []
    for part in message.walk():
        if part.get_content_type() not in ("text/plain", "text/html"):
            continue
        raw = part.get_payload(decode=True)
        if raw is None:
            continue
        charset = part.get_content_charset() or "utf-8"
        try:
            texts.append(raw.decode(charset, errors="replace"))
        except LookupError:
            # Charset name unknown to Python
            texts.append(raw.decode("utf-8", errors="replace"))
    return texts


# Walk the .eml files in a stable order so that ids are reproducible
for fichier in sorted(os.listdir(dossier_emails)):
    if count_emails >= limit:
        break
    if not fichier.lower().endswith(".eml"):
        continue
    chemin_fichier = os.path.join(dossier_emails, fichier)
    # Parse from bytes: an .eml file is not guaranteed to be valid UTF-8
    with open(chemin_fichier, "rb") as f:
        message = email.message_from_binary_file(f, policy=policy.default)

    texts = _decoded_text_parts(message)
    # The required term is matched case-sensitively, the excluded term
    # case-insensitively, as before. An empty excluded term disables exclusion.
    if not any(mot_filtre in text for text in texts):
        continue
    if mot_exclure and any(mot_exclure.lower() in text.lower() for text in texts):
        continue
    if not message["From"]:
        continue

    count_emails += 1
    print('.', count_emails)
    output_file = os.path.join(output_folder, fichier)
    # Copy the .eml file to the output folder unless it is already there
    if not os.path.exists(output_file):
        shutil.copy(chemin_fichier, output_folder)
    emails_filtres.append({
        "id": count_emails,
        "subject": clean_text(_header_text(message, "Subject") or ""),
        "from": _header_text(message, "From"),
        "to": _header_text(message, "To"),
        "date": _header_text(message, "Date"),
        # clean_text stays as a safety net for mails whose declared transfer
        # encoding does not match their actual content
        "body": unescape(clean_text("\n".join(texts))),
    })
def _parse_email_date(value):
    """Parse an RFC 5322 ``Date`` header value.

    Non-string values (``None``, already parsed dates) are returned unchanged;
    an unparsable string yields ``None``.
    """
    if not isinstance(value, str):
        return value
    try:
        return parsedate_to_datetime(value)
    except (TypeError, ValueError):
        return None


# Turn the retained emails into a DataFrame; naming the columns keeps the
# frame usable when no email was retained
df = pd.DataFrame(
    emails_filtres,
    columns=["id", "subject", "from", "to", "date", "body"],
)
# Parse the dates and normalise them to UTC so that emails sent from different
# time zones sort correctly; unparsable or missing dates become NaT
df["date"] = pd.to_datetime(df["date"].map(_parse_email_date), utc=True)
# Sort chronologically, then number the emails in that order
df = df.sort_values(by="date", kind="stable").reset_index(drop=True)
df["count"] = df.index + 1
# Add a "days_since_ref_date" column with the number of days since the reference date
# ref_date = datetime.date(2023, 8, 31)
# df["days_since_ref_date"] = df["date"].apply(lambda x: (x - ref_date).days)
# Load the HTML template from the current working directory
env = Environment(loader=FileSystemLoader("."))
template = env.get_template("email_template.html")
# Count the .eml files scanned and the emails retained
nb_total = sum(
    1 for name in os.listdir(dossier_emails) if name.lower().endswith(".eml")
)
nb_emails = len(df)
print(f"Nombre d'emails en tout : {nb_total}")
print(f"Nombre d'emails retenus par les filtres : {nb_emails}")
# Replace leftover quoted-printable sequences in the "body" and "subject"
# columns; regex=False because the keys are literal text ("?=" is not a valid
# regular expression)
for k, v in quoted_printable_replace.items():
    df["body"] = df["body"].str.replace(k, v, regex=False)
    df["subject"] = df["subject"].str.replace(k, v, regex=False)
# Save the DataFrame as JSON
df.to_json("list_emails_filtered.json", orient="records", force_ascii=False)
# Render the HTML report, named after the two filter terms
html_content = template.render(emails=df.to_dict(orient="records"))
with open(f"emails_filtered__with_{mot_filtre}__{mot_exclure}.html", "w", encoding="utf-8") as f:
    f.write(html_content)