# orgmode-to-gemini-blog/atom_generate.py
import argparse
import os
import re
from datetime import datetime
from xml.sax.saxutils import escape, quoteattr

from utils import find_first_level1_title, find_year_and_slug, find_extract_in_content_org
from website_config import configs_sites

# Path of the source folder
# Command-line argument configuration: the script takes a single positional
# argument, the name of the blog folder.
parser = argparse.ArgumentParser(description="Générer le flux Atom d'un blog.")
parser.add_argument("blog_dir", help="Le nom du dossier de blog.")
args = parser.parse_args()

# Base address of the site, read from the per-blog configuration; it is used
# later in the script as the prefix of every link written to the feed.
# An unknown blog (or a configuration without 'NDD') is reported as a normal
# usage error instead of a raw KeyError traceback.
try:
    website_ndd = configs_sites[args.blog_dir]['NDD']
except KeyError:
    parser.error(
        f"configuration introuvable pour le blog {args.blog_dir!r} "
        "(entrée 'NDD' absente de configs_sites)"
    )

# Folder holding the org-mode sources of the blog.
blog_dir = 'sources/'+args.blog_dir+'/lang_fr/'
# Regular expressions used to extract a date from an article.
# date_regex matches a standalone run of 14 digits; it is not referenced by the
# rest of the visible script and is kept only in case other code relies on it.
date_regex = re.compile(r"\b(\d{14})\b")
# date_regex_org matches a standalone YYYY-MM-DD date.
date_regex_org = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")

# Org-mode articles found so far, one tuple per article.
org_files = []
# Maximum number of articles allowed in the feed, and running article counter.
limit_articles_feed = 1000
count_articles = 0

# Report how many files live under the source folder. The previous code printed
# len(blog_dir), i.e. the number of characters of the path, not a file count.
source_file_count = sum(len(names) for _root, _dirs, names in os.walk(blog_dir))
print('atom generate: fichiers dans le dossier: ', source_file_count)
# Walk the source folder and collect every dated org-mode article.
for root, dirs, files in os.walk(blog_dir):
    for file in files:
        if not file.endswith(".org"):
            continue

        path = os.path.join(root, file)
        print(path)
        # The first returned value (a date string) is not needed here.
        _date_str, annee, slug = find_year_and_slug(file)

        # Read the article and look for the first YYYY-MM-DD date in its content.
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        match = date_regex_org.search(content)
        if not match:
            # Articles without a date cannot be ordered, so they are left out.
            continue
        try:
            date = datetime.strptime(match.group(1), "%Y-%m-%d")
        except ValueError:
            # The pattern also matches impossible dates such as 2024-13-45.
            print('atom generate: date invalide ignorée dans', path)
            continue

        extract = find_extract_in_content_org(content)
        # Store the article together with its date so it can be sorted later.
        org_files.append((date, path, annee, slug, extract))

# Apply the feed limit only once everything is known: keep the newest articles.
# Cutting the walk short instead would keep an arbitrary subset, because
# os.walk does not visit files in date order. Ties on the date are broken by
# path so the result is deterministic.
org_files.sort(key=lambda item: (item[0], item[1]), reverse=True)
del org_files[limit_articles_feed:]
count_articles = len(org_files)
# Sort the articles from newest to oldest. Only the date and the path take
# part in the comparison, so the remaining tuple fields never need to be
# comparable with each other.
org_files.sort(key=lambda item: (item[0], item[1]), reverse=True)

# The feed's "updated" value is the date of the newest article. When no dated
# article was found, fall back to the current time instead of failing with an
# IndexError on org_files[0].
if org_files:
    feed_updated = org_files[0][0]
else:
    print('atom generate: aucun article daté trouvé, le flux sera vide')
    feed_updated = datetime.now()

# In-memory description of the Atom feed. "updated" is kept as a datetime
# object; turning it into text is left to the code that writes the feed.
atom_feed = {
    "title": "Flux Atom des articles de "+args.blog_dir,
    "link": f"{website_ndd}/feed",
    "updated": feed_updated,
    "entries": [],
}
# Build one feed entry per collected article.
for date, file, annee, slug, extract in org_files:
    # Re-read the org-mode file to extract its first level-1 title.
    with open(file, "r", encoding="utf-8") as f:
        content = f.read()

    # Fall back to the slug when no title is returned, so the entry never ends
    # up with an empty or "None" title.
    # NOTE(review): assumes find_first_level1_title returns a falsy value when
    # the article has no level-1 heading - confirm against utils.
    title = find_first_level1_title(content) or slug

    # Add the article to the Atom feed.
    atom_entry = {
        "title": title,
        "summary": extract,
        "link": f"{website_ndd}/{annee}/{slug}",
        "published": date,
    }
    atom_feed["entries"].append(atom_entry)
def _rfc3339(value):
    """Return *value* formatted as an RFC 3339 timestamp, as Atom requires.

    datetime objects are rendered as ``YYYY-MM-DDTHH:MM:SSZ``; the article
    dates are naive, so they are labelled as UTC as-is. Any other value
    (e.g. an already formatted string) is returned through ``str()``.
    """
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%dT%H:%M:%SZ")
    return str(value)


def _xml_text(value):
    """Return *value* as text that is safe inside an XML element."""
    return escape("" if value is None else str(value))


# Serialise the Atom feed. Titles and summaries come from article content and
# may contain '&', '<' or '>', so every value is escaped before being written.
feed_tmp_path = f"index_{args.blog_dir}.xml"
feed_final_path = f"html-websites/{args.blog_dir}/feed/index.xml"

feed_lines = [
    '<?xml version="1.0" encoding="UTF-8"?>\n',
    '<feed xmlns="http://www.w3.org/2005/Atom">\n',
    f' <title>{_xml_text(atom_feed["title"])}</title>\n',
    # quoteattr() escapes the value and adds the surrounding quotes itself.
    f' <link href={quoteattr(str(atom_feed["link"]))}/>\n',
    # Atom requires an <id> on the feed; the feed link is used for it.
    f' <id>{_xml_text(atom_feed["link"])}</id>\n',
    f' <updated>{_rfc3339(atom_feed["updated"])}</updated>\n',
]
for entry in atom_feed["entries"]:
    entry_date = _rfc3339(entry["published"])
    feed_lines.extend([
        ' <entry>\n',
        f' <title>{_xml_text(entry["title"])}</title>\n',
        f' <link href={quoteattr(str(entry["link"]))}/>\n',
        # Atom requires <id> and <updated> on every entry; the article link
        # and its publication date are used for them.
        f' <id>{_xml_text(entry["link"])}</id>\n',
        f' <updated>{entry_date}</updated>\n',
        f' <summary>{_xml_text(entry["summary"])}</summary>\n',
        f' <published>{entry_date}</published>\n',
        ' </entry>\n',
    ])
feed_lines.append('</feed>')

# Write the feed next to the script first, then move it into the website tree.
with open(feed_tmp_path, "w", encoding="utf-8") as f:
    f.writelines(feed_lines)

# Make sure the destination folder exists, and use os.replace so an existing
# feed is overwritten on every platform.
os.makedirs(os.path.dirname(feed_final_path), exist_ok=True)
os.replace(feed_tmp_path, feed_final_path)