up atom feed generate
This commit is contained in:
parent
bff1ec1392
commit
791d93ecde
1
.gitignore
vendored
1
.gitignore
vendored
@ -12,3 +12,4 @@ output/*.org
|
||||
html-websites/*
|
||||
pictures_done
|
||||
pictures_inbox/*
|
||||
/index*.xml
|
@ -5,57 +5,78 @@ from datetime import datetime
|
||||
# Chemin du dossier source
|
||||
import argparse
|
||||
|
||||
from utils import find_first_level1_title, find_year_and_slug, find_extract_in_content_org
|
||||
from website_config import configs_sites
|
||||
|
||||
# Configuration des arguments de la ligne de commande
|
||||
parser = argparse.ArgumentParser(description="Générer un nouvel article en mode orgmode.")
|
||||
parser.add_argument("blog_dir", help="Le nom du dossier de blog.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
blog_dir = 'sources/'+args.blog_dir
|
||||
website_ndd = configs_sites[args.blog_dir]['NDD']
|
||||
blog_dir = 'sources/'+args.blog_dir+'/lang_fr/'
|
||||
|
||||
# Expression régulière pour extraire la date du contenu de l'article
|
||||
date_regex = re.compile(r"\b(\d{14})\b")
|
||||
date_regex_org = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")
|
||||
|
||||
# Liste des fichiers org-mode trouvés
|
||||
org_files = []
|
||||
|
||||
limit_articles_feed=1000
|
||||
count_articles=0
|
||||
print('atom generate: fichiers dans le dossier: ',len((blog_dir)))
|
||||
# Parcourt le dossier source à la recherche de fichiers org-mode
|
||||
for root, dirs, files in os.walk(blog_dir):
|
||||
for file in files:
|
||||
if file.endswith(".org"):
|
||||
print(os.path.join(root, file))
|
||||
date_str, annee, slug = find_year_and_slug(file)
|
||||
# Ouvre le fichier et recherche la première date dans le contenu de l'article
|
||||
with open(os.path.join(root, file), "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
match = date_regex.search(content)
|
||||
extract = find_extract_in_content_org(content)
|
||||
count_articles+=1
|
||||
match = date_regex_org.search(content)
|
||||
if match:
|
||||
date = datetime.strptime(match.group(1), "%Y-%m-%d")
|
||||
# Ajoute le fichier à la liste avec sa date correspondante
|
||||
org_files.append((date, os.path.join(root, file)))
|
||||
org_files.append((date, os.path.join(root, file), annee, slug,extract))
|
||||
|
||||
if count_articles > limit_articles_feed:
|
||||
break
|
||||
if count_articles > limit_articles_feed:
|
||||
break
|
||||
# Tri des fichiers par ordre décroissant de date
|
||||
org_files.sort(reverse=True)
|
||||
|
||||
# Génération du flux Atom
|
||||
atom_feed = {"title": "Flux Atom des articles GMI",
|
||||
"link": "http://www.example.com/atom",
|
||||
"updated": org_files[0][0].strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
atom_feed = {"title": "Flux Atom des articles de "+args.blog_dir,
|
||||
"link": f"{website_ndd}/feed",
|
||||
# "updated": org_files[0][0].strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"updated": org_files[0][0],
|
||||
"entries": []}
|
||||
|
||||
for date, file in org_files:
|
||||
for date, file, annee, slug, extract in org_files:
|
||||
# Parse le fichier org-mode pour extraire le titre, la description et la date de publication
|
||||
with open(file, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
title = re.search(r"\*+ (.+)\n", content).group(1)
|
||||
description = re.search(r"\n+ (.+)\n", content, re.DOTALL).group(1)
|
||||
published = date.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
title = find_first_level1_title(content)
|
||||
description = title
|
||||
# published = date_str
|
||||
# Ajoute l'article au flux Atom
|
||||
atom_entry = {"title": title, "link": file, "summary": description, "published": published}
|
||||
atom_entry = {"title": title,
|
||||
"summary": extract,
|
||||
"link": f"{website_ndd}/{annee}/{slug}",
|
||||
"published": date
|
||||
}
|
||||
atom_feed["entries"].append(atom_entry)
|
||||
if published > atom_feed["updated"]:
|
||||
atom_feed["updated"] = published
|
||||
# if published > atom_feed["updated"]:
|
||||
# atom_feed["updated"] = published
|
||||
|
||||
# Enregistrement du flux Atom dans un fichier
|
||||
with open("atom.xml", "w", encoding="utf-8") as f:
|
||||
with open(f"index_{args.blog_dir}.xml", "w", encoding="utf-8") as f:
|
||||
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
|
||||
f.write('<feed xmlns="http://www.w3.org/2005/Atom">\n')
|
||||
f.write(f' <title>{atom_feed["title"]}</title>\n')
|
||||
|
@ -66,7 +66,7 @@ def generer_index(dossier_source, fichier_index):
|
||||
article_name = trouver_nom_article(file_path_org, args.source, 'org')
|
||||
|
||||
basename_file = os.path.basename(file_path_org)
|
||||
article_relative_url = detect_slug_in_file_basename(basename_file)
|
||||
article_relative_url = find_slug_in_file_basename(basename_file)
|
||||
|
||||
if not article_name:
|
||||
article_name = article_relative_url.replace('-', ' ')
|
||||
|
@ -77,6 +77,8 @@ generate_website() {
|
||||
|
||||
|
||||
mkdir -p html-websites/$website_name
|
||||
mkdir -p html-websites/$website_name/feed
|
||||
|
||||
rm -rf html-websites/$website_name/*
|
||||
rm -rf sources/$website_name/converted/*
|
||||
rm -rf sources/$website_name/lang_fr/converted/*
|
||||
@ -212,5 +214,7 @@ for website_name in "${blogs_folders[@]}"; do
|
||||
|
||||
# traiter les réductions d'images dans l'inbox
|
||||
python3 pictures_resize.py
|
||||
python3 atom_generate.py $website_name
|
||||
mv "index_$website_name.xml" "html-websites/$website_name/feed/index.xml"
|
||||
|
||||
done
|
||||
|
@ -56,7 +56,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
|
||||
css_content = f.read()
|
||||
css_content = "<style type='text/css'>{css_content}</style>"
|
||||
template_content["CSS_INLINE_CONTENT"] = css_content
|
||||
template_content["PAGE_SLUG"] = detect_slug_in_file_basename(file)
|
||||
template_content["PAGE_SLUG"] = find_slug_in_file_basename(file)
|
||||
# remplir le template
|
||||
html_content = f"""
|
||||
|
||||
@ -70,7 +70,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
|
||||
<meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
|
||||
<meta property="og:url" content="{template_content['NDD']}">
|
||||
<meta property="og:site_name" content="{template_content['TITLE']}">
|
||||
<link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
|
||||
<link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
|
||||
<link href="/style.css" rel="stylesheet">
|
||||
<script src="main_script.js"></script>
|
||||
<meta charset="utf-8">
|
||||
@ -145,6 +145,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
|
||||
<nav class="footer-nav">
|
||||
{template_content['NAVIGATION']}
|
||||
<a href="/tags/">Tags</a>
|
||||
<a href=""{template_content['NDD']}/feed/">Flux Atom</a>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -128,7 +128,7 @@ def generate_html_pages_for_all_tags(tag_to_files, html_output_folder):
|
||||
"""
|
||||
for file_path_org in files:
|
||||
basename_file = os.path.basename(file_path_org)
|
||||
slug = detect_slug_in_file_basename(basename_file)
|
||||
slug = find_slug_in_file_basename(basename_file)
|
||||
if not slug:
|
||||
slug = trouver_slug_in_article(file_path_org)
|
||||
|
||||
|
@ -14,17 +14,17 @@ from website_config import configs_sites
|
||||
# tester la génération de slug sur un fichier org donné
|
||||
|
||||
basename_file = '20111126170159_cipherbliss_blog_120_bienvenue-sur-informageek.org'
|
||||
found_slug = detect_slug_in_file_basename(basename_file)
|
||||
found_slug = find_slug_in_file_basename(basename_file)
|
||||
print('slug found:',found_slug)
|
||||
|
||||
basename_file = '20200803124344_blog_cil_gometz_11_ecrire-une-comptine-en-python-bonjour-le-jour.org'
|
||||
found_slug = detect_slug_in_file_basename(basename_file)
|
||||
found_slug = find_slug_in_file_basename(basename_file)
|
||||
print('slug found:',found_slug)
|
||||
|
||||
basename_file = '20241115010205_cipherbliss_blog_suivi-de-rédaction-de-livre-orgmode.org'
|
||||
found_slug = detect_slug_in_file_basename(basename_file)
|
||||
found_slug = find_slug_in_file_basename(basename_file)
|
||||
print('slug found:',found_slug)
|
||||
|
||||
basename_file = '20061125015032_tkblog_864_pourquoi-mee2-est-il-une-flamme.org'
|
||||
found_slug = detect_slug_in_file_basename(basename_file)
|
||||
found_slug = find_slug_in_file_basename(basename_file)
|
||||
print('slug found:',found_slug)
|
||||
|
23
utils.py
23
utils.py
@ -66,7 +66,7 @@ def trouver_nom_article(fichier_org, blog_name, format="html"):
|
||||
def find_year_and_slug(fichier):
|
||||
fichier = fichier.replace('..', '.')
|
||||
mylog(f" ------------ build_indexes: find in {fichier} -------------")
|
||||
slug = fichier.replace('.gmi', '')
|
||||
slug = fichier.replace('.gmi', '').replace('.org', '')
|
||||
annee = '2024'
|
||||
date_str = '2024-00-00'
|
||||
date = '2024-00-00'
|
||||
@ -86,7 +86,6 @@ def find_year_and_slug(fichier):
|
||||
else:
|
||||
date = datetime.strptime(date_str, "%Y%m%d%H%M%S")
|
||||
date_string_replaced = str(date).replace(' 00:00:00', '')
|
||||
slug = fichier.replace('.gmi', '')
|
||||
slug = slug.replace(date_string_replaced, '')
|
||||
slug = enlever_premier_tiret_ou_underscore(slug)
|
||||
|
||||
@ -155,6 +154,24 @@ def find_first_level1_title(content):
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
def find_extract_in_content_org(org_content):
    """Return a plain-text extract of an Org-mode document.

    The extract is built by removing, in order:

    1. ``#+KEYWORD`` directive lines (e.g. ``#+title:``), including one
       sitting on the very last line of the document without a trailing
       newline;
    2. every level-2 ``** Logbook`` section, up to (but not including) the
       next level-1 or level-2 heading, or the end of the document;
    3. ``:PROPERTIES:`` ... ``:END:`` drawers that start at column 0;
    4. runs of blank lines, and leading/trailing whitespace on every line.

    :param org_content: raw Org-mode text of one article.
    :return: the cleaned text, stripped of surrounding whitespace.
    """
    # Drop "#+..." directive lines. "(?:\n|\Z)" also catches a directive on
    # the final line when the file does not end with a newline.
    org_content = re.sub(
        r'^\s*#\+.*(?:\n|\Z)', '', org_content, flags=re.MULTILINE
    )

    # Drop logbook sections. The look-ahead is anchored at the start of a
    # line and accepts one or two stars, so the section ends at the next
    # "* " or "** " heading; deeper "*** " sub-headings stay part of the
    # logbook and are removed with it instead of being cut in half.
    org_content = re.sub(
        r'^\*\* Logbook\n.*?(?=^\*{1,2} |\Z)',
        '',
        org_content,
        flags=re.DOTALL | re.MULTILINE,
    )

    # Drop property drawers, including one closed by ":END:" on the last
    # line of the document with no trailing newline.
    org_content = re.sub(
        r'^:PROPERTIES:\n.*?:END:(?:\n|\Z)',
        '',
        org_content,
        flags=re.DOTALL | re.MULTILINE,
    )

    # Collapse consecutive blank (or whitespace-only) lines.
    org_content = re.sub(r'\n\s*\n+', '\n', org_content)

    # Trim whitespace around every individual line.
    org_content = '\n'.join(line.strip() for line in org_content.splitlines())

    # Trim whitespace around the final result.
    return org_content.strip()
|
||||
|
||||
def extract_body_content(html_content):
|
||||
pattern = r'<body.*?>(.*?)</body>'
|
||||
@ -199,7 +216,7 @@ def slugify_title(title_text):
|
||||
title_text = title_text.strip('-')
|
||||
return title_text
|
||||
|
||||
def detect_slug_in_file_basename(file_basename) -> str:
|
||||
def find_slug_in_file_basename(file_basename) -> str:
|
||||
"""
|
||||
Extrait l'année et le slug du nom de fichier selon le format spécifié.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user