up atom feed generate

This commit is contained in:
parent bff1ec1392
commit 791d93ecde

.gitignore (vendored)
@@ -12,3 +12,4 @@ output/*.org
 html-websites/*
 pictures_done
 pictures_inbox/*
+/index*.xml
@@ -5,57 +5,78 @@ from datetime import datetime

 # Path of the source folder
 import argparse

+from utils import find_first_level1_title, find_year_and_slug, find_extract_in_content_org
+from website_config import configs_sites

 # Command-line argument configuration
 parser = argparse.ArgumentParser(description="Générer un nouvel article en mode orgmode.")
 parser.add_argument("blog_dir", help="Le nom du dossier de blog.")

 args = parser.parse_args()

-blog_dir = 'sources/'+args.blog_dir
+website_ndd = configs_sites[args.blog_dir]['NDD']
+blog_dir = 'sources/'+args.blog_dir+'/lang_fr/'

 # Regular expression to extract the date from the article content
 date_regex = re.compile(r"\b(\d{14})\b")
+date_regex_org = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")

 # List of org-mode files found
 org_files = []

+limit_articles_feed=1000
+count_articles=0
+print('atom generate: fichiers dans le dossier: ',len((blog_dir)))
 # Walk the source folder looking for org-mode files
 for root, dirs, files in os.walk(blog_dir):
     for file in files:
         if file.endswith(".org"):
+            print(os.path.join(root, file))
+            date_str, annee, slug = find_year_and_slug(file)
             # Open the file and look for the first date in the article content
             with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                 content = f.read()
-                match = date_regex.search(content)
+                extract = find_extract_in_content_org(content)
+                count_articles+=1
+                match = date_regex_org.search(content)
                 if match:
                     date = datetime.strptime(match.group(1), "%Y-%m-%d")
                     # Add the file to the list with its corresponding date
-                    org_files.append((date, os.path.join(root, file)))
+                    org_files.append((date, os.path.join(root, file), annee, slug,extract))

+        if count_articles > limit_articles_feed:
+            break
+    if count_articles > limit_articles_feed:
+        break

 # Sort files by descending date
 org_files.sort(reverse=True)

 # Atom feed generation
-atom_feed = {"title": "Flux Atom des articles GMI",
-             "link": "http://www.example.com/atom",
-             "updated": org_files[0][0].strftime("%Y-%m-%dT%H:%M:%SZ"),
+atom_feed = {"title": "Flux Atom des articles de "+args.blog_dir,
+             "link": f"{website_ndd}/feed",
+             # "updated": org_files[0][0].strftime("%Y-%m-%dT%H:%M:%SZ"),
+             "updated": org_files[0][0],
              "entries": []}

-for date, file in org_files:
+for date, file, annee, slug, extract in org_files:
     # Parse the org-mode file to extract the title, description and publication date
     with open(file, "r", encoding="utf-8") as f:
         content = f.read()
-        title = re.search(r"\*+ (.+)\n", content).group(1)
-        description = re.search(r"\n+ (.+)\n", content, re.DOTALL).group(1)
-        published = date.strftime("%Y-%m-%dT%H:%M:%SZ")
+        title = find_first_level1_title(content)
+        description = title
+        # published = date_str
         # Add the article to the Atom feed
-        atom_entry = {"title": title, "link": file, "summary": description, "published": published}
+        atom_entry = {"title": title,
+                      "summary": extract,
+                      "link": f"{website_ndd}/{annee}/{slug}",
+                      "published": date
+                      }
         atom_feed["entries"].append(atom_entry)
-        if published > atom_feed["updated"]:
-            atom_feed["updated"] = published
+        # if published > atom_feed["updated"]:
+            # atom_feed["updated"] = published

 # Save the Atom feed to a file
-with open("atom.xml", "w", encoding="utf-8") as f:
+with open(f"index_{args.blog_dir}.xml", "w", encoding="utf-8") as f:
     f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
     f.write('<feed xmlns="http://www.w3.org/2005/Atom">\n')
     f.write(f' <title>{atom_feed["title"]}</title>\n')
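The hunk ends here, before the entries themselves are written. As a point of reference, a minimal sketch of how the serialization could continue, assuming the same f.write style and ISO-8601 timestamps; none of the following lines appear in this diff, and in the actual file they would sit inside the same with open(...) block:

# Hypothetical continuation: finish the feed header, then emit one <entry>
# per article from the fields built above.
f.write(f' <link href="{atom_feed["link"]}"/>\n')
f.write(f' <updated>{atom_feed["updated"].strftime("%Y-%m-%dT%H:%M:%SZ")}</updated>\n')
for entry in atom_feed["entries"]:
    f.write(' <entry>\n')
    f.write(f'  <title>{entry["title"]}</title>\n')
    f.write(f'  <link href="{entry["link"]}"/>\n')
    f.write(f'  <published>{entry["published"].strftime("%Y-%m-%dT%H:%M:%SZ")}</published>\n')
    f.write(f'  <summary>{entry["summary"]}</summary>\n')
    f.write(' </entry>\n')
f.write('</feed>\n')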
@@ -66,7 +66,7 @@ def generer_index(dossier_source, fichier_index):
     article_name = trouver_nom_article(file_path_org, args.source, 'org')

     basename_file = os.path.basename(file_path_org)
-    article_relative_url = detect_slug_in_file_basename(basename_file)
+    article_relative_url = find_slug_in_file_basename(basename_file)

     if not article_name:
         article_name = article_relative_url.replace('-', ' ')
@@ -77,6 +77,8 @@ generate_website() {

     mkdir -p html-websites/$website_name
+    mkdir -p html-websites/$website_name/feed

     rm -rf html-websites/$website_name/*
     rm -rf sources/$website_name/converted/*
     rm -rf sources/$website_name/lang_fr/converted/*
@@ -212,5 +214,7 @@ for website_name in "${blogs_folders[@]}"; do

     # process the image resizing in the inbox
     python3 pictures_resize.py
+    python3 atom_generate.py $website_name
+    mv "index_$website_name.xml" "html-websites/$website_name/feed/index.xml"

 done
@@ -56,7 +56,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
         css_content = f.read()
         css_content = "<style type='text/css'>{css_content}</style>"
         template_content["CSS_INLINE_CONTENT"] = css_content
-        template_content["PAGE_SLUG"] = detect_slug_in_file_basename(file)
+        template_content["PAGE_SLUG"] = find_slug_in_file_basename(file)
     # fill in the template
     html_content = f"""
@@ -70,7 +70,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
     <meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
     <meta property="og:url" content="{template_content['NDD']}">
     <meta property="og:site_name" content="{template_content['TITLE']}">
-    <link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
+    <link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
     <link href="/style.css" rel="stylesheet">
     <script src="main_script.js"></script>
     <meta charset="utf-8">
@@ -145,6 +145,7 @@ def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "foo
     <nav class="footer-nav">
         {template_content['NAVIGATION']}
         <a href="/tags/">Tags</a>
+        <a href=""{template_content['NDD']}/feed/">Flux Atom</a>
     </nav>
     </div>
     </div>
@@ -128,7 +128,7 @@ def generate_html_pages_for_all_tags(tag_to_files, html_output_folder):
     """
     for file_path_org in files:
         basename_file = os.path.basename(file_path_org)
-        slug = detect_slug_in_file_basename(basename_file)
+        slug = find_slug_in_file_basename(basename_file)
         if not slug:
             slug = trouver_slug_in_article(file_path_org)
@@ -14,17 +14,17 @@ from website_config import configs_sites
 # test slug generation on a given org file

 basename_file = '20111126170159_cipherbliss_blog_120_bienvenue-sur-informageek.org'
-found_slug = detect_slug_in_file_basename(basename_file)
+found_slug = find_slug_in_file_basename(basename_file)
 print('slug found:',found_slug)

 basename_file = '20200803124344_blog_cil_gometz_11_ecrire-une-comptine-en-python-bonjour-le-jour.org'
-found_slug = detect_slug_in_file_basename(basename_file)
+found_slug = find_slug_in_file_basename(basename_file)
 print('slug found:',found_slug)

 basename_file = '20241115010205_cipherbliss_blog_suivi-de-rédaction-de-livre-orgmode.org'
-found_slug = detect_slug_in_file_basename(basename_file)
+found_slug = find_slug_in_file_basename(basename_file)
 print('slug found:',found_slug)

 basename_file = '20061125015032_tkblog_864_pourquoi-mee2-est-il-une-flamme.org'
-found_slug = detect_slug_in_file_basename(basename_file)
+found_slug = find_slug_in_file_basename(basename_file)
 print('slug found:',found_slug)
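Assuming the basename convention <timestamp>_<blog-id>[_<number>]_<slug>.org seen above, the renamed helper would be expected to print something like the following; this is a hypothetical trace, not output captured from the repo:

# slug found: bienvenue-sur-informageek
# slug found: ecrire-une-comptine-en-python-bonjour-le-jour
# slug found: suivi-de-rédaction-de-livre-orgmode
# slug found: pourquoi-mee2-est-il-une-flamme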
utils.py
@@ -66,7 +66,7 @@ def trouver_nom_article(fichier_org, blog_name, format="html"):
 def find_year_and_slug(fichier):
     fichier = fichier.replace('..', '.')
     mylog(f" ------------ build_indexes: find in {fichier} -------------")
-    slug = fichier.replace('.gmi', '')
+    slug = fichier.replace('.gmi', '').replace('.org', '')
     annee = '2024'
     date_str = '2024-00-00'
     date = '2024-00-00'

@@ -86,7 +86,6 @@ def find_year_and_slug(fichier):
     else:
         date = datetime.strptime(date_str, "%Y%m%d%H%M%S")
         date_string_replaced = str(date).replace(' 00:00:00', '')
-        slug = fichier.replace('.gmi', '')
         slug = slug.replace(date_string_replaced, '')
         slug = enlever_premier_tiret_ou_underscore(slug)
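My reading of why the deleted line mattered, traced on one of the test filenames; this walkthrough is illustrative, not text from the commit:

# fichier = '20061125015032_tkblog_864_pourquoi-mee2-est-il-une-flamme.org'
# After the first change, slug starts out with both extensions stripped:
#   '20061125015032_tkblog_864_pourquoi-mee2-est-il-une-flamme'
# The deleted line then reset slug from fichier with only '.gmi' removed,
# so for .org files the trailing '.org' leaked back into the slug and,
# via atom_generate.py, into feed URLs. Dropping it keeps the cleaned value.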
@@ -155,6 +154,24 @@ def find_first_level1_title(content):
         return match.group(1)
     return None

+def find_extract_in_content_org(org_content):
+    # Remove lines starting with #+
+    org_content = re.sub(r'^\s*#\+.*\n', '', org_content, flags=re.MULTILINE)
+
+    # Remove logbook sections
+    org_content = re.sub(r'^\*\* Logbook\n.*?(?=\*\* |\Z)', '', org_content, flags=re.DOTALL | re.MULTILINE)
+
+    # Remove property drawers
+    org_content = re.sub(r'^:PROPERTIES:\n.*?:END:\n', '', org_content, flags=re.DOTALL | re.MULTILINE)
+
+    # Remove extra blank lines
+    org_content = re.sub(r'\n\s*\n+', '\n', org_content)
+
+    # Strip leading and trailing whitespace from each line
+    org_content = '\n'.join(line.strip() for line in org_content.splitlines())
+
+    # Strip leading and trailing whitespace from the final content
+    return org_content.strip()

 def extract_body_content(html_content):
     pattern = r'<body.*?>(.*?)</body>'
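A quick usage sketch of the new helper on sample org content; the sample text and the expected result are illustrative assumptions, not taken from the repo:

from utils import find_extract_in_content_org

sample = """#+title: Demo
:PROPERTIES:
:ID: 1234
:END:
* Premier titre
Un paragraphe qui servira d'extrait.
"""
# The #+ metadata line and the :PROPERTIES: drawer are stripped, blank
# lines collapsed, and the whole result trimmed, leaving roughly:
# "* Premier titre\nUn paragraphe qui servira d'extrait."
print(find_extract_in_content_org(sample))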
@@ -199,7 +216,7 @@ def slugify_title(title_text):
     title_text = title_text.strip('-')
     return title_text

-def detect_slug_in_file_basename(file_basename) -> str:
+def find_slug_in_file_basename(file_basename) -> str:
     """
     Extracts the year and slug from the file name according to the specified format.