utilisation de pypandoc et linking articles

This commit is contained in:
Tykayn 2025-02-19 22:39:11 +01:00 committed by tykayn
parent 2bdb9b7677
commit b31dcd6f8a
8 changed files with 320 additions and 110 deletions

View File

@ -50,25 +50,25 @@ convert_sources() {
ls $website_full_path/*.org
# echo "----------- convert_sources : aucun document dans converted"
if [ -z "$(ls -A /sources/$website_name/lang_fr/converted/)" ]; then
echo "coucou on convertit tout"
for i in $(find "$website_full_path" -maxdepth 1 -type f -name "*.$source_file_extension"); do
# echo "----------- convert_sources : org -> html et -> md : "
# echo " $i"
# echo ""
pandoc --from "${source_file_extension}" --to html -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.html" --metadata title="$i - $website_name"
pandoc --from "${source_file_extension}" --to markdown -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.md" --metadata title="$website_name"
done
else
echo "on ne convertit que les plus récents "
for i in $(find "$website_full_path" -maxdepth 1 -type f -name "*.$source_file_extension" -newermt "2024-11-20"); do
# echo "----------- convert_sources : org -> html et -> md : "
# echo " $i"
# echo ""
pandoc --from "${source_file_extension}" --to html -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.html" --metadata title="$i - $website_name"
pandoc --from "${source_file_extension}" --to markdown -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.md" --metadata title="$website_name"
done
fi
# if [ -z "$(ls -A /sources/$website_name/lang_fr/converted/)" ]; then
# echo "coucou on convertit tout"
# for i in $(find "$website_full_path" -maxdepth 1 -type f -name "*.$source_file_extension"); do
# # echo "----------- convert_sources : org -> html et -> md : "
# # echo " $i"
# # echo ""
# pandoc --from "${source_file_extension}" --to html -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.html" --metadata title="$i - $website_name"
# pandoc --from "${source_file_extension}" --to markdown -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.md" --metadata title="$website_name"
# done
# else
# echo "on ne convertit que les plus récents "
# for i in $(find "$website_full_path" -maxdepth 1 -type f -name "*.$source_file_extension" -newermt "2024-11-20"); do
# # echo "----------- convert_sources : org -> html et -> md : "
# # echo " $i"
# # echo ""
# pandoc --from "${source_file_extension}" --to html -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.html" --metadata title="$i - $website_name"
# pandoc --from "${source_file_extension}" --to markdown -s "${i}" -o "${website_full_path}/converted/${i%.${source_file_extension}}.md" --metadata title="$website_name"
# done
# fi
# exit
# convertir seulement les pages récentes
@ -213,7 +213,7 @@ for website_name in "${blogs_folders[@]}"; do
# générer l'index montrant les posts les plus récents à la suite
bash concat_list_billets.sh $website_name
# sauver le tout dans un fichier index.gmi
python3 build_indexes.py $website_name
# python3 build_indexes.py $website_name
# créer les pages de tags
mkdir -p "html-websites/$website_name/tags"
@ -224,7 +224,7 @@ for website_name in "${blogs_folders[@]}"; do
# déplacer les fichiers générés en html dans le dossier statique
mv sources/$website_name/converted/*.html html-websites/$website_name/
python3 enrich_html.py $website_name --style $style_file
# python3 enrich_html.py $website_name --style $style_file
# copier le style dans le dossier html
cp $style_file html-websites/$website_name/style.css

View File

@ -209,4 +209,6 @@ for root_path, dirs, files in liste_fichiers_du_blog_convertis :
f.write(html_content)
# mylog('\n ----------- enrich_html: html écrit ', html_path_enriched)
# traduire en html le fichier
print('articles listés :',count_articles)

View File

@ -8,11 +8,14 @@ import os
import json
import re
import argparse
import pypandoc
from jinja2 import Environment, FileSystemLoader
# Configurer argparse pour prendre le blog en argument
parser = argparse.ArgumentParser(description='Générer une liste des derniers articles de blog.')
parser.add_argument('blog', type=str, help='Nom du dossier du blog à traiter', default='tykayn_blog')
args = parser.parse_args()
limit_articles_on_index = 10
# Fonction pour extraire le basename d'un fichier
def get_basename(file_name):
@ -21,6 +24,7 @@ def get_basename(file_name):
# Chemin du dossier contenant les fichiers orgmode
directory = f'sources/{args.blog}/lang_fr'
destination_json = f'sources/{args.blog}/build'
destination_html = f'html-websites/{args.blog}/'
# Dictionnaire pour stocker les informations des fichiers
files_dict = {}
@ -33,11 +37,16 @@ for file_name in os.listdir(directory):
basename = get_basename(file_name)
date_str, annee, slug = find_year_and_slug_on_filename(basename)
tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
# Convertir les tags en liste si c'est un set
if isinstance(tags, set):
tags = list(tags)
boom = basename.split('__')
title = find_first_level1_title(content)
# Convertir le contenu Org en HTML
html_content = pypandoc.convert_text(content, 'html', format='org')
files_dict[f"{annee}/{slug}"] = {
'path': file_path,
'basename': basename,
@ -48,7 +57,9 @@ for file_name in os.listdir(directory):
'tags': tags, # Assurez-vous que c'est une liste
'title': title,
'next': None,
'previous': None
'previous': None,
'org_content': content, # Contenu Org original
'html_content': html_content # Contenu HTML converti
}
# Trier les basenames par ordre décroissant
@ -67,6 +78,20 @@ with open(destination_json+'/articles_info.json', 'w', encoding='utf-8') as json
json_file.write(files_dict_serialized)
print(f"Nombre d'articles trouvés : {len(sorted_basenames)}")
# Count the articles whose 'date' value sorts after 1 January 2024.
# The comparison is a plain string comparison, so it presumably relies on
# dates being stored as sortable YYYYMMDD-like strings — TODO confirm.
count_articles_updated = sum(
    1 for article in files_dict.values() if article['date'] > '20240101'
)
print(f"Nombre d'articles mis à jour après le 01 01 2024 : {count_articles_updated}")
# Afficher le dictionnaire pour vérification
# for basename, info in files_dict.items():
# print(f"Article: {basename}")
@ -76,3 +101,38 @@ with open(destination_json+'/articles_info.json', 'w', encoding='utf-8') as json
# print(f" Previous: {info['previous']}")
# print(f" Next: {info['next']}")
# print("-" * 40)
def generate_blog_index(json_file, template_file, output_file, max_articles=10):
    """
    Generate the blog index page from the articles JSON file and a Jinja2 template.

    The template receives three variables: ``template_content`` (the
    configuration of the blog selected on the command line), ``articles``
    (the newest articles) and ``articles_info`` (every article loaded from
    the JSON file).

    :param json_file: Path of the JSON file holding the articles' information.
    :param template_file: Path of the Jinja2 template, resolved from the
        current working directory.
    :param output_file: Path of the HTML file to write.
    :param max_articles: Upper bound on the number of articles kept after
        sorting. Defaults to 10, the value that used to be hard-coded; the
        list is additionally capped by ``global_config['posts_per_page']``.
    """
    # Load the articles' metadata produced earlier in this script.
    with open(json_file, 'r', encoding='utf-8') as f:
        articles_info = json.load(f)

    # Newest first, then keep at most ``max_articles`` entries.
    sorted_articles = sorted(
        articles_info.values(), key=lambda x: x['date'], reverse=True
    )[:max_articles]

    # Templates are looked up relative to the current working directory.
    env = Environment(loader=FileSystemLoader('.'))
    template = env.get_template(template_file)

    output_html = template.render(
        template_content=configs_sites[args.blog],
        articles=sorted_articles[:global_config['posts_per_page']],
        articles_info=articles_info
    )

    # Create the destination folder when it is missing, otherwise open()
    # below fails with FileNotFoundError on a fresh checkout.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(output_html)

    print(f"Page d'index générée dans {output_file}")
# Appel de la fonction pour générer la page d'index
generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index_template.html.jinja', destination_html + '/index.html')

View File

@ -0,0 +1,96 @@
{#- Blog index template (Jinja2).
    Variables supplied at render time:
      template_content : configuration dict of the blog being built
      articles         : newest articles; each one exposes slug, title and html_content
      articles_info    : every article loaded from articles_info.json
-#}
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta property="og:image" content="{{template_content['SITE_ICON']}}">
  <meta property="og:locale" content="{{template_content['LOCALE']}}">
  <meta property="og:description" content="{{template_content['BLOG_SUBTITLE']}}">
  <meta property="og:url" content="{{template_content['NDD']}}">
  <meta property="og:site_name" content="{{template_content['TITLE']}}">
  <link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux"
        href="{{template_content['NDD']}}/feed/">
  <link href="/style.css" rel="stylesheet">
  <script src="main_script.js"></script>
  <title>{{template_content['TITLE']}}</title>
  <meta name="author" content="{{template_content['AUTHOR']}}">
  <link rel="alternate" type="application/rss+xml" title="{{template_content['BLOG_TITLE']}} » Flux"
        href="{{template_content['NDD']}}/feed/">
  <meta property="og:title" content="{{template_content['PAGE_TITLE']}}">
  <!-- Description de la page -->
  <meta name="description" content="{{template_content['PAGE_TITLE']}}">
  <meta name="reply-to" content="{{template_content['EMAIL']}}">
  <link rel="icon" type="{{template_content['SITE_ICON_TYPE']}}" href="{{template_content['SITE_ICON']}}">
</head>
<body>
  <div id="page" class="page__{{template_content['PAGE_SLUG']}}">
    <header id="masthead" class="site-header">
      <div class="header-image"
           style="background-image: url({{template_content['BANNIERE_ENTETE']}}); background-repeat: no-repeat; background-size: cover;">
        <a href="/">
          <img src="{{template_content['SITE_ICON']}}" class="site-icon img">
        </a>
        <h1 class="blog-title">{{template_content['BLOG_TITLE']}}</h1>
        <p class="blog-subtitle">{{template_content['BLOG_SUBTITLE']}}</p>
        <div class="template-header">
        </div>
      </div>
      <nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
        <div class="navbar-brand">
          <a class="navbar-item" href="{{template_content['NDD']}}">
            {{template_content['NDD']}}
          </a>
        </div>
        <div id="navbarBasicExample" class="navbar-menu">
          <div class="navbar-start">
            <a class="logo" href="{{template_content['NDD']}}">
              <img src="{{template_content['SITE_ICON']}}" class="img-fluid">
            </a>
            {{template_content['NAVIGATION']}}
          </div>
          <div class="navbar-end">
            <div class="navbar-item">
              <form role="search" method="get" class="search-form" action="/" id="recherche">
                <label>
                  <input class="search-field" placeholder="Recherche" value="" name="s" type="search">
                </label>
                <input class="is-hidden search-submit" value="Rechercher" type="submit">
              </form>
            </div>
          </div>
        </div>
      </nav>
    </header>
    <main class="body-wrap boxed-container">
      {# One <article> per entry of the `articles` list passed by the renderer. #}
      {% for article in articles %}
      <article class="content">
        <a href="{{ article.slug }}">{{ article.title }}</a>
        <div>{{ article.html_content | safe }}</div>
      </article>
      {% endfor %}
    </main>
    <footer class="site-footer has-top-divider">
      <div class="container">
        <div class="site-footer-inner">
          <div class="site-foot">
          </div>
          <nav class="footer-nav">
            {{template_content['NAVIGATION']}}
            <a href="/tags/">Tags</a>
            <a href="{{template_content['NDD']}}/feed/">Flux Atom</a>
          </nav>
        </div>
      </div>
    </footer>
  </div>
</body>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

View File

@ -1,86 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta property="og:image" content="/img/icon.jpg">
<meta property="og:locale" content="fr_FR">
<meta property="og:description" content="Code, nouvelles technologies et entrepreneurariat par Baptiste Lemoine">
<meta property="og:url" content="https://portfolio.cipherbliss.com/">
<meta property="og:site_name" content="Cipher Bliss">
<link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux"
href="https://www.cipherbliss.com/feed/">
<link href="https://portfolio.cipherbliss.com/build/css/app.7d561c23.css" rel="stylesheet">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>$$TITLE$$</title>
<meta name="author" content="$$AUTHOR$$">
<link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux"
href="http://www.cipherbliss.com/feed/">
<meta property="og:title" content="$$PAGE_TITLE$$">
<meta property="og:locale" content="$$LOCALE$$">
<!-- Description de la page -->
<meta name="description" content="$$DESCRIPTION$$">
<meta name="reply-to" content="$$EMAIL$$">
<link rel="icon" type="$$SITE_ICON$$" href="$$SITE_ICON$$">
</head>
<body>
<div id="page">
<header>
<header id="masthead" class="site-header">
<nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
<div class="navbar-brand">
<a class="navbar-item" href="https://portfolio.cipherbliss.com">
<img src="https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png"
class="img-fluid">
</a>
</div>
<div id="navbarBasicExample" class="navbar-menu">
<div class="navbar-start">
<a class="navbar-item" href="https://portfolio.cipherbliss.com">
<img src="https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png"
class="img-fluid">
</a>
$$NAVIGATION$$
</div>
<div class="navbar-end">
<div class="navbar-item">
<form role="search" method="get" class="search-form" action="/">
<label>
<input class="search-field" placeholder="Recherche" value="" name="s"
type="search">
</label>
<input class="is-hidden search-submit" value="Rechercher" type="submit">
</form>
</div>
</div>
</div>
</nav>
</header>
<div class="header-image">
$$BANNIERE_ENTETE$$
<!-- <img src="https://clairelemoine.art/wp-content/uploads/2023/10/cropped-SiteEntete-Oiseau-aquarelle-1.jpg" alt="$$BANNIERE_ENTETE_ALT$$"> -->
</div>
</header>
<main class="body-wrap boxed-container">
<article class="content">
$$ARTICLE$$
</article>
</main>
<footer class="site-footer has-top-divider">
<div class="container">
<div class="site-footer-inner">
$$FOOTER$$
</div>
</div>
</footer>
</div>
</body>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

View File

@ -0,0 +1,96 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta property="og:image" content="{template_content['SITE_ICON']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
<meta property="og:url" content="{template_content['NDD']}">
<meta property="og:site_name" content="{template_content['TITLE']}">
<link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux"
href="{template_content['NDD']}/feed/">
<link href="/style.css" rel="stylesheet">
<script src="main_script.js"></script>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{template_content['TITLE']}</title>
<meta name="author" content="{template_content['AUTHOR']}">
<link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
href="{template_content['NDD']}/feed/">
<meta property="og:title" content="{template_content['PAGE_TITLE']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<!-- Description de la page -->
<meta name="description" content="{template_content['PAGE_TITLE']}">
<meta name="reply-to" content="{template_content['EMAIL']}">
<link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
</head>
<body>
<div id="page" class="page__{template_content[" PAGE_SLUG"]}">
<header id="masthead" class="site-header">
<div class="header-image" style="background: url('{template_content['BANNIERE_ENTETE']}') no-repeat;
background-size: cover;">
<a href="/">
<img src="{template_content['SITE_ICON']}" class="site-icon img">
</a>
<h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
<p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
<div class="template-header">
</div>
</div>
<nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
<div class="navbar-brand">
<a class="navbar-item" href="{template_content['NDD']}">
{template_content['NDD']}
</a>
</div>
<div id="navbarBasicExample" class="navbar-menu">
<div class="navbar-start">
<a class="logo" href="{template_content['NDD']}">
<img src="{template_content['SITE_ICON']}" class="img-fluid">
</a>
{template_content['NAVIGATION']}
</div>
<div class="navbar-end">
<div class="navbar-item">
<form role="search" method="get" class="search-form" action="/" id="recherche">
<label>
<input class="search-field" placeholder="Recherche" value="" name="s" type="search">
</label>
<input class="is-hidden search-submit" value="Rechercher" type="submit">
</form>
</div>
</div>
</div>
</nav>
</header>
<main class="body-wrap boxed-container">
<article class="content">
{html_content}
<p class="after-article">
{after_article}
</p>
</article>
</main>
<footer class="site-footer has-top-divider">
<div class="container">
<div class="site-footer-inner">
<div class="site-foot">
</div>
<nav class="footer-nav">
{template_content['NAVIGATION']}
<a href="/tags/">Tags</a>
<a href="{template_content['NDD']}/feed/">Flux Atom</a>
</nav>
</div>
</div>
</footer>
</div>
</body>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

View File

@ -221,8 +221,8 @@ def extract_tags_from_file(file_path, excluded_tags):
tags.add(tag)
tag_found = True
if not tag_found:
print('no tag in the article', file_path)
# if not tag_found:
# print('no tag in the article', file_path)
return tags
def remove_properties_section(text):

View File

@ -19,6 +19,9 @@ global_config = {
"wanda","bouffe","anniversaire","conventions","Japan Expo","3.0","cours","! Q","dessin","mariage",
"réutilisation","sketch","road trip","kot","vélo","Bruxelles","Dieppe"]
}
configs_sites = {
"cipherbliss_blog": {
"DOSSIER_SOURCE": "cipherbliss_blog",
@ -183,3 +186,42 @@ configs_sites = {
"BANNIERE_ENTETE_ALT": "Bannière du site",
},
}
# Configuration par défaut
default_config = {
"BLOG_TITLE": "Titre par défaut",
"BLOG_SUBTITLE": "Sous-titre par défaut",
"AUTHOR": "Auteur par défaut",
"LOCALE": "fr_FR",
"DESCRIPTION": "Description par défaut",
"NDD": "https://example.com",
"EMAIL": "contact@example.com",
"SITE_ICON": "https://example.com/icon.png",
"SITE_ICON_TYPE": "image/png",
"NAVIGATION": """
<nav>
<a href="/">Accueil</a>
<a href="/tags">Tags</a>
<a href="/contact">Contact</a>
</nav>
""",
"BANNIERE_ENTETE": "https://example.com/banner.jpg",
"BANNIERE_ENTETE_ALT": "Bannière par défaut",
}
def fill_missing_config(site_config):
    """
    Complete a site configuration with the entries of ``default_config``.

    Keys already present in ``site_config`` keep their current value; only
    keys that are absent are added. The dictionary is modified in place.

    :param site_config: Dictionary holding the configuration of one site.
    :return: The same dictionary object, with the missing keys filled in.
    """
    for option, fallback in default_config.items():
        # setdefault only writes when the key is absent.
        site_config.setdefault(option, fallback)
    return site_config
# Apply the default values to every configured site at import time.
for site_name in configs_sites:
    configs_sites[site_name] = fill_missing_config(configs_sites[site_name])