Handle a missing JSON file for a blog

2025-02-23 20:47:50 +01:00 · 2025-02-23 20:47:50 +01:00 · 9d5c9ba9ea
commit 9d5c9ba9ea
parent 9543704e15
1 changed file with 53 additions and 14 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@@ -17,7 +17,7 @@ import time  # Importer le module time
 start_time = time.time()
 # Configs pour tester
 generate_linkings_json = True
-generate_articles = True
+


 # Configurer argparse pour prendre le blog en argument
@@ -48,7 +48,21 @@ destination_html = f'html-websites/{args.blog}/'
 destination_gmi = f'gemini-capsules/{args.blog}/'

 # Dictionnaire pour stocker les informations des fichiers
-files_dict = {}
+# Vérifier si le fichier JSON existe déjà
+json_file = destination_json + '/articles_info.json'
+if os.path.exists(json_file):
+    print(f"Chargement du fichier JSON existant: {json_file}")
+    try:
+        with open(json_file, 'r', encoding='utf-8') as f:
+            files_dict = json.load(f)
+        print(f"Fichier JSON chargé avec succès, {len(files_dict)} articles trouvés")
+    except Exception as e:
+        print(f"Erreur lors du chargement du fichier JSON: {e}")
+        files_dict = {}
+else:
+    print("Aucun fichier JSON existant trouvé")
+    files_dict = {}
+

 def get_first_picture_url(content):
    # Utiliser une expression régulière pour trouver la première URL d'image dans le contenu
@@ -100,11 +114,12 @@ Navigation:
        print(f"Erreur lors de la sauvegarde du fichier : {e}")
    return output

+count_articles = len(os.listdir(directory))
+counter=0
+rebuild_counter = 0
+pandoc_runs_counter = 0
+
 if generate_linkings_json :
-    count_articles = len(os.listdir(directory))
-    counter=0
-    rebuild_counter = 0
-    pandoc_runs_counter = 0
    
    print(f"Génération des liens entre articles pour {count_articles} articles")
    print(f"run_pandoc: {run_pandoc}")
@@ -180,9 +195,19 @@ if generate_linkings_json :
                if rebuild_this_article_html:
                    rebuild_counter += 1

+
+                # Garder le contenu HTML existant si déjà présent
+                if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
+                    html_content = files_dict[f"{annee}/{slug}"]['html_content']
+                    html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
+                else:
+                    html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
+                    
+
                if run_pandoc and rebuild_this_article_html or force_html_regen:
                    # convertir le contenu d'article org vers html
-                    print(f"BRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html")
+                    print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
+                    
                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                    pandoc_runs_counter += 1
                else:
@@ -195,6 +220,8 @@ if generate_linkings_json :



+
+
                files_dict[f"{annee}/{slug}"] = {
                    'path': file_path,
                    'basename': basename,
@@ -213,7 +240,7 @@ if generate_linkings_json :
                    'last_html_build': last_html_build_time,
                    'last_gemini_build': last_gemini_build,
                    'org_content': content,  # Contenu Org original
-                    'html_content_without_h1': re.sub(r'<h1>.*?</h1>', '', html_content),  # Contenu HTML converti sans le titre de premier niveau
+                    'html_content_without_h1': html_content_without_h1,  # Contenu HTML converti sans le titre de premier niveau
                    'html_content': html_content  # Contenu first_picture_urlHTML converti
                }

@@ -253,9 +280,14 @@ for i in range(len(sorted_basenames)):
 os.makedirs(destination_json, exist_ok=True)

 json_file=destination_json+'/articles_info.json'
-with open(  json_file, 'w', encoding='utf-8') as json_file:
-    files_dict_serialized = json.dumps(files_dict, ensure_ascii=False, indent=4)
-    json_file.write(files_dict_serialized)
+
+
+
+if pandoc_runs_counter > 0 or not os.path.exists(json_file):
+    print(f"\033[91m Les articles ont changé, Génération du json {json_file} \033[0m")
+    with open(  json_file, 'w', encoding='utf-8') as json_file:
+        files_dict_serialized = json.dumps(files_dict, ensure_ascii=False, indent=4)
+        json_file.write(files_dict_serialized)



@@ -344,8 +376,15 @@ Pages:
        f.write(output_index_gmi)
        print(f"Page d'index gemini générée dans {gmi_index_file}")

-# Appel de la fonction pour générer la page d'index
-generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index.html.jinja', destination_html + '/index.html')
+
+# Générer la page d'index seulement si des articles ont été convertis
+# if pandoc_runs_counter > 0:
+    # Appel de la fonction pour générer la page d'index
+generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index.html.jinja', destination_html + 'index.html')
+
+print(f"\033[91m index régénéré {destination_html}index.html \033[0m")
+# else:
+    # print("Aucun article n'a été converti, la page d'index n'est pas régénérée")


 def generate_article_pages(json_file, template_file, output_dir):
@@ -395,7 +434,7 @@ def generate_article_pages(json_file, template_file, output_dir):
                f.write(output_html)
        print('generate_article_pages: fin de génération de l index')

-if generate_articles:
+if pandoc_runs_counter:
 # Appel de la fonction pour générer les pages des articles
    generate_article_pages(destination_json + '/articles_info.json', 'templates/html/article.html.jinja', destination_html)