up utils and global conf

2024-11-15 15:56:11 +01:00 · 2024-11-15 15:56:11 +01:00 · 9c72473913
commit 9c72473913
parent ed322fbad4
17 changed files with 170 additions and 155 deletions
--- a/WIP/export_html_dragonfeu_blog.sh
+++ b/WIP/export_html_dragonfeu_blog.sh
--- a/WIP/find_info_in_org.py
+++ b/WIP/find_info_in_org.py
@ -1,70 +0,0 @@
-import os
-import argparse
-import re 
-import datetime
-import shutil
-
-
-def enlever_premier_tiret_ou_underscore(chaîne):
-    if chaîne.startswith('-') or chaîne.startswith('_'):
-        chaîne = chaîne[1:]
-    return chaîne
-
-# Expression régulière pour extraire la date et le slug du nom de fichier org
-regex = r"^(\d{4}(-?\d{2}){2}|\d{8})(-[a-zA-Z0-9_-]+)\.gmi$" 
-regex_orgroam = r"^(\d{14})_([a-zA-Z0-9_-]+)\.gmi$" 
-
-def find_year_and_slug(nom_de_fichier):
-    nom_de_fichier = nom_de_fichier.replace('..','.') 
-    annee = ''
-    annee_presumed = nom_de_fichier[:4]
-    print(f"/////////////////   /////////////////")
-    print(f"nom_de_fichier: {nom_de_fichier}")
-    if int(annee_presumed) >1970 and len(annee_presumed) == 4:
-        annee = str(annee_presumed)
-    print(f"année: {annee}")
-    match = re.match(regex_orgroam, nom_de_fichier)
-    if match:
-        date_str = match.group(1)
-        # annee = date_str[:4]
-        slug = match.group(2)
-
-    match = re.match(regex, nom_de_fichier)
-    if match:
-        date_str = match.group(1)
-        # Convertir la date en objet datetime
-        if "-" in date_str:
-            date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
-        else:
-            date = datetime.datetime.strptime(date_str, "%Y%m%d%H%M%S")
-        date_string_replaced = str(date).replace(' 00:00:00','')
-        slug = nom_de_fichier.replace('.gmi','')
-        slug = slug.replace(date_string_replaced,'')
-        slug = enlever_premier_tiret_ou_underscore(slug)
-        
-        annee = str(date.year).replace(' 00:00:00','')
-    else:
-        print('find_year_and_slug : ERREUR aucun slug trouvé')
-        return [None,annee,None]
-
-
-    print(f"str(date): {str(date)}")
-    print(f"slug: {slug}")
-    print(f"chemin: {annee}/{slug}/")
-    return [date_str, annee,slug]
-
-def test_find_year_and_slug_short_date():
-    date_string, année, slug = find_year_and_slug("2024-10-12-machin_truc-chose.gmi")
-    assert slug == "machin_truc-chose"
-    assert année == "2024"
-    assert date_string == "2024-10-12"
-
-def test_find_year_and_slug_orgroam_date():
-    date_string, année, slug = find_year_and_slug("20060925105123_tkblog_879_by-the-wall-intersubkey.gmi")
-    # assert slug == "by-the-wall-intersubkey"
-    assert année == "2006"
-    # assert date_string == "20060925"
-
-
-test_find_year_and_slug_short_date()
-test_find_year_and_slug_orgroam_date()
--- a/WIP/parse_article.py
+++ b/WIP/parse_article.py
@ -1,42 +0,0 @@
-import re 
-import os
-def trouver_nom_article(fichier_org):
-    print('fichier_org, ',fichier_org)
-    with open(fichier_org, 'r') as file:
-        lignes = file.readlines()
-    
-    # Expressions régulières pour trouver les titres de niveau 1 et 2
-    titre_niveau_1 = r'^\*+ (.+)$'
-    titre_niveau_2 = r'^\*\*+ (.+)$'
-    
-    nom_article = None
-    
-    # Itérer sur les lignes du fichier
-    for ligne in lignes:
-        # Rechercher un titre de niveau 1
-        titre_niveau_1_match = re.match(titre_niveau_1, ligne)
-        if titre_niveau_1_match:
-            titre_niveau_1_texte = titre_niveau_1_match.group(1)
-            if titre_niveau_1_texte.lower() != "article":
-                nom_article = titre_niveau_1_texte
-                break
-            else:
-                # Si le premier titre de niveau 1 est "Article", rechercher le premier titre de niveau 2
-                titre_niveau_2_match = re.match(titre_niveau_2, ligne)
-                if titre_niveau_2_match:
-                    nom_article = titre_niveau_2_match.group(1)
-                    break
-    print(f"Nom de l'article : {nom_article}")
-    
-    return nom_article
-
-
-# Chemin absolu du dossier parent (pour sauver le fichier d'index)
-dossier_parent = os.path.dirname(os.path.abspath(__file__))
-
-nom = trouver_nom_article(dossier_parent+'/sources/cipherbliss_blog/contact.org')
-print('nom ',nom)
-
-nom = trouver_nom_article(dossier_parent+'/sources/cipherbliss_blog/lang_fr/20210927092238_cipherbliss_blog_238_des-sauvegardes-qui-durent-mille-ans.org')
-
-print('nom ',nom)
--- a/pycache/build_indexes.cpython-312.pyc
+++ b/pycache/build_indexes.cpython-312.pyc
--- a/pycache/enrich_html.cpython-312.pyc
+++ b/pycache/enrich_html.cpython-312.pyc
--- a/sources/cipherbliss_blog/index.org
+++ b/sources/cipherbliss_blog/index.org
@ -1,9 +0,0 @@
-* CipherBliss
-
-[contact](contact.org)
-
-* Articles
-* en Fr
-* en En 
-* Flux RSS 
-* Bisous 
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241103124156_cipherbliss_blog_création-d'un-blog-gemini.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241103124156_cipherbliss_blog_création-d'un-blog-gemini.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241103131856_cipherbliss_blog_soutien.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241103131856_cipherbliss_blog_soutien.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241108151758-ça-déménage.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241108151758-ça-déménage.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241109230937-ressources-de-café-vie-privée.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241109230937-ressources-de-café-vie-privée.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241109231110-tags.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241109231110-tags.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241109231603-connaître-la-couleur-des-jours-edf-tempo-en-ligne-de-commande.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241109231603-connaître-la-couleur-des-jours-edf-tempo-en-ligne-de-commande.org
--- a/sources/cipherbliss_blog/lang_fr_de_coté/20241111181104_cipherbliss_blog_techniques-pour-planter-votre-association-ou-votre-entreprise.org
+++ b/sources/cipherbliss_blog/lang_fr_de_coté/20241111181104_cipherbliss_blog_techniques-pour-planter-votre-association-ou-votre-entreprise.org
--- a/testing.py
+++ b/testing.py
@ -12,4 +12,4 @@ args = parser.parse_args()


 print(configs_sites[args.blog_name]['DESCRIPTION'])
-# print(configs_sites[f"{args.blog_name}"]['DESCRIPTION'])
+mylog(configs_sites[f"{args.blog_name}"]['DESCRIPTION'])
--- a/update_on_server.sh
+++ b/update_on_server.sh
@ -3,11 +3,14 @@
 # script de déploiement pour syncroniser le site web statique et sa capsule gemini
 # chemins sur le serveur :
 racine_sites_statiques="/poule/encrypted/www/"
+images_inbox="/poule/encrypted/www/tykayn-inbox/blogs-output-pictures"
+images_destination="/poule/encrypted/www/tykayn.fr/wp-content/uploads/i/"
 # on part du principe que ce dépot est cloné sur le serveur pour simplifier la mise à jour
 racine_depot_git="/poule/encrypted/www/orgmode-to-gemini-blog"

 # on copie les fichiers générés dans les dépots servis pour chaque nom de domaine

+# Move the freshly uploaded pictures to the public uploads folder.
+# The glob must stay outside the quotes: inside them "*" is a literal file name
+# and mv would fail instead of moving the content of the inbox.
+mv "$images_inbox"/* "$images_destination"
 cp $racine_depot_git/templates/style_general.css $racine_sites_statiques/cipherbliss.com/
 cp $racine_depot_git/html-websites/cipherbliss_blog/index.html $racine_sites_statiques/cipherbliss.com/
 cp $racine_depot_git/html-websites/cipherbliss_blog/lang_fr/ $racine_sites_statiques/cipherbliss.com -r
--- a/utils.py
+++ b/utils.py
@ -1,10 +1,138 @@
 #!/bin/python3
+import os
 import re
+import shutil
+from datetime import datetime
+
+import website_config
+from website_config import *

 # this path should be customized
 org_roam_dir: str = '/home/tykayn/Nextcloud/textes/orgmode/org-roam/'

+# Find the org-roam identifier (the value that follows ":ID:")
 pattern_roam_id_search = r':ID:(?:\s+)?([a-zA-Z0-9-]+)'
+# Regular expression extracting the date and the slug from a file name
+# shaped like "<14 digits>-<slug>.gmi"
+regex = r"^(\d{14})(-[a-zA-Z0-9_-]+)\.gmi$"
+# Search for the creation date of the org-roam file in a gemini article name
+# shaped like "<14 digits>_<slug>.gmi"
+regex_orgroam = r"^(\d{14})_([a-zA-Z0-9_-]+)\.gmi$"
+
+# Logging switch read by mylog(); its value comes from global_config["show_logs"]
+show_logs = global_config["show_logs"]
+
+
+def mylog(*content):
+    """Print every positional argument when logging is enabled.
+
+    Output is produced only when the module-level ``show_logs`` flag is
+    truthy; otherwise the call does nothing.
+
+    Args:
+        *content: Values to print, separated by single spaces as ``print`` does.
+    """
+    if show_logs:
+        # Unpack the arguments: ``print(content)`` would emit the tuple repr,
+        # e.g. ``('a', 'b')``, instead of the values themselves.
+        print(*content)
+
+
+def trouver_nom_article(fichier_org, blog_name, format="html"):
+    """Return the article title found in a file.
+
+    The file is scanned line by line for the first title matched by the
+    level-1 pattern. When that title is a generic placeholder ("article" or
+    "liens", compared case-insensitively) the scan goes on and the next
+    non-placeholder title of level 1 or level 2 is used instead.
+
+    Note that the org level-1 pattern (``^\\*+ ``) matches headings of any
+    depth, so for org files the first non-placeholder heading wins.
+
+    Args:
+        fichier_org: Path of the file to read.
+        blog_name: Blog identifier; the prefix ``blog_name + '_'`` is removed
+            from the title.
+        format: ``'html'`` to look for ``<h1>``/``<h2>`` tags, any other value
+            to look for org-mode ``*`` headings.
+
+    Returns:
+        The title with the blog prefix removed and underscores replaced by
+        spaces, or an empty string when no title is found.
+    """
+    mylog('fichier_org, ', fichier_org)
+    # Articles contain accented characters: do not depend on the locale encoding.
+    with open(fichier_org, 'r', encoding='utf-8') as file:
+        lignes = file.readlines()
+
+    mylog('trouver_nom_article format', format)
+    # Regular expressions matching level-1 and level-2 titles
+    if format == 'html':
+        titre_niveau_1 = r'<h1\s+(?:id|data-created)="[^"]*">(.*?)</h1>'
+        titre_niveau_2 = r'^\<h2.*?\>(.+)\<\/h2\>$'
+    else:
+        titre_niveau_1 = r'^\*+ (.+)$'
+        titre_niveau_2 = r'^\*\*+ (.+)$'
+
+    titres_generiques = ("article", "liens")
+    nom_article = ''
+    titre_generique_vu = False
+
+    for ligne in lignes:
+        titre_match = re.match(titre_niveau_1, ligne)
+        # Level-2 titles only count once a placeholder level-1 title was seen.
+        # They are looked up on the FOLLOWING lines: the placeholder line itself
+        # can never hold the real title.
+        if titre_match is None and titre_generique_vu:
+            titre_match = re.match(titre_niveau_2, ligne)
+        if titre_match is None:
+            continue
+        texte = titre_match.group(1)
+        if texte.lower() in titres_generiques:
+            titre_generique_vu = True
+            continue
+        nom_article = texte
+        break
+    mylog(f"Nom de l'article : {nom_article}")
+
+    return nom_article.replace(blog_name + '_', '').replace('_', ' ')
+
+
+def find_year_and_slug(fichier):
+    """Extract the date string, the year and the slug from a ``.gmi`` file name.
+
+    Two naming schemes are recognised, both starting with a 14-digit
+    ``YYYYMMDDHHMMSS`` timestamp: ``<timestamp>_<slug>.gmi`` (``regex_orgroam``)
+    and ``<timestamp>-<slug>.gmi`` (``regex``).
+
+    Args:
+        fichier: File name to analyse; a doubled dot (``..``) is collapsed first.
+
+    Returns:
+        A list ``[date_str, annee, slug]``. When the name matches neither
+        scheme, the placeholders ``'2024-00-00'`` and ``'2024'`` are returned
+        together with the file name minus ``.gmi`` as slug.
+    """
+    fichier = fichier.replace('..', '.')
+    mylog(f" ------------ build_indexes: find in {fichier} -------------")
+    # Fallback values used when the file name carries no recognisable timestamp
+    slug = fichier.replace('.gmi', '')
+    annee = '2024'
+    date_str = '2024-00-00'
+    date = '2024-00-00'
+
+    match = re.match(regex_orgroam, fichier)
+    if match:
+        date_str = match.group(1)
+        annee = date_str[:4]
+        slug = match.group(2)
+
+    match = re.match(regex, fichier)
+    if match:
+        date_str = match.group(1)
+        try:
+            # The pattern only accepts 14 digits, so this is the only date format possible
+            date = datetime.strptime(date_str, "%Y%m%d%H%M%S")
+        except ValueError:
+            # 14 digits that are not a real timestamp: keep the raw year prefix
+            # rather than aborting the whole index build
+            annee = date_str[:4]
+        else:
+            annee = str(date.year)
+        # Take the slug from the captured group: removing str(date) from the
+        # file name never matched and left the timestamp glued to the slug
+        slug = enlever_premier_tiret_ou_underscore(match.group(2))
+
+    mylog(f" ------------ build_indexes:  ")
+    mylog(f" ------------ build_indexes: Fichier: {fichier}")
+    mylog(f" ------------ build_indexes: année: {annee}")
+    mylog(f" ------------ build_indexes: str(date): {str(date)}")
+    mylog(f" ------------ build_indexes: slug: {slug}")
+    mylog(f" ------------ build_indexes: chemin: {annee}/{slug}/")
+    return [date_str, annee, slug]
+
+
+def enlever_premier_tiret_ou_underscore(chaîne):
+    """Return *chaîne* without its first character when that character is '-' or '_'."""
+    return chaîne[1:] if chaîne.startswith(('-', '_')) else chaîne
+
+
+def create_path_folders_and_move_file(path, file):
+    """Move *file* to *path*, creating the intermediate folders when needed.
+
+    Args:
+        path: Destination path of the file.
+        file: Path of the file to move.
+    """
+    dossier_parent = os.path.dirname(path)
+    # A bare file name has no parent folder, and os.makedirs('') raises
+    if dossier_parent:
+        os.makedirs(dossier_parent, exist_ok=True)
+
+    shutil.move(file, path)
+
+
+def get_files_list_of_folder(folder_path):
+    """List the source article files of a folder under their ``.gmi`` names.
+
+    Only the files whose extension is ``global_config['source_files_extension']``
+    are kept; that extension is swapped for ``.gmi`` in the returned names.
+
+    Args:
+        folder_path: Folder to read.
+
+    Returns:
+        The ``.gmi`` names sorted in descending order, or ``None`` when the
+        folder does not exist or cannot be read.
+    """
+    # Check that the folder exists
+    if not os.path.exists(folder_path):
+        print(f" ------------ build_indexes: Erreur : Le dossier '{folder_path}' n'existe pas.")
+        return None
+    mylog('----------- get_files_list_of_folder: folder_path : ', folder_path)
+    # The extension is stored in the global_config dict; ``website_config`` is
+    # the imported module and cannot be subscripted
+    suffixe_source = '.' + global_config['source_files_extension']
+    try:
+        noms = os.listdir(folder_path)
+    except OSError as e:
+        print(f" ------------ build_indexes: Erreur lors de la lecture du dossier : {e}")
+        return None
+    # Swap the trailing extension only: str.replace would also rewrite an
+    # identical sequence in the middle of the name
+    fichiers_md = sorted(
+        (f[:-len(suffixe_source)] + '.gmi' for f in noms if f.endswith(suffixe_source)),
+        reverse=True)
+    print('fichiers trouvés:', len(fichiers_md))
+    return fichiers_md


 def get_id_of_roam_note_content(content):
@ -29,7 +157,7 @@ def find_first_level1_title(content):


 def extract_body_content(html_content):
-    pattern = r'<body[^>]*?>(.*?)</body>'
+    pattern = r'<body.*?>(.*?)</body>'
    match = re.search(pattern, html_content, re.DOTALL)
    if match:
        return match.group(1)
--- a/website_config.py
+++ b/website_config.py
@ -2,7 +2,12 @@
 # configuration pour générer les sites web de plusieurs dossiers
 global_config = {
    "slug_with_year": True,
+    # set to False to silence the mylog() helper of utils.py
+    "show_logs": True,
    "rebuild_files_filter": 2024,
+    # extension (without the leading dot) of the source article files,
+    # read by get_files_list_of_folder() in utils.py
+    "source_files_extension": "org",
+    # controlled vocabulary to find tags automatically
+    "auto_tag_terms": ["illustration", "tuto", "nsfw", "bd", "récit", "science"]
 }
 configs_sites = {
    "cipherbliss_blog": {