# orgmode-to-gemini-blog/enrich_html.py

#!/bin/python3
import os
import argparse
import re

# Command-line interface: the blog to process, plus an optional site title
# and stylesheet path.
parser = argparse.ArgumentParser(
    description="Générer un site Web à partir de fichiers HTML.",
)
parser.add_argument(
    "blog_name",
    help="Le chemin vers le dossier contenant les fichiers HTML.",
)
parser.add_argument(
    "--title",
    "-t",
    default="Mon site Web",
    help="Le titre du site Web.",
)
parser.add_argument(
    "--style",
    default="templates/style_general.css",
    help="Le chemin vers le fichier de style CSS.",
)
args = parser.parse_args()

# Stylesheet path as given on the command line.
style_file = args.style

# Locations derived from the blog name.
blog_name = args.blog_name
source_blog = f"sources/{blog_name}"
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"

# Page fragments injected around the article; both are currently empty.
# (Loading the footer from footer_content_path is disabled for now.)
footer_content = ''
after_article = ''

# --- Page template variables -------------------------------------------------

# Site identity
BLOG_TITLE = 'Cipher Bliss'
BLOG_SUBTITLE = 'Code, nouvelles technologies et entrepreneurariat par B. Lemoine'
NDD = 'https://www.cipherbliss.com'
EMAIL = 'contact@cipherbliss.com'
LOCALE = 'fr_FR'

# Per-page metadata (left blank here)
TITLE = ''
AUTHOR = ''
PAGE_TITLE = ''
DESCRIPTION = ''

# Icon and header banner
SITE_ICON = 'https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png'
SITE_ICON_TYPE = 'image/png'
BANNIERE_ENTETE = 'https://www.cipherbliss.com/wp-content/uploads/2016/11/bg.jpg'
BANNIERE_ENTETE_ALT = 'bannière du site'

# Navigation links, rendered both in the header and in the footer
NAVIGATION = """
<nav>
    <a href="/">Accueil</a>
    <a href="https://portfolio.cipherbliss.com">Portfolio</a>
    <a href="/feed">Flux RSS</a>
    <a href="/contact">Contact</a>
    <a href="/ressources-de-café-vie-privée">Ressources</a>
</nav>
"""

# Placeholders for article and footer markup
ARTICLE = ''
FOOTER = ''


def extract_body_content(html_content):
    """Return the markup found between the first <body ...> tag and the
    nearest following </body>, or None when no such pair exists."""
    # DOTALL lets the captured group span several lines.
    found = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    return found.group(1) if found else None

def remove_properties_section(text):
    """Strip every span running from the `<h1 id="article">Article</h1>`
    heading up to and including the nearest following `</ul>`."""
    # Non-greedy match so only the first list after the heading is removed.
    section_re = re.compile(r"<h1 id=\"article\">Article</h1>.+?</ul>", re.DOTALL)
    return section_re.sub("", text)

def remove_article_head_properties_orgmode(text):
    """Strip every `:PROPERTIES:` ... `:END:` span (markers included) from
    the given text."""
    # Non-greedy and DOTALL: each drawer is removed separately, even when it
    # spreads over several lines.
    drawer_re = re.compile(r":PROPERTIES:.+?:END:", re.DOTALL)
    return drawer_re.sub("", text)

def remove_hint_html(text):
    """Strip every occurrence of the `<p>ceci<sub>estduhtml</sub></p>`
    marker paragraph from the given text."""
    marker_re = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", re.DOTALL)
    return marker_re.sub("", text)


def enrich_one_file(file, root_path):
    """Wrap one HTML file in the site page template and rewrite it in place.

    The file is read, cleaned with remove_properties_section,
    remove_article_head_properties_orgmode and remove_hint_html, reduced to
    the inside of its <body> element, then embedded in the page template
    built from the module-level template variables (BLOG_TITLE, NAVIGATION,
    SITE_ICON, ...). The result overwrites the original file.

    Args:
        file: Name of the HTML file, relative to root_path.
        root_path: Directory that contains the file.

    Raises:
        OSError: If the file (or the stylesheet, when CSS inlining is
            enabled) cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    html_path = os.path.join(root_path, file)
    print(' ----------- enrich_html: file:',html_path)

    # Toggle to embed the stylesheet in every page instead of only linking it.
    inline_the_css=False
    # inline_the_css=True

    print(' ----------- enrich_html: CSS inline: ',inline_the_css)

    # Read the page once. The encoding is explicit because the template
    # declares UTF-8 and contains non-ASCII characters; relying on the
    # platform default could corrupt them.
    with open(html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    # Remove some parts
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    # Keep only the inside of <body>. When the file has no <body> element,
    # keep the cleaned content as-is rather than writing the text "None".
    body_content = extract_body_content(html_content)
    if body_content is not None:
        html_content = body_content

    css_content = ""
    if inline_the_css:
        print(' ----------- enrich_html: include css inline in each html page')
        # Inline the configured stylesheet (not the HTML page itself).
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f"<style type='text/css'>{f.read()}</style>"

    # Fill the template. css_content sits right after the stylesheet link so
    # the output is unchanged when it is empty.
    page_content = f"""

    <!DOCTYPE html>
<html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta property="og:image" content="{SITE_ICON}">
        <meta property="og:locale" content="{LOCALE}">
        <meta property="og:description" content="{BLOG_SUBTITLE}">
        <meta property="og:url" content="{NDD}">
        <meta property="og:site_name" content="Cipher Bliss">
        <link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux" href="{NDD}/feed/">
        <link href="/style.css" rel="stylesheet">{css_content}
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>{TITLE}</title>
        <meta name="author" content="{AUTHOR}">
        <link rel="alternate" type="application/rss+xml" title="{BLOG_TITLE} » Flux"
            href="{NDD}/feed/">
        <meta property="og:title" content="{PAGE_TITLE}">
        <meta property="og:locale" content="{LOCALE}">
        <!-- Description de la page -->
        <meta name="description" content="{PAGE_TITLE}">
        <meta name="reply-to" content="{EMAIL}">
        <link rel="icon" type="{SITE_ICON_TYPE}" href="{SITE_ICON}">
    </head>

    <body>
        <div id="page">
            <header id="masthead" class="site-header">
                <div class="header-image" style="background: url({BANNIERE_ENTETE})">
                    <a href="/">
                    <img src="{SITE_ICON}" class="site-icon img">
                    </a>
                    <h1 class="blog-title">{BLOG_TITLE}</h1>
                    <p class="blog-subtitle">{BLOG_SUBTITLE}</p>
                </div>
                    <nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
                        <div class="navbar-brand">
                            <a class="navbar-item" href="{NDD}">

                            </a>

                        </div>

                        <div id="navbarBasicExample" class="navbar-menu">
                            <div class="navbar-start">
                                <a class="navbar-item" href="{NDD}">
                                    <img src="{SITE_ICON}"
                                        class="img-fluid">
                                </a>
                                {NAVIGATION}
                            </div>
                            <div class="navbar-end">
                                <div class="navbar-item">
                                    <form role="search" method="get" class="search-form" action="/">
                                        <label>
                                            <input class="search-field" placeholder="Recherche" value="" name="s"
                                                type="search">
                                        </label>
                                        <input class="is-hidden search-submit" value="Rechercher" type="submit">
                                    </form>
                                </div>
                            </div>
                        </div>
                    </nav>
            </header>
            <main class="body-wrap boxed-container">
                <article class="content">
                    {html_content}
                    <p class="after-article">
                    {after_article}
                    </p>
                </article>
            </main>
            <footer class="site-footer has-top-divider">
                <div class="container">
                    <div class="site-footer-inner">


            {NAVIGATION}

                    </div>
                </div>
            </footer>
        </div>
    </body>
    <!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

"""

    # The enriched page replaces the source file.
    html_path_enriched = html_path
    print(' ----------- enrich_html: html_path_enriched ============> ',html_path_enriched)
    # Write the modified content back to the HTML file
    with open(html_path_enriched, "w", encoding="utf-8") as f:
        f.write(page_content)
        print('\n ----------- enrich_html: html écrit ', html_path_enriched)

# Walk the whole tree under blog_name and enrich every HTML file found.
for current_dir, _subdirs, filenames in os.walk(blog_name):
    html_names = (name for name in filenames if name.endswith(".html"))
    for html_name in html_names:
        enrich_one_file(html_name, current_dir)