orgmode-to-gemini-blog/enrich_html.py

#!/bin/python3
import os
import argparse
import re
from website_config import configs_sites

# Command-line interface: the blog to process, plus an optional title and stylesheet.
parser = argparse.ArgumentParser(
    description="Générer un site Web à partir de fichiers HTML.",
)
parser.add_argument(
    "blog_name",
    help="Le chemin vers le dossier contenant les fichiers HTML.",
)
parser.add_argument(
    "--title", "-t",
    default="Mon site Web",
    help="Le titre du site Web.",
)
parser.add_argument(
    "--style",
    default="templates/style_general.css",
    help="Le chemin vers le fichier de style CSS.",
)
args = parser.parse_args()

# Path of the CSS stylesheet given on the command line.
style_file = args.style

# Drop every "html-websites/" occurrence so a path under that folder and a
# bare blog name both resolve to the same key.
blog_name = args.blog_name.replace('html-websites/', '')

# Locations of the blog sources and of its page templates.
source_blog = f"sources/{blog_name}"
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"

print('---------- blog name ', blog_name)

# Per-site template variables, looked up by blog name (KeyError if unknown).
template_content = configs_sites[blog_name]

# Placeholders injected into every page; both are empty for now.
footer_content = ''
after_article = ''
# TODO add footer on every article
# with open(footer_content_path, "r") as f:
#         footer_content = f.read()

# TODO make these variables overridden by the configuration of a source website
# page template variables

def extract_body_content(html_content):
    """Return the markup enclosed by the first ``<body ...>`` / ``</body>`` pair.

    The match is case-sensitive and spans newlines; it stops at the first
    closing ``</body>`` tag. Returns ``None`` when no body element is found.
    """
    body = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    return body.group(1) if body else None

def remove_properties_section(text):
    """Return *text* without the "Article" heading and the list that follows it.

    Every span starting at ``<h1 id="article">Article</h1>`` and ending at the
    nearest following ``</ul>`` (newlines included) is deleted.
    """
    properties_block = re.compile(
        r"<h1 id=\"article\">Article</h1>.+?</ul>",
        re.DOTALL,
    )
    return properties_block.sub("", text)

def remove_article_head_properties_orgmode(text):
    """Return *text* with every ``:PROPERTIES:`` ... ``:END:`` span removed.

    Each span runs from ``:PROPERTIES:`` to the nearest following ``:END:``
    and may cover several lines.
    """
    drawer = re.compile(r":PROPERTIES:.+?:END:", re.DOTALL)
    return drawer.sub("", text)

def remove_hint_html(text):
    """Return *text* with every ``<p>ceci<sub>estduhtml</sub></p>`` marker removed."""
    cleaned, _count = re.subn(
        r"<p>ceci<sub>estduhtml</sub></p>",
        "",
        text,
        flags=re.DOTALL,
    )
    return cleaned


def enrich_one_file(file, root_path, inline_the_css=False):
    """Wrap one converted HTML file in the site page template, in place.

    The file is read, cleaned with ``remove_properties_section``,
    ``remove_article_head_properties_orgmode`` and ``remove_hint_html``,
    reduced to the inner content of its ``<body>`` element, inserted into the
    page template, and written back to the same path.

    Args:
        file: Name of the HTML file, relative to ``root_path``.
        root_path: Directory that contains ``file``.
        inline_the_css: When true, the stylesheet at the module-level
            ``style_file`` path is embedded in a ``<style>`` element right
            after the stylesheet link. Off by default, in which case the
            generated page is unchanged by this option.

    The template is filled from the module-level ``template_content`` mapping
    and the ``after_article`` string.

    Note: the function is not idempotent. Running it again on a file it has
    already written takes that page's whole ``<body>`` (header, navigation
    and footer included) as the new article content.
    """
    html_path = os.path.join(root_path, file)
    print(' ----------- enrich_html: file:', html_path)
    print(' ----------- enrich_html: CSS inline: ', inline_the_css)

    # The generated page declares UTF-8, so read and write with that encoding
    # instead of the platform default.
    with open(html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    # Remove some parts.
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    # Keep the cleaned document as-is when it has no <body> element, rather
    # than rendering the literal text "None" as the article.
    body_content = extract_body_content(html_content)
    if body_content is not None:
        html_content = body_content

    css_content = ""
    if inline_the_css:
        print(' ----------- enrich_html: include css inline in each html page')
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f"<style type='text/css'>{f.read()}</style>"

    # Fill the template.
    # NOTE(review): the head repeats the charset and viewport metas and has a
    # first feed link whose title is hard-coded to "Cipher Bliss » Flux";
    # left untouched here to keep the generated markup stable.
    page_html = f"""

    <!DOCTYPE html>
<html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta property="og:image" content="{template_content['SITE_ICON']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
        <meta property="og:url" content="{template_content['NDD']}">
        <meta property="og:site_name" content="{template_content['TITLE']}">
        <link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
        <link href="/style.css" rel="stylesheet">{css_content}
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>{template_content['TITLE']}</title>
        <meta name="author" content="{template_content['AUTHOR']}">
        <link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
            href="{template_content['NDD']}/feed/">
        <meta property="og:title" content="{template_content['PAGE_TITLE']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <!-- Description de la page -->
        <meta name="description" content="{template_content['PAGE_TITLE']}">
        <meta name="reply-to" content="{template_content['EMAIL']}">
        <link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
    </head>

    <body>
        <div id="page">
            <header id="masthead" class="site-header">
                <div class="header-image" style="background: url({template_content['BANNIERE_ENTETE']})">
                    <a href="/">
                    <img src="{template_content['SITE_ICON']}" class="site-icon img">
                    </a>
                    <h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
                    <p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
                </div>
                    <nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
                        <div class="navbar-brand">
                            <a class="navbar-item" href="{template_content['NDD']}">

                            </a>

                        </div>

                        <div id="navbarBasicExample" class="navbar-menu">
                            <div class="navbar-start">
                                <a class="navbar-item" href="{template_content['NDD']}">
                                    <img src="{template_content['SITE_ICON']}"
                                        class="img-fluid">
                                </a>
                                {template_content['NAVIGATION']}
                            </div>
                            <div class="navbar-end">
                                <div class="navbar-item">
                                    <form role="search" method="get" class="search-form" action="/">
                                        <label>
                                            <input class="search-field" placeholder="Recherche" value="" name="s"
                                                type="search">
                                        </label>
                                        <input class="is-hidden search-submit" value="Rechercher" type="submit">
                                    </form>
                                </div>
                            </div>
                        </div>
                    </nav>
            </header>
            <main class="body-wrap boxed-container">
                <article class="content">
                    {html_content}
                    <p class="after-article">
                    {after_article}
                    </p>
                </article>
            </main>
            <footer class="site-footer has-top-divider">
                <div class="container">
                    <div class="site-footer-inner">


            {template_content['NAVIGATION']}

                    </div>
                </div>
            </footer>
        </div>
    </body>
    <!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

"""

    print(' ----------- enrich_html: html_path_enriched ============> ', html_path)
    # Write the enriched page over the original HTML file.
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(page_html)
    print('\n ----------- enrich_html: html écrit ', html_path)

# Enrich every HTML file found under the blog folder, subfolders included.
for root, _, files in os.walk(blog_name):
    for file in filter(lambda name: name.endswith(".html"), files):
        enrich_one_file(file, root)