# orgmode-to-gemini-blog/enrich_html.py

#!/bin/python3
import argparse
import os
from utils import *
from website_config import configs_sites

# Command-line interface: the blog to process and an optional stylesheet path.
parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.")
parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.")

parser.add_argument("--style", default="templates/style_general.css", help="Le chemin vers le fichier de style CSS.")
args = parser.parse_args()

# Path of the CSS stylesheet given with --style.
style_file = args.style

# Accept either a bare blog name or its path under html-websites/.
# Trailing slashes (e.g. from shell tab-completion) are dropped: they would
# otherwise end up in the configs_sites key and in every derived path below.
blog_name = args.blog_name.replace('html-websites/', '').rstrip('/')
html_pages = 'html-websites/' + blog_name
source_blog = f"sources/{blog_name}"
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"

enable_header = True

mylog('---------- blog name ', blog_name)
# Per-site template values, looked up by blog name (raises KeyError when the
# blog has no entry in configs_sites).
template_content = configs_sites[blog_name]

# Extra HTML inserted after each article body in the page template.
after_article = ''

# When True, enrich_one_file() reads the stylesheet and stores it wrapped in a
# <style> tag under template_content["CSS_INLINE_CONTENT"].
inline_the_css = False


def enrich_one_file(html_content: str, partials: dict = None, file_name: str = None) -> str:
    """
    Wrap the body of one converted HTML page in the site template.

    The input is first passed through the cleanup helpers
    (extract_body_content, remove_properties_section,
    remove_article_head_properties_orgmode, remove_hint_html), then
    interpolated into a full HTML document built from the module-level
    ``template_content`` mapping and ``after_article``.

    Side effects: sets ``template_content["PAGE_SLUG"]`` and, when the
    module-level ``inline_the_css`` flag is truthy,
    ``template_content["CSS_INLINE_CONTENT"]``.

    Parameters:
    html_content (str): The original HTML content to be enriched.
    partials (dict, optional): Header/footer partials. Currently unused:
        the template below does not interpolate them. Defaults to None
        rather than a shared mutable dict.
    file_name (str, optional): Name of the file being processed, used to
        compute the page slug. When omitted, the module-level loop variable
        ``file`` is used, as in the original script.

    Returns:
    str: The enriched HTML content.
    """
    # Strip the parts of the converted page that must not reach the template.
    html_content = extract_body_content(html_content)
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    if inline_the_css:
        mylog(' ----------- enrich_html: include css inline in each html page')
        # Read the stylesheet itself (not the HTML page being processed).
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f.read()
        # NOTE: the template below does not interpolate CSS_INLINE_CONTENT;
        # the value is only stored in template_content.
        template_content["CSS_INLINE_CONTENT"] = f"<style type='text/css'>{css_content}</style>"

    # Fall back to the global loop variable when no file name is given.
    current_file = file if file_name is None else file_name
    template_content["PAGE_SLUG"] = find_slug_in_file_basename(current_file)

    # Fill in the page template.
    html_content = f"""

    <!DOCTYPE html>
<html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta property="og:image" content="{template_content['SITE_ICON']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
        <meta property="og:url" content="{template_content['NDD']}">
        <meta property="og:site_name" content="{template_content['TITLE']}">
        <link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
        <link href="/style.css" rel="stylesheet">
        <script src="main_script.js"></script>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>{template_content['TITLE']}</title>
        <meta name="author" content="{template_content['AUTHOR']}">
        <link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
            href="{template_content['NDD']}/feed/">
        <meta property="og:title" content="{template_content['PAGE_TITLE']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <!-- Description de la page -->
        <meta name="description" content="{template_content['PAGE_TITLE']}">
        <meta name="reply-to" content="{template_content['EMAIL']}">
        <link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
    </head>

    <body>
        <div id="page" class="page__{template_content['PAGE_SLUG']}">
            <header id="masthead" class="site-header">
                <div class="header-image" style="background: url('{template_content['BANNIERE_ENTETE']}') no-repeat;
                    background-size: cover;">
                    <a href="/">
                    <img src="{template_content['SITE_ICON']}" class="site-icon img">
                    </a>
                    <h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
                    <p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
                    <div class="template-header">

                    </div>
                </div>
                    <nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
                        <div class="navbar-brand">
                            <a class="navbar-item" href="{template_content['NDD']}">
                                {template_content['NDD']}
                            </a>
                        </div>

                        <div id="navbarBasicExample" class="navbar-menu">
                            <div class="navbar-start">
                                <a class="logo" href="{template_content['NDD']}">
                                    <img src="{template_content['SITE_ICON']}"
                                        class="img-fluid">
                                </a>
                                {template_content['NAVIGATION']}
                            </div>
                            <div class="navbar-end">
                                <div class="navbar-item">
                                    <form role="search" method="get" class="search-form" action="/" id="recherche">
                                        <label>
                                            <input class="search-field" placeholder="Recherche" value="" name="s"
                                                type="search">
                                        </label>
                                        <input class="is-hidden search-submit" value="Rechercher" type="submit">
                                    </form>
                                </div>
                            </div>
                        </div>
                    </nav>
            </header>
            <main class="body-wrap boxed-container">
                <article class="content">
                    {html_content}
                    <p class="after-article">
                    {after_article}
                    </p>
                </article>
            </main>
            <footer class="site-footer has-top-divider">
                <div class="container">
                    <div class="site-footer-inner">
                        <div class="site-foot">
                        </div>
                        <nav class="footer-nav">
                            {template_content['NAVIGATION']}
                            <a href="/tags/">Tags</a>
                            <a href="{template_content['NDD']}/feed/">Flux Atom</a>
                        </nav>
                    </div>
                </div>
            </footer>
        </div>
    </body>
    <!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>

"""
    # The header/footer partials are intentionally not interpolated above.
    return html_content


def ouvrir_fichier(chemin_fichier):
    """
    Return the text content of a file, or an empty string when it is missing.

    The file is decoded as UTF-8 regardless of the platform locale. A missing
    path is not an error: a message is printed and '' is returned.

    Parameters:
    chemin_fichier (str): Path of the file to read.

    Returns:
    str: The file content, or '' when the path does not exist.
    """
    try:
        # Open directly instead of checking existence first: this avoids the
        # window between the check and the open.
        with open(chemin_fichier, 'r', encoding='utf-8') as fichier:
            return fichier.read()
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers paths whose parent component is a file,
        # which the previous os.path.exists() check also reported as missing.
        print(f"Le fichier {chemin_fichier} n'existe pas.")
        return ''

# Load the per-site header/footer partials once: they depend only on the blog
# name, not on the directory being visited.
partials = {
    "header_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'header_page.org')),
    "footer_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'footer_page.org')),
}

liste_fichiers_du_blog_convertis = os.walk(html_pages)

count_articles = 0
# Walk every directory of the given blog and enrich each HTML file in place.
# NOTE: the loop variables must stay named `root_path` and `file`:
# enrich_one_file() reads them as module-level globals.
for root_path, dirs, files in liste_fichiers_du_blog_convertis:
    mylog('fichiers à enrichir:', len(files))

    for file in files:
        if not file.endswith(".html"):
            continue

        count_articles += 1
        html_path_enriched = os.path.join(root_path, file)

        # Read the converted page; UTF-8 is explicit because the template
        # contains non-ASCII characters.
        with open(html_path_enriched, "r", encoding="utf-8") as f:
            html_content = f.read()

        html_content = enrich_one_file(html_content, partials)

        # Overwrite the same file with the enriched page.
        with open(html_path_enriched, "w", encoding="utf-8") as f:
            f.write(html_content)

print('articles listés :', count_articles)