orgmode-to-gemini-blog/enrich_html.py
2024-11-12 00:55:21 +01:00

198 lines
7.9 KiB
Python
Executable File

#!/bin/python3
import os
import argparse
import re
from website_config import configs_sites
# Command-line interface. The positional argument is the folder that holds the
# HTML files to enrich. NOTE: --title is parsed, but args.title is not read
# anywhere in the visible part of this file.
parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.")
parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.")
parser.add_argument("--title", "-t", default="Mon site Web", help="Le titre du site Web.")
parser.add_argument("--style", default="templates/style_general.css", help="Le chemin vers le fichier de style CSS.")
args = parser.parse_args()
# Path to the CSS stylesheet (value of --style)
style_file = args.style
# Drop every occurrence of 'html-websites/' from the argument; the result is
# used below both as the key into configs_sites and as the directory to walk.
blog_name = args.blog_name.replace('html-websites/','')
# Locations derived from the blog name. NOTE: header_content_path,
# footer_content_path and static_page_path are not read by any code visible in
# this file (footer_content_path only appears in the commented-out lines below).
source_blog = f"sources/{blog_name}"
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"
print('---------- blog name ', blog_name)
# Per-site template values, looked up by blog name. Presumably configs_sites is
# a dict, in which case an unconfigured blog name raises KeyError here.
template_content = configs_sites[blog_name]
# Placeholders: footer_content is currently unused; after_article is inserted
# (empty) after each article by enrich_one_file.
footer_content=''
after_article=''
# TODO add footer on every article
# with open(footer_content_path, "r") as f:
# footer_content = f.read()
# TODO make these variables overridable by the configuration of a source website
# page template variables
def extract_body_content(html_content):
    """Return the markup found between the first ``<body ...>`` and ``</body>``.

    The search spans newlines. When the document has no body element,
    ``None`` is returned.
    """
    found = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    return found.group(1) if found else None
def remove_properties_section(text):
    """Strip the "Article" heading and everything up to the next ``</ul>``.

    Every occurrence is removed; the match is non-greedy and spans newlines.
    Text without such a section is returned unchanged.
    """
    properties_block = re.compile(r"<h1 id=\"article\">Article</h1>.+?</ul>", re.DOTALL)
    return properties_block.sub("", text)
def remove_article_head_properties_orgmode(text):
    """Delete every org-mode ``:PROPERTIES:`` ... ``:END:`` drawer from *text*.

    The match is non-greedy and spans newlines; at least one character must
    sit between the two markers for a drawer to be removed.
    """
    drawer = re.compile(r":PROPERTIES:.+?:END:", re.DOTALL)
    return drawer.sub("", text)
def remove_hint_html(text):
    """Remove the ``<p>ceci<sub>estduhtml</sub></p>`` marker paragraph from *text*.

    All occurrences are dropped; any other content is left as is.
    """
    hint_marker = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", re.DOTALL)
    return hint_marker.sub("", text)
def enrich_one_file(file, root_path, *, inline_the_css=False):
    """Wrap one converted HTML file in the site page template, in place.

    The file is read, the properties section, org-mode property drawers and
    the HTML hint marker are stripped, the ``<body>`` content is extracted and
    the result is embedded in the full page template built from the
    module-level ``template_content`` mapping. The file is then overwritten.

    Args:
        file: Name of the HTML file inside ``root_path``.
        root_path: Directory containing ``file``.
        inline_the_css: When true, the stylesheet at the module-level
            ``style_file`` path is embedded in the page ``<head>`` inside a
            ``<style>`` element. Defaults to ``False`` (nothing is embedded).

    Raises:
        OSError: If the HTML file (or the stylesheet, when inlining) cannot
            be read, or the HTML file cannot be written.
        KeyError: If ``template_content`` lacks a key used by the template.
    """
    html_path = os.path.join(root_path, file)
    print(' ----------- enrich_html: file:', html_path)
    print(' ----------- enrich_html: CSS inline: ', inline_the_css)

    # Read the page once. The generated page declares UTF-8, so read and write
    # with that encoding rather than the platform default.
    with open(html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    # remove some parts
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    # A fragment without <body> yields None; keep the cleaned text instead of
    # letting the literal string "None" end up in the generated page.
    body_content = extract_body_content(html_content)
    if body_content is not None:
        html_content = body_content

    css_content = ""
    if inline_the_css:
        print(' ----------- enrich_html: include css inline in each html page')
        # Read the stylesheet itself (not the HTML page) and interpolate it.
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f"<style type='text/css'>{f.read()}</style>"

    # NOTE(review): the template below repeats the charset, viewport, og:locale
    # and RSS <link> tags, and the first RSS title is hard-coded to
    # "Cipher Bliss » Flux". It is kept as-is so generated pages stay identical;
    # confirm with the site owner before deduplicating.
    # NOTE(review): running this twice on the same file wraps the already
    # wrapped page again, because the file is rewritten in place.
    # fill in the template
    page_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta property="og:image" content="{template_content['SITE_ICON']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
<meta property="og:url" content="{template_content['NDD']}">
<meta property="og:site_name" content="{template_content['TITLE']}">
<link rel="alternate" type="application/rss+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
<link href="/style.css" rel="stylesheet">{css_content}
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{template_content['TITLE']}</title>
<meta name="author" content="{template_content['AUTHOR']}">
<link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
href="{template_content['NDD']}/feed/">
<meta property="og:title" content="{template_content['PAGE_TITLE']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<!-- Description de la page -->
<meta name="description" content="{template_content['PAGE_TITLE']}">
<meta name="reply-to" content="{template_content['EMAIL']}">
<link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
</head>
<body>
<div id="page">
<header id="masthead" class="site-header">
<div class="header-image" style="background: url({template_content['BANNIERE_ENTETE']})">
<a href="/">
<img src="{template_content['SITE_ICON']}" class="site-icon img">
</a>
<h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
<p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
</div>
<nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
<div class="navbar-brand">
<a class="navbar-item" href="{template_content['NDD']}">
</a>
</div>
<div id="navbarBasicExample" class="navbar-menu">
<div class="navbar-start">
<a class="navbar-item" href="{template_content['NDD']}">
<img src="{template_content['SITE_ICON']}"
class="img-fluid">
</a>
{template_content['NAVIGATION']}
</div>
<div class="navbar-end">
<div class="navbar-item">
<form role="search" method="get" class="search-form" action="/">
<label>
<input class="search-field" placeholder="Recherche" value="" name="s"
type="search">
</label>
<input class="is-hidden search-submit" value="Rechercher" type="submit">
</form>
</div>
</div>
</div>
</nav>
</header>
<main class="body-wrap boxed-container">
<article class="content">
{html_content}
<p class="after-article">
{after_article}
</p>
</article>
</main>
<footer class="site-footer has-top-divider">
<div class="container">
<div class="site-footer-inner">
{template_content['NAVIGATION']}
</div>
</div>
</footer>
</div>
</body>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>
"""
    print(' ----------- enrich_html: html_path_enriched ============> ', html_path)
    # Write the enriched page back over the original file
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(page_html)
    print('\n ----------- enrich_html: html écrit ', html_path)
# Walk the blog directory tree and enrich every HTML file found in it.
# NOTE(review): blog_name has had 'html-websites/' stripped above, so this walks
# a path relative to the current directory — confirm that is the intended folder.
for root, _, files in os.walk(blog_name):
    for file in files:
        if not file.endswith(".html"):
            continue
        enrich_one_file(file, root)