orgmode-to-gemini-blog/enrich_html.py

125 lines
4.0 KiB
Python
Raw Normal View History

2024-11-03 11:42:44 +01:00
#!/bin/python3
2024-11-03 10:29:30 +01:00
import os
2024-11-03 11:42:44 +01:00
import argparse
2024-11-10 00:16:15 +01:00
import re
2024-11-04 00:13:58 +01:00
2024-11-03 11:42:44 +01:00
# Command-line interface: the blog folder is mandatory, the site title and
# the stylesheet path are optional.
parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.")
parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.")
parser.add_argument("--title", "-t", default="Mon site Web", help="Le titre du site Web.")
parser.add_argument("--style", default="templates/style_general.css", help="Le chemin vers le fichier de style CSS.")
args = parser.parse_args()

# Stylesheet path taken from the command line.
style_file = args.style
blog_name = args.blog_name
source_blog = f"sources/{blog_name}"
header_content_path = f"{source_blog}/templates/header_page.org"
# NOTE(review): this used to point at header_page.org, which looks like a
# copy-paste slip; confirm that templates/footer_page.org exists for each blog.
footer_content_path = f"{source_blog}/templates/footer_page.org"
static_page_path = f"{source_blog}/templates/html/static.html"

# Page template variables.
BLOG_TITLE = 'Cipher Bliss'
BLOG_SUBTITLE = 'Code, nouvelles technologies et entrepreneurariat par B. Lemoine'
TITLE = ''
AUTHOR = ''
PAGE_TITLE = ''
LOCALE = ''
DESCRIPTION = ''
EMAIL = 'contact@cipherbliss.com'
SITE_ICON = 'https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png'
# The MIME type used to be assigned to SITE_ICON as well, which overwrote the
# icon URL above; it now lives in its own variable.
SITE_ICON_TYPE = 'image/png'
NAVIGATION = '<nav><a href="/">Accueil</a><a href="https://portfolio.cipherbliss.com">Portfolio</a><a href="/feed">Flux RSS</a><a href="/contact">Contact</a><a href="/ressources-de-café-vie-privée">Ressources</a></nav>'
BANNIERE_ENTETE = 'https://www.cipherbliss.com/wp-content/uploads/2016/11/bg.jpg'
BANNIERE_ENTETE_ALT = 'bannière du site'
ARTICLE = ''
FOOTER = ''
2024-11-03 11:42:44 +01:00
2024-11-10 00:16:15 +01:00
def remove_properties_section(text):
    """Strip the exported "Article" properties block from an HTML string.

    Everything from the ``<h1 id="article">Article</h1>`` heading up to and
    including the first following ``</ul>`` is removed. The match is
    non-greedy and spans newlines. Text without such a block is returned
    unchanged.
    """
    properties_block = re.compile(
        r"<h1 id=\"article\">Article</h1>.+?</ul>",
        flags=re.DOTALL,
    )
    return properties_block.sub("", text)
def remove_article_head_properties_orgmode(text):
    """Drop every Org-mode ``:PROPERTIES:`` ... ``:END:`` drawer from *text*.

    Each drawer is matched non-greedily, across newlines, so several drawers
    in the same text are removed independently.
    """
    drawer = re.compile(r":PROPERTIES:.+?:END:", flags=re.DOTALL)
    return drawer.sub("", text)
def remove_hint_html(text):
    """Remove every ``<p>ceci<sub>estduhtml</sub></p>`` paragraph from *text*."""
    hint_paragraph = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", flags=re.DOTALL)
    return hint_paragraph.sub("", text)
2024-11-08 18:05:08 +01:00
2024-11-08 23:19:39 +01:00
def enrich_one_file(file, root_path, inline_the_css=False):
    """Wrap one generated HTML file in the blog page template, in place.

    The file is read, cleaned by the three ``remove_*`` helpers, embedded in
    a full HTML document (doctype, head, header, navigation, footer) built
    from the module-level template variables, and written back to the same
    path.

    Args:
        file: Name of the HTML file, relative to ``root_path``.
        root_path: Directory that contains ``file``.
        inline_the_css: When true, the content of the stylesheet designated
            by the module-level ``style_file`` is embedded in a ``<style>``
            tag in the page head. Defaults to ``False``, in which case only
            the ``/style.css`` link is emitted.

    Raises:
        OSError: If the HTML file (or the stylesheet, when inlining) cannot
            be read, or the HTML file cannot be written.
    """
    html_path_enriched = os.path.join(root_path, file)
    print(' ----------- enrich html file:', html_path_enriched)
    print(' ----------- CSSS inline: ', inline_the_css)

    # The generated page declares charset UTF-8, so read and write with that
    # encoding explicitly instead of relying on the platform default.
    with open(html_path_enriched, "r", encoding="utf-8") as f:
        html_content = f.read()

    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    css_content = ""
    if inline_the_css:
        print(' ----------- include css inline in each html page')
        # Read the stylesheet itself (not the HTML page) and interpolate it
        # into the <style> tag.
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f"<style type='text/css'>{f.read()}</style>"

    # Add the doctype, the UTF-8 charset declaration, the site title and the
    # header/footer navigation around the article body.
    html_content = f"""<!DOCTYPE html>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
<html lang="fr">
<head>
<meta charset="UTF-8">
<title>{BLOG_TITLE}</title>
{css_content}
<link rel="stylesheet" href="/style.css"></link>
</head>
<body>
<header>
<h1>{BLOG_TITLE}</h1>
<p>{BLOG_SUBTITLE}</p>
{NAVIGATION}
</header>
<hr/>
<main>
{html_content}
</main>
<hr/>
<footer>
{NAVIGATION}
</footer>
</body>
</html>
"""

    # Overwrite the original file with the enriched document.
    with open(html_path_enriched, "w", encoding="utf-8") as f:
        f.write(html_content)
    print('\n ----------- html écrit ', html_path_enriched)
2024-11-03 10:29:30 +01:00
# Walk the blog folder recursively and enrich every HTML file found.
for root, _, files in os.walk(blog_name):
    html_files = [name for name in files if name.endswith(".html")]
    for html_file in html_files:
        enrich_one_file(html_file, root)