orgmode-to-gemini-blog/enrich_html.py

212 lines
9.2 KiB
Python
Raw Normal View History

2024-11-03 11:42:44 +01:00
#!/bin/python3
import argparse
2024-11-14 13:32:56 +01:00
import os
2024-11-14 16:22:34 +01:00
from utils import *
2024-11-12 00:55:21 +01:00
from website_config import configs_sites
2024-11-04 00:13:58 +01:00
2024-11-03 11:42:44 +01:00
# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.")
parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.")
parser.add_argument("--style", default="templates/style_general.css", help="Le chemin vers le fichier de style CSS.")
args = parser.parse_args()

# Path of the CSS stylesheet given on the command line.
style_file = args.style

# Accept either a bare blog name or a path under html-websites/.
# The trailing slash added by shell completion ("html-websites/blog/") is
# dropped so that the name still matches a key of `configs_sites`.
blog_name = args.blog_name.replace('html-websites/', '').rstrip('/')
html_pages = 'html-websites/' + blog_name
source_blog = f"sources/{blog_name}"

# Locations of the per-site template files.
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"

enable_header = True
mylog('---------- blog name ', blog_name)

# Per-site values substituted into the page template by enrich_one_file().
template_content = configs_sites[blog_name]
# Extra markup placed in the "after-article" paragraph of every page.
after_article = ''
# When True, enrich_one_file() also loads the stylesheet as an inline <style> tag.
inline_the_css = False
2024-11-11 00:58:44 +01:00
2024-11-10 00:16:15 +01:00
2024-11-15 01:45:11 +01:00
def enrich_one_file(html_content: str, partials: dict = None) -> str:
    """
    Wrap the body of one converted HTML page in the site-wide page template.

    The page is first reduced to its body content and cleaned by the helper
    functions (properties section, orgmode article head properties, hint
    markup), then inserted into a complete HTML document filled with the
    values of the module-level `template_content` mapping.

    This function reads module-level state: `template_content`,
    `inline_the_css`, `style_file`, `after_article`, and the loop variable
    `file` of the main walk loop (used to compute the page slug). It also
    writes `PAGE_SLUG` (and `CSS_INLINE_CONTENT` when CSS inlining is
    enabled) into `template_content`.

    Parameters:
    html_content (str): The original HTML content to be enriched.
    partials (dict, optional): Header/footer partials. Accepted for API
        compatibility; the template does not use them at the moment.

    Returns:
    str: The enriched HTML document.
    """
    # Remove the parts of the converted page that must not be published.
    html_content = extract_body_content(html_content)
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    if inline_the_css is True:
        mylog(' ----------- enrich_html: include css inline in each html page')
        # Read the stylesheet itself (not the HTML page being enriched) and
        # wrap its content in a <style> element.
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f.read()
        css_content = f"<style type='text/css'>{css_content}</style>"
        template_content["CSS_INLINE_CONTENT"] = css_content

    # The slug is derived from the file currently processed by the main loop.
    template_content["PAGE_SLUG"] = find_slug_in_file_basename(file)

    # NOTE(review): the Atom <link> title below is hard-coded to one site
    # name while the rest of the template is configuration-driven — confirm
    # whether it should use BLOG_TITLE instead.
    # Fill the template.
    html_content = f"""

<!DOCTYPE html>
<html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta property="og:image" content="{template_content['SITE_ICON']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
        <meta property="og:url" content="{template_content['NDD']}">
        <meta property="og:site_name" content="{template_content['TITLE']}">
        <link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
        <link href="/style.css" rel="stylesheet">
        <script src="main_script.js"></script>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>{template_content['TITLE']}</title>
        <meta name="author" content="{template_content['AUTHOR']}">
        <link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
            href="{template_content['NDD']}/feed/">
        <meta property="og:title" content="{template_content['PAGE_TITLE']}">
        <meta property="og:locale" content="{template_content['LOCALE']}">
        <!-- Description de la page -->
        <meta name="description" content="{template_content['PAGE_TITLE']}">
        <meta name="reply-to" content="{template_content['EMAIL']}">
        <link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
    </head>
    <body>
        <div id="page" class="page__{template_content['PAGE_SLUG']}">
            <header id="masthead" class="site-header">
                <div class="header-image" style="background: url({template_content['BANNIERE_ENTETE']})">
                    <a href="/">
                        <img src="{template_content['SITE_ICON']}" class="site-icon img">
                    </a>
                    <h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
                    <p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
                    <div class="template-header">
                    </div>
                </div>
                <nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
                    <div class="navbar-brand">
                        <a class="navbar-item" href="{template_content['NDD']}">
                            {template_content['NDD']}
                        </a>
                    </div>
                    <div id="navbarBasicExample" class="navbar-menu">
                        <div class="navbar-start">
                            <a class="logo" href="{template_content['NDD']}">
                                <img src="{template_content['SITE_ICON']}"
                                    class="img-fluid">
                            </a>
                            {template_content['NAVIGATION']}
                        </div>
                        <div class="navbar-end">
                            <div class="navbar-item">
                                <form role="search" method="get" class="search-form" action="/" id="recherche">
                                    <label>
                                        <input class="search-field" placeholder="Recherche" value="" name="s"
                                            type="search">
                                    </label>
                                    <input class="is-hidden search-submit" value="Rechercher" type="submit">
                                </form>
                            </div>
                        </div>
                    </div>
                </nav>
            </header>
            <main class="body-wrap boxed-container">
                <article class="content">
                    {html_content}
                    <p class="after-article">
                        {after_article}
                    </p>
                </article>
            </main>
            <footer class="site-footer has-top-divider">
                <div class="container">
                    <div class="site-footer-inner">
                        <div class="site-foot">
                        </div>
                        <nav class="footer-nav">
                            {template_content['NAVIGATION']}
                            <a href="/tags/">Tags</a>
                            <a href="{template_content['NDD']}/feed/">Flux Atom</a>
                        </nav>
                    </div>
                </div>
            </footer>
        </div>
    </body>
    <!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>
    """
    return html_content
def ouvrir_fichier(chemin_fichier):
    """
    Return the whole text content of the file at `chemin_fichier`.

    The file is decoded as UTF-8 so the result does not depend on the
    platform's default encoding.

    Parameters:
    chemin_fichier (str): Path of the file to read.

    Returns:
    str: The content of the file.

    Raises:
    FileNotFoundError: If no file exists at `chemin_fichier`.
    """
    # Open directly instead of checking for existence first: the file could
    # disappear between the check and the open.
    try:
        with open(chemin_fichier, 'r', encoding='utf-8') as fichier:
            return fichier.read()
    except (FileNotFoundError, NotADirectoryError) as err:
        raise FileNotFoundError(f"Le fichier {chemin_fichier} n'existe pas.") from err
2024-11-08 18:05:08 +01:00
2024-11-14 16:22:34 +01:00
# Walk every directory of the converted blog and enrich each HTML page in place.
liste_fichiers_du_blog_convertis = os.walk(html_pages)
mylog('fichiers à enrichir:', liste_fichiers_du_blog_convertis)
count_articles = 0

# Load the per-site partial templates once: they are the same for every
# directory visited by the walk.
partials = {
    "header_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'header_page.org')),
    "footer_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'footer_page.org')),
}

# NOTE: enrich_one_file() reads the loop variables `root_path` and `file` as
# module globals, so these names must not be changed.
for root_path, dirs, files in liste_fichiers_du_blog_convertis:
    print(len(files))

    for file in files:
        # Only HTML pages are enriched; every other file is left untouched.
        if not file.endswith(".html"):
            continue

        count_articles += 1
        mylog(' ----------- enrich_html: CSS inline: ', inline_the_css)
        html_path_enriched = os.path.join(root_path, file)

        # Read the converted page. The template declares UTF-8, so the page
        # is read and written with that encoding explicitly.
        with open(html_path_enriched, "r", encoding="utf-8") as f:
            html_content = f.read()

        html_content = enrich_one_file(html_content, partials)
        mylog(' ----------- enrich_html: html_path_enriched ============> ', html_path_enriched)

        # Overwrite the page with its enriched version.
        with open(html_path_enriched, "w", encoding="utf-8") as f:
            f.write(html_content)
        mylog('\n ----------- enrich_html: html écrit ', html_path_enriched)

print('articles listés :', count_articles)