orgmode-to-gemini-blog/enrich_html.py

214 lines
9.3 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/python3
import argparse
import os
from utils import *
from website_config import configs_sites
# Command-line interface: one positional blog name and an optional stylesheet path.
parser = argparse.ArgumentParser(
    description="Générer un site Web à partir de fichiers HTML.",
)
parser.add_argument(
    "blog_name",
    help="Le chemin vers le dossier contenant les fichiers HTML.",
)
parser.add_argument(
    "--style",
    default="templates/style_general.css",
    help="Le chemin vers le fichier de style CSS.",
)
args = parser.parse_args()

# Path of the CSS stylesheet, as given by --style.
style_file = args.style

# Drop any 'html-websites/' fragment so the argument may be given either as
# a bare blog name or as a path below that folder.
blog_name = args.blog_name.replace('html-websites/', '')

# Folder holding the converted HTML pages, and folder holding the sources.
html_pages = f"html-websites/{blog_name}"
source_blog = f"sources/{blog_name}"

# Locations of the per-blog template files.
header_content_path = f"{source_blog}/templates/converted/header_page.html"
footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
static_page_path = f"{source_blog}/templates/html/static.html"

enable_header = True

mylog('---------- blog name ', blog_name)

# Template values for this blog; raises KeyError when the blog has no entry
# in configs_sites.
template_content = configs_sites[blog_name]

# Extra markup placed in the "after-article" paragraph of every page.
after_article = ''

# Switch to True to have enrich_one_file prepare an inline <style> block.
inline_the_css = False
def enrich_one_file(html_content: str, partials: "dict | None" = None) -> str:
    """
    Wrap the body of one converted HTML page in the site template.

    The input is first reduced with the helpers extract_body_content,
    remove_properties_section, remove_article_head_properties_orgmode and
    remove_hint_html, then embedded in a full HTML document filled with the
    values of the module-level ``template_content`` mapping.

    Module-level names used:
        inline_the_css: when True, the stylesheet is read and stored,
            wrapped in a <style> element, under
            ``template_content["CSS_INLINE_CONTENT"]``.
        style_file: path of the stylesheet read in that case.
        template_content: read for every template value; its "PAGE_SLUG"
            entry (and optionally "CSS_INLINE_CONTENT") is overwritten on
            each call.
        file: name of the file currently being processed, set by the
            calling loop; passed to find_slug_in_file_basename.
        after_article: markup placed after the article content.

    Parameters:
        html_content (str): The original HTML content to be enriched.
        partials (dict, optional): Partial HTML content for the header and
            footer. Accepted for API compatibility; the template below does
            not use it yet.

    Returns:
        str: The enriched HTML content.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if partials is None:
        partials = {"header_page": "", "footer_content": ""}

    # remove some parts
    html_content = extract_body_content(html_content)
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    if inline_the_css is True:
        mylog(' ----------- enrich_html: include css inline in each html page')
        # Read the stylesheet itself (not the HTML page being processed).
        with open(style_file, "r", encoding="utf-8") as f:
            css_content = f.read()
        # f-string so the CSS text is actually interpolated.
        css_content = f"<style type='text/css'>{css_content}</style>"
        template_content["CSS_INLINE_CONTENT"] = css_content
        # NOTE(review): CSS_INLINE_CONTENT is stored but the template below
        # never emits it; confirm where it is meant to be inserted.

    # `file` is the module-level loop variable of the calling script.
    template_content["PAGE_SLUG"] = find_slug_in_file_basename(file)

    # fill in the template
    html_content = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta property="og:image" content="{template_content['SITE_ICON']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<meta property="og:description" content="{template_content['BLOG_SUBTITLE']}">
<meta property="og:url" content="{template_content['NDD']}">
<meta property="og:site_name" content="{template_content['TITLE']}">
<link rel="alternate" type="application/atom+xml" title="Cipher Bliss » Flux" href="{template_content['NDD']}/feed/">
<link href="/style.css" rel="stylesheet">
<script src="main_script.js"></script>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{template_content['TITLE']}</title>
<meta name="author" content="{template_content['AUTHOR']}">
<link rel="alternate" type="application/rss+xml" title="{template_content['BLOG_TITLE']} » Flux"
href="{template_content['NDD']}/feed/">
<meta property="og:title" content="{template_content['PAGE_TITLE']}">
<meta property="og:locale" content="{template_content['LOCALE']}">
<!-- Description de la page -->
<meta name="description" content="{template_content['PAGE_TITLE']}">
<meta name="reply-to" content="{template_content['EMAIL']}">
<link rel="icon" type="{template_content['SITE_ICON_TYPE']}" href="{template_content['SITE_ICON']}">
</head>
<body>
<div id="page" class="page__{template_content['PAGE_SLUG']}">
<header id="masthead" class="site-header">
<div class="header-image" style="background: url('{template_content['BANNIERE_ENTETE']}') no-repeat;
background-size: cover;">
<a href="/">
<img src="{template_content['SITE_ICON']}" class="site-icon img">
</a>
<h1 class="blog-title">{template_content['BLOG_TITLE']}</h1>
<p class="blog-subtitle">{template_content['BLOG_SUBTITLE']}</p>
<div class="template-header">
</div>
</div>
<nav class="navbar is-fixed-top is-dark" role="navigation" aria-label="main navigation">
<div class="navbar-brand">
<a class="navbar-item" href="{template_content['NDD']}">
{template_content['NDD']}
</a>
</div>
<div id="navbarBasicExample" class="navbar-menu">
<div class="navbar-start">
<a class="logo" href="{template_content['NDD']}">
<img src="{template_content['SITE_ICON']}"
class="img-fluid">
</a>
{template_content['NAVIGATION']}
</div>
<div class="navbar-end">
<div class="navbar-item">
<form role="search" method="get" class="search-form" action="/" id="recherche">
<label>
<input class="search-field" placeholder="Recherche" value="" name="s"
type="search">
</label>
<input class="is-hidden search-submit" value="Rechercher" type="submit">
</form>
</div>
</div>
</div>
</nav>
</header>
<main class="body-wrap boxed-container">
<article class="content">
{html_content}
<p class="after-article">
{after_article}
</p>
</article>
</main>
<footer class="site-footer has-top-divider">
<div class="container">
<div class="site-footer-inner">
<div class="site-foot">
</div>
<nav class="footer-nav">
{template_content['NAVIGATION']}
<a href="/tags/">Tags</a>
<a href="{template_content['NDD']}/feed/">Flux Atom</a>
</nav>
</div>
</div>
</footer>
</div>
</body>
<!-- généré avec orgmode-to-gemini-blog par Tykayn -->
</html>
"""
    # The header/footer partials are not injected into the template yet.
    return html_content
def ouvrir_fichier(chemin_fichier):
    """Return the text content of a file, or '' when the file does not exist.

    The file is decoded as UTF-8. A missing file is reported on standard
    output and is not treated as an error.

    Parameters:
        chemin_fichier: Path of the file to read.

    Returns:
        str: The whole file content, or an empty string if the path does
        not exist.
    """
    # Try to open directly rather than checking existence first: this avoids
    # a second filesystem lookup and the window in which the file could
    # disappear between the check and the open.
    try:
        with open(chemin_fichier, 'r', encoding='utf-8') as fichier:
            return fichier.read()
    except (FileNotFoundError, NotADirectoryError):
        print(f"Le fichier {chemin_fichier} n'existe pas.")
        return ''
# Partial templates of the site. They do not depend on the directory being
# walked, so they are read once here instead of once per directory.
partials = {
    "header_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'header_page.org')),
    "footer_content": ouvrir_fichier(os.path.join('sources', blog_name, 'templates', 'footer_page.org')),
}

liste_fichiers_du_blog_convertis = os.walk(html_pages)
count_articles = 0

# Walk every HTML file of the given blog folder and rewrite it in place with
# its enriched version.
# Keep the loop variable name `file`: enrich_one_file reads it as a
# module-level global.
for root_path, dirs, files in liste_fichiers_du_blog_convertis:
    mylog('fichiers à enrichir:', len(files))
    for file in files:
        if not file.endswith(".html"):
            continue
        count_articles += 1
        html_path_enriched = os.path.join(root_path, file)

        # Read the converted page. UTF-8 matches the charset declared by the
        # page template, independently of the system locale.
        with open(html_path_enriched, "r", encoding="utf-8") as f:
            html_content = f.read()

        html_content = enrich_one_file(html_content, partials)

        # Overwrite the page with the enriched content.
        with open(html_path_enriched, "w", encoding="utf-8") as f:
            f.write(html_content)

print('articles listés :', count_articles)