#!/bin/python3
"""Wrap the HTML files of a blog in the site's page template.

Every ``.html`` file found under the ``blog_name`` directory given on the
command line is cleaned (leftover Org-mode property drawers and conversion
markers are removed), reduced to the content of its ``<body>`` element and
rewritten **in place** using the page template.

NOTE(review): several string literals in this script (``NAVIGATION``, the
patterns in ``remove_properties_section`` / ``remove_hint_html`` and the page
template in ``enrich_one_file``) look like their HTML markup was stripped at
some point. They are reproduced as found -- confirm against the upstream
template before relying on the generated pages.
"""
import argparse
import os
import re
from typing import Optional

footer_content = ''
after_article = ''
# TODO add footer on every article, read from
#      sources/<blog_name>/templates/converted/footer_page.html

# TODO make these variables overridable by the configuration of a source website
# Page-template variables.
BLOG_TITLE = 'Cipher Bliss'
BLOG_SUBTITLE = 'Code, nouvelles technologies et entrepreneurariat par B. Lemoine'
TITLE = ''
AUTHOR = ''
PAGE_TITLE = ''
LOCALE = 'fr_FR'
DESCRIPTION = ''
NDD = 'https://www.cipherbliss.com'
EMAIL = 'contact@cipherbliss.com'
SITE_ICON = 'https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png'
SITE_ICON_TYPE = 'image/png'
# NOTE(review): presumably held navigation markup originally -- confirm.
NAVIGATION = """ """
BANNIERE_ENTETE = 'https://www.cipherbliss.com/wp-content/uploads/2016/11/bg.jpg'
BANNIERE_ENTETE_ALT = 'bannière du site'
ARTICLE = ''
FOOTER = ''


def extract_body_content(html_content: str) -> Optional[str]:
    """Return the text between the first ``<body ...>`` tag and ``</body>``.

    Returns ``None`` when *html_content* has no ``<body>`` element.
    """
    # The tag delimiters are required: without them the lazy group matches
    # an empty string right after the first ``>`` of the document.
    pattern = r'<body[^>]*?>(.*?)</body>'
    match = re.search(pattern, html_content, re.DOTALL)
    if match:
        return match.group(1)
    return None


def remove_properties_section(text: str) -> str:
    """Remove the "Article" properties section from *text*."""
    # NOTE(review): this pattern appears to have lost the markup that
    # delimited the section (nothing follows the lazy ``.+?``, so it consumes
    # a single character). Reproduced as found -- restore the delimiters.
    pattern = r"\n\nArticle\n\n.+?"
    return re.sub(pattern, "", text, flags=re.DOTALL)


def remove_article_head_properties_orgmode(text: str) -> str:
    """Remove every Org-mode ``:PROPERTIES:`` ... ``:END:`` drawer from *text*."""
    pattern = r":PROPERTIES:.+?:END:"
    return re.sub(pattern, "", text, flags=re.DOTALL)


def remove_hint_html(text: str) -> str:
    """Remove the ``ceciestduhtml`` marker paragraph from *text*."""
    # NOTE(review): the marker was presumably wrapped in a tag originally;
    # reproduced as found -- confirm.
    pattern = r"\n\nceciestduhtml\n\n"
    return re.sub(pattern, "", text, flags=re.DOTALL)


def enrich_one_file(file, root_path):
    """Clean one HTML file and rewrite it in place inside the page template.

    Args:
        file: Name of the HTML file, relative to *root_path*.
        root_path: Directory that contains *file*.
    """
    html_path = os.path.join(root_path, file)
    print(' ----------- enrich_html: file:', html_path)

    inline_the_css = False
    print(' ----------- enrich_html: CSS inline: ', inline_the_css)

    # Explicit encoding: the pages contain accented characters and must not
    # depend on the platform's default codec.
    with open(html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    # Remove some parts.
    html_content = remove_properties_section(html_content)
    html_content = remove_article_head_properties_orgmode(html_content)
    html_content = remove_hint_html(html_content)

    # A file without a <body> element keeps its cleaned content instead of
    # having the literal text "None" interpolated into the page.
    body_content = extract_body_content(html_content)
    if body_content is not None:
        html_content = body_content

    if inline_the_css:
        # NOTE(review): inlining is not implemented -- the previous code read
        # the HTML file itself and discarded the result.
        print(' ----------- enrich_html: include css inline in each html page')

    # Fill in the template.
    # NOTE(review): the surrounding HTML skeleton of this template seems to
    # be missing; only the interpolated fields remain -- confirm.
    html_content = f""" {TITLE}
{html_content}

{after_article}

"""

    print(' ----------- enrich_html: html_path_enriched ============> ', html_path)
    # Write the modified content back to the HTML file.
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(html_content)
    print('\n ----------- enrich_html: html écrit ', html_path)


def main():
    """Parse the command line and enrich every HTML file under ``blog_name``."""
    parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.")
    parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.")
    parser.add_argument("--title", "-t", default="Mon site Web", help="Le titre du site Web.")
    parser.add_argument("--style", default="templates/style_general.css", help="Le chemin vers le fichier de style CSS.")
    # --title and --style are accepted but not consumed yet.
    args = parser.parse_args()

    # Walk through all the HTML files in the folder.
    for root, _, files in os.walk(args.blog_name):
        for file in files:
            if file.endswith(".html"):
                enrich_one_file(file, root)


if __name__ == "__main__":
    main()