diff --git a/build_indexes.py b/build_indexes.py
index 751726b6..7293c326 100755
--- a/build_indexes.py
+++ b/build_indexes.py
@@ -26,17 +26,37 @@ regex_orgroam = r"^(\d{14})_([a-zA-Z0-9_-]+)\.gmi$"
 use_article_file_for_name=False
 website_name = args.source
+def extract_body_content(html_content):
+    pattern = r'<body>.+?</body>'
+    match = re.search(pattern, html_content, re.DOTALL)
+    if match:
+        return match.group(1)
+    else:
+        return None
+def extract_body_content(html_content):
+    pattern = r'<body[^>]*?>(.*?)</body>'
+    match = re.search(pattern, html_content, re.DOTALL)
+    if match:
+        return match.group(1)
+    else:
+        return None
 
-def trouver_nom_article(fichier_org):
+def trouver_nom_article(fichier_org, format="html"):
     print('fichier_org, ',fichier_org)
     with open(fichier_org, 'r') as file:
         lignes = file.readlines()
-
-    # Expressions régulières pour trouver les titres de niveau 1 et 2
-    titre_niveau_1 = r'^\*+ (.+)$'
-    titre_niveau_2 = r'^\*\*+ (.+)$'
-
+    nom_article = None
+
+    # Expressions régulières pour trouver les titres de niveau 1 et 2
+    if format == 'html':
+        titre_niveau_1 = r'^\<h1.*\>(.+)\<\/h1\>$'
+        titre_niveau_2 = r'^\<h2.*\>(.+)\<\/h2\>$'
+    else:
+        titre_niveau_1 = r'^\*+ (.+)$'
+        titre_niveau_2 = r'^\*\*+ (.+)$'
+
+
     # Itérer sur les lignes du fichier
     for ligne in lignes:
@@ -182,9 +202,9 @@ def generer_index(dossier_source, fichier_index, titre_index):
         if use_article_file_for_name:
             article_name = link_html
         else:
-            file_path_org = os.path.join(dossier_parent,"sources",website_name,link_org)
+            file_path_org = os.path.join(dossier_parent,"sources",website_name, link_org)
             print('-------------- trouver_nom_article ',file_path_org)
-            article_name=trouver_nom_article(file_path_org)
+            article_name=trouver_nom_article(file_path_org, 'org')
 
         if not article_name:
             article_name = link_html
diff --git a/converters.sh b/converters.sh
index b1b22751..fd9954fd 100755
--- a/converters.sh
+++ b/converters.sh
@@ -90,6 +90,10 @@ generate_website() {
     cd sources/$website_name
     convert_sources
+    cd templates
+    convert_sources ../
+    echo "----------- convert_sources : pages template $website_name converties"
+    cd ..
 
     # traduction fr
     cd lang_fr
@@ -101,9 +105,10 @@ generate_website() {
     cd lang_en
     convert_sources ../
     # echo "----------- pages en anglais du site web $website_name converties"
-
-    cd ..
+
+
+    cd ..
     cd ..
 pwd
@@ -196,7 +201,7 @@ for website_name in "${blogs_folders[@]}"; do
     # cp sources/$website_name/converted/*.html html-websites/$website_name/
     cp index_$website_name.html html-websites/$website_name/index.html
-# cp sources/$website_name/lang_fr/converted/*.html html-websites/$website_name/lang_fr/
+# cp sources/$website_name/templates/converted/*.html html-websites/$website_name/templates/
 # cp sources/$website_name/lang_en/converted/*.html html-websites/$website_name/lang_en/
 
     python3 enrich_html.py html-websites/$website_name -t $website_name --style $style_file
diff --git a/enrich_html.py b/enrich_html.py
index f40091c4..020762c2 100755
--- a/enrich_html.py
+++ b/enrich_html.py
@@ -15,10 +15,15 @@ style_file = args.style
 blog_name = args.blog_name
 source_blog = f"sources/{blog_name}"
-header_content_path = f"{source_blog}/templates/header_page.org"
-footer_content_path = f"{source_blog}/templates/footer_page.org"
+header_content_path = f"{source_blog}/templates/converted/header_page.html"
+footer_content_path = f"{source_blog}/templates/converted/footer_page.html"
 static_page_path = f"{source_blog}/templates/html/static.html"
+footer_content=''
+after_article=''
+# with open(footer_content_path, "r") as f:
+#     footer_content = f.read()
+
 # variables du template de page
 BANNIERE_ENTETE=''
 BLOG_TITLE='Cipher Bliss'
@@ -47,23 +52,14 @@ ARTICLE=''
 FOOTER=''
 
+
 def extract_body_content(html_content):
-    pattern = r'<body>.+?</body>'
+    pattern = r'<body[^>]*?>(.*?)</body>'
     match = re.search(pattern, html_content, re.DOTALL)
     if match:
         return match.group(1)
     else:
         return None
-def remove_before_body(text):
-    pattern = r".+?<body>"
-    replacement = ""
-    return re.sub(pattern, replacement, text, flags=re.DOTALL)
-
-def remove_after_body(text):
-    pattern = r"</body>.+?"
-    replacement = ""
-    return re.sub(pattern, replacement, text, flags=re.DOTALL)
-
 def remove_properties_section(text):
     pattern = r"Article.+?"
@@ -85,13 +81,13 @@ def remove_hint_html(text):
 
 def enrich_one_file(file, root_path):
-    print(' ----------- enrich html file:',os.path.join(root_path, file))
+    print(' ----------- enrich_html: file:',os.path.join(root_path, file))
     css_content = ""
     inline_the_css=False
     # inline_the_css=True
-    print(' ----------- CSSS inline: ',inline_the_css)
+    print(' ----------- enrich_html: CSS inline: ',inline_the_css)
     # Trouver le fichier entête
     header_content=''
     with open(os.path.join(root_path, file), "r") as f:
@@ -101,12 +97,14 @@ def enrich_one_file(file, root_path):
         html_content = f.read()
 
         # remove some parts
-        # html_content = remove_properties_section(html_content)
-        # html_content = remove_article_head_properties_orgmode(html_content)
-        # html_content = remove_hint_html(html_content)
+        html_content = remove_properties_section(html_content)
+        html_content = remove_article_head_properties_orgmode(html_content)
+        html_content = remove_hint_html(html_content)
 
-        if inline_the_css == True:
-            print(' ----------- include css inline in each html page')
+        html_content = extract_body_content(html_content)
+
+        if inline_the_css is True:
+            print(' ----------- enrich_html: include css inline in each html page')
             with open(os.path.join(root_path, file), "r") as f:
                 css_content = f.read()
                 css_content = ""
@@ -124,12 +122,9 @@ def enrich_one_file(file, root_path):
-
+
-
-
     <title>{TITLE}</title>
@@ -186,6 +181,9 @@ def enrich_one_file(file, root_path):
     {html_content}
+
+    {after_article}
+