diff --git a/.gitignore b/.gitignore index bb9696ed..c8f6ad20 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ sources/*/lang_*/converted sources/**/*.html index_*.html index_*.gmi -__pycache__ \ No newline at end of file +__pycache__ +output/pictures +html-websites/* \ No newline at end of file diff --git a/__pycache__/enrich_html.cpython-312.pyc b/__pycache__/enrich_html.cpython-312.pyc index 7111203c..59537439 100644 Binary files a/__pycache__/enrich_html.cpython-312.pyc and b/__pycache__/enrich_html.cpython-312.pyc differ diff --git a/build_indexes.py b/build_indexes.py index 3bc024ae..a69341ae 100755 --- a/build_indexes.py +++ b/build_indexes.py @@ -191,11 +191,11 @@ def generer_index(dossier_source, fichier_index): # déplacer le fichier html dans le dossier slug, # et le renommer en index.html ensuite pour ne pas modifier l'index du blog - contenu_index_html += f"
{année} {article_name}" + contenu_index_html += f"
{année} {article_name}" os.makedirs(os.path.dirname(new_folder_path_this_article), exist_ok=True) shutil.copy(chemin_fichier_this_article_html, new_folder_path_this_article + 'index.html') else: - contenu_index_html += f"
{link_html}" + contenu_index_html += f"
{link_html}" contenu_index_html += "
" contenu_index_html += "

Navigation

" @@ -223,7 +223,7 @@ def generer_index(dossier_source, fichier_index): else: article_name = 'Index' article_name = article_name.replace('_', ' ') - contenu_index_html += f"
{article_name}" + contenu_index_html += f"
{article_name}" # ---------------- pareil en anglais TODO # contenu_index_gmi += "\n# Articles in English\n-------------------------\n" # contenu_index_html += "

Articles in English

" diff --git a/enrich_html.py b/enrich_html.py index cc4b1942..1d7f0101 100755 --- a/enrich_html.py +++ b/enrich_html.py @@ -31,13 +31,24 @@ inline_the_css = False # inline_the_css=True -def enrich_one_file(html_content, partials={"header_page": "", "footer_content": ""}): +def enrich_one_file(html_content: str, partials: dict = {"header_page": "", "footer_content": ""}) -> str: + """ + Enrich a single HTML file by removing certain parts, extracting the body content, and incorporating a template. + The function also includes options to inline the CSS and customize the HTML content based on the provided partials. + + Parameters: + html_content (str): The original HTML content to be enriched. + partials (dict, optional): A dictionary containing partial HTML content for the header and footer. The default values are empty strings. + + Returns: + str: The enriched HTML content. + """ # remove some parts + html_content = extract_body_content(html_content) html_content = remove_properties_section(html_content) html_content = remove_article_head_properties_orgmode(html_content) html_content = remove_hint_html(html_content) - html_content = extract_body_content(html_content) if inline_the_css is True: print(' ----------- enrich_html: include css inline in each html page') diff --git a/gather_tags_in_json.py b/gather_tags_in_json.py index 8c020324..02d26429 100644 --- a/gather_tags_in_json.py +++ b/gather_tags_in_json.py @@ -1,27 +1,42 @@ -import os +import argparse import json +import os from collections import defaultdict +parser = argparse.ArgumentParser(description="Générer un site Web à partir de fichiers HTML.") +parser.add_argument("blog_name", help="Le chemin vers le dossier contenant les fichiers HTML.") + +args = parser.parse_args() + # Configuration -blog_folder = 'tykayn_blog' -directory = f'sources/{blog_folder}/lang_fr' # Remplacez par le chemin de votre dossier +blog_folder = args.blog_name +directory_base = f'sources/{blog_folder}' # Remplacez par le chemin de votre dossier +directory_fr = f'{directory_base}/lang_fr' # Remplacez par le chemin de votre dossier output_file = f'sources/{blog_folder}/converted/tags.json' # Fichier de sortie html_output_folder = f'html-websites/{blog_folder}/tags' # Dossier de sortie pour les fichiers HTML excluded_tags = {'PROPERTIES', 'CREATED', 'ID', 'END'} +count_orgfiles = 0 + def find_org_files(directory): org_files = [] - for root, dirs, files in os.walk(directory): - for file in files: - if file.endswith('.org'): - org_files.append(os.path.join(root, file)) + directories_to_scan = [directory, f'{directory}/lang_fr', f'{directory}/lang_en'] + + for directory in directories_to_scan: + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.org'): + org_files.append(os.path.join(root, file)) + + print(f"nombre de fichiers org: {len(org_files)}") return org_files -def extract_tags_from_file(file_path, excluded_tags): +def extract_tags_from_file(file_path, excluded_tags, count_not_tagged_files=0): tags = set() with open(file_path, 'r', encoding='utf-8') as file: + tag_found = False for line in file: # Check for orgmode tags :tag1:tag2: if ':' in line: @@ -30,19 +45,24 @@ def extract_tags_from_file(file_path, excluded_tags): tag = word[1:-1] if tag not in excluded_tags: tags.add(tag) + tag_found = True # Check for #+tags: tag1,tag2 if line.startswith('#+tags:'): for tag in line[len('#+tags:'):].split(','): tag = tag.strip() if tag and tag not in excluded_tags: tags.add(tag) + tag_found = True + if not tag_found: + count_not_tagged_files = count_not_tagged_files + 1 + print('no tag in the article', file_path) return tags -def group_files_by_tags(org_files, excluded_tags): +def group_files_by_tags(org_files, excluded_tags, count_not_tagged_files): tag_to_files = defaultdict(set) for file_path in org_files: - tags = extract_tags_from_file(file_path, excluded_tags) + tags = extract_tags_from_file(file_path, excluded_tags, count_not_tagged_files) for tag in tags: tag_to_files[tag].add(file_path) return tag_to_files @@ -71,7 +91,7 @@ def generate_html_pages(tag_to_files, html_output_folder):