import os
import re
from collections import defaultdict
from typing import Optional, Tuple

# Look up the former org-roam IDs, link each one to an article title,
# and write that ID into the matching wp2org export file.

# Folder paths (module-level defaults; find_correspondances receives its
# folders as arguments and does not read these).
previous_dir = 'sources/org-roam-export/qzine_blog'
after_dir = 'sources/generated_wp2org_output/qzine'

# (org-roam export folder, wp2org output folder) pairs processed by main().
BLOG_DIRECTORIES = (
    ('sources/org-roam-export/qzine_blog', 'sources/generated_wp2org_output/qzine'),
    ('sources/org-roam-export/tykayn_blog', 'sources/generated_wp2org_output/tkblog'),
    ('sources/org-roam-export/cipherbliss_blog', 'sources/generated_wp2org_output/cipherbliss'),
    ('sources/org-roam-export/cil_gometz', 'sources/generated_wp2org_output/cil'),
    ('sources/org-roam-export/helia_blog', 'sources/generated_wp2org_output/helia'),
)

# First "** " heading: where read_org_file looks for the article title.
_ROAM_TITLE_RE = re.compile(r'^\*\* (.+)', re.MULTILINE)
# First "* " heading: where read_org_file_exported looks for the title.
_EXPORT_TITLE_RE = re.compile(r'^\* (.+)', re.MULTILINE)
# A property drawer whose only entry is :ID: (captures the ID value).
_ID_DRAWER_RE = re.compile(
    r'^:PROPERTIES:\s*:ID:\s*([^\s]+)\s*:END:', re.MULTILINE
)


def _read_title_and_id(file_path: str, title_re) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(title, id)`` read from *file_path*; each is None if absent.

    *title_re* is the compiled pattern whose first group is the title.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    title_match = title_re.search(content)
    id_match = _ID_DRAWER_RE.search(content)
    title = title_match.group(1) if title_match else None
    id_value = id_match.group(1) if id_match else None
    return title, id_value


def read_org_file(file_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Read an org file and return ``(title, id)``.

    The title is the text of the first heading starting with ``** ``; the ID
    comes from the first ``:PROPERTIES:`` drawer containing only ``:ID:``.
    Either element is None when it is not found.
    """
    return _read_title_and_id(file_path, _ROAM_TITLE_RE)


def read_org_file_exported(file_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Read an exported org file and return ``(title, id)``.

    Same as :func:`read_org_file`, except the title is taken from the first
    heading starting with ``* ``.
    """
    return _read_title_and_id(file_path, _EXPORT_TITLE_RE)


def write_org_file(file_path: str, title: Optional[str], new_id: str) -> None:
    """Rewrite *file_path* in place, setting its ID drawer(s) to *new_id*.

    Every ``:PROPERTIES:`` drawer that contains only an ``:ID:`` entry is
    replaced. *title* is accepted for interface compatibility and is not used.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    replacement = f':PROPERTIES:\n:ID: {new_id}\n:END:'
    # Use a function replacement so that backslashes in new_id are written
    # literally instead of being parsed as a re.sub template (e.g. "\1").
    updated_content = _ID_DRAWER_RE.sub(lambda _match: replacement, content)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(updated_content)


def find_correspondances(previous_dir: str, after_dir: str) -> None:
    """Copy org-roam IDs from *previous_dir* onto same-titled files in *after_dir*.

    Each ``.org`` file in *previous_dir* is read for its title and ID. Each
    ``.org`` file in *after_dir* whose title matches gets its ID drawer
    rewritten with the previous ID. A summary is printed to stdout, including
    the files of *after_dir* that found no match.
    """
    # Collect title -> {id, file_name} from the previous folder, warning
    # about files in which no title could be found.
    previous_files = defaultdict(dict)
    for file_name in sorted(os.listdir(previous_dir)):
        if not file_name.endswith('.org'):
            continue
        title, id_value = read_org_file(os.path.join(previous_dir, file_name))
        if not title:
            print(f"Fichier: {previous_dir}/{file_name}, Titre non trouvé")
            continue
        if id_value:
            previous_files[title]['id'] = id_value
            previous_files[title]['file_name'] = file_name

    # Collect title -> file_name from the after folder.
    after_files = {}
    for file_name in sorted(os.listdir(after_dir)):
        if not file_name.endswith('.org'):
            continue
        title, _ = read_org_file_exported(os.path.join(after_dir, file_name))
        if title:
            after_files[title] = file_name

    # Match on title and rewrite the matched files.
    matched_count = 0
    unmatched_after_files = []
    for title, file_name in after_files.items():
        if title in previous_files:
            matched_count += 1
            new_id = previous_files[title]['id']
            write_org_file(os.path.join(after_dir, file_name), title, new_id)
        else:
            unmatched_after_files.append(file_name)

    # Share of after-folder files for which a match was found.
    total_after_files = len(after_files)
    if total_after_files > 0:
        match_proportion = matched_count / total_after_files * 100
    else:
        match_proportion = 0

    # Report the results.
    print(f"matched_count: {matched_count}")
    print(f"Proportion de correspondances trouvées : {match_proportion:.2f}% , {len(unmatched_after_files)} fichiers non trouvés")
    if unmatched_after_files:
        print("Fichiers sans correspondance dans le dossier after :")
        for file_name in unmatched_after_files:
            # These files live in after_dir, so report them under that path.
            print(f"  {after_dir}/{file_name}")
    else:
        print("Tous les fichiers dans le dossier after ont été mis en correspondance.")


def main() -> None:
    """Run the ID reconciliation for every configured blog."""
    for roam_dir, export_dir in BLOG_DIRECTORIES:
        find_correspondances(roam_dir, export_dir)


if __name__ == "__main__":
    main()