#!/bin/python3
"""Regex helpers for reading org-roam notes and cleaning their HTML export."""
import re
from typing import Optional

# this path should be customized
org_roam_dir: str = '/home/tykayn/Nextcloud/textes/orgmode/org-roam/'

# Matches an ":ID:" property followed by optional whitespace and captures the
# identifier (letters, digits and dashes).
pattern_roam_id_search = r':ID:(?:\s+)?([a-zA-Z0-9-]+)'


def get_id_of_roam_note_content(content: str) -> Optional[str]:
    """Return the first ``:ID:`` value found in *content*, or None."""
    found = re.search(pattern_roam_id_search, content)
    return found.group(1) if found else None


def find_first_level1_title(content: str) -> Optional[str]:
    """Return the text of the first ``* `` heading of an org document.

    When that heading is exactly ``Article``, the first ``** `` heading is
    returned instead. Returns None when no suitable heading exists.
    """
    level1 = re.search(r'^\* (.+)$', content, re.MULTILINE)
    if level1 is None:
        return None
    if level1.group(1) != 'Article':
        return level1.group(1)
    # The top-level heading is the generic "Article" one: fall back to the
    # first second-level heading.
    level2 = re.search(r'^\*\* (.+)$', content, re.MULTILINE)
    return level2.group(1) if level2 else None


def extract_body_content(html_content: str) -> str:
    """Return what sits between ``<body ...>`` and ``</body>``.

    When no body element is found, a notice is printed and *html_content*
    is returned unchanged.
    """
    # [^>]*? skips any attributes on the opening tag; DOTALL lets the
    # captured group span several lines.
    pattern = r'<body[^>]*?>(.*?)</body>'
    found = re.search(pattern, html_content, re.DOTALL)
    if found:
        return found.group(1)
    print('---- extract_body_content : no body found in this html')
    return html_content


def remove_properties_section(text: str) -> str:
    """Remove the "Article" heading and what follows it up to ``</ul>``."""
    # NOTE(review): only the text "Article" and the lazy ".+?" of this
    # pattern were legible; the surrounding tags are reconstructed.
    # Confirm them against the HTML actually produced by the exporter.
    pattern = r'<h1 id="article">Article</h1>.+?</ul>'
    return re.sub(pattern, "", text, flags=re.DOTALL)


def remove_article_head_properties_orgmode(text: str) -> str:
    """Remove every ``:PROPERTIES:`` ... ``:END:`` drawer from org text."""
    # Lazy match + DOTALL: each drawer is removed up to its own ":END:",
    # even when it spans several lines.
    pattern = r":PROPERTIES:.+?:END:"
    return re.sub(pattern, "", text, flags=re.DOTALL)


def remove_hint_html(text: str) -> str:
    """Remove the exported "ceci_estduhtml" marker paragraph from HTML."""
    # NOTE(review): the tags of this pattern are reconstructed from the org
    # text "ceci_estduhtml" (the underscore is assumed to be exported as
    # <sub>); confirm against the real export.
    pattern = r"<p>ceci<sub>estduhtml</sub></p>"
    return re.sub(pattern, "", text, flags=re.DOTALL)