57 lines
1.5 KiB
Python
57 lines
1.5 KiB
Python
#!/bin/python3
|
|
import re
|
|
|
|
# this path should be customized
|
|
# Directory holding the org-roam notes (going by the name); the value is
# specific to one machine and has to be adapted to the local setup.
org_roam_dir: str = '/home/tykayn/Nextcloud/textes/orgmode/org-roam/'
|
|
|
|
# Regex locating an ":ID:" marker; after optional whitespace, group 1 captures
# the identifier as a run of ASCII letters, digits and hyphens.
pattern_roam_id_search = r':ID:(?:\s+)?([a-zA-Z0-9-]+)'
|
|
|
|
|
|
def get_id_of_roam_note_content(content):
    """Return the first identifier found after an ``:ID:`` marker in *content*.

    The lookup relies on the module-level ``pattern_roam_id_search`` regex.

    Args:
        content: Text of a note to scan.

    Returns:
        The captured identifier string, or ``None`` when the pattern does not
        occur in *content*.
    """
    found = re.search(pattern_roam_id_search, content)
    return found.group(1) if found else None
|
|
|
|
|
|
def find_first_level1_title(content):
    """Return the text of the first usable heading in *content*.

    The first line of the form ``* <text>`` is looked up. When its text is
    anything other than ``Article`` it is returned as is. When it is exactly
    ``Article``, the first line of the form ``** <text>`` is returned instead.

    Args:
        content: Text to scan, line by line.

    Returns:
        The heading text, or ``None`` when there is no ``* `` heading, or when
        the first one is ``Article`` and no ``** `` heading exists.
    """
    top_heading = re.search(r'^\* (.+)$', content, re.MULTILINE)
    if not top_heading:
        return None

    title = top_heading.group(1)
    if title != 'Article':
        return title

    # The top-level heading is the generic "Article" wrapper: fall back to
    # the first second-level heading.
    sub_heading = re.search(r'^\*\* (.+)$', content, re.MULTILINE)
    return sub_heading.group(1) if sub_heading else None
|
|
|
|
|
|
def extract_body_content(html_content):
    """Return what lies between the first ``<body ...>`` tag and ``</body>``.

    Args:
        html_content: HTML text to search; the body may span several lines.

    Returns:
        The inner text of the body element. When no body element is found, a
        notice is printed and *html_content* is returned unchanged.
    """
    body = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    if not body:
        print('---- extract_body_content : no body found in this html')
        return html_content
    return body.group(1)
|
|
|
|
|
|
def remove_properties_section(text):
    """Strip every span from the ``Article`` h1 heading up to the next ``</ul>``.

    Args:
        text: HTML text to clean.

    Returns:
        *text* with each ``<h1 id="article">Article</h1> ... </ul>`` span
        (shortest match, possibly spanning lines) removed.
    """
    properties_block = re.compile(
        r"<h1 id=\"article\">Article</h1>.+?</ul>",
        re.DOTALL,
    )
    return properties_block.sub("", text)
|
|
|
|
|
|
def remove_article_head_properties_orgmode(text):
    """Strip every ``:PROPERTIES:`` ... ``:END:`` span from *text*.

    Args:
        text: Org-mode text to clean.

    Returns:
        *text* without those spans. Each match is the shortest one, may span
        several lines, and needs at least one character between the markers.
    """
    return re.sub(
        pattern=r":PROPERTIES:.+?:END:",
        repl="",
        string=text,
        flags=re.DOTALL,
    )
|
|
|
|
|
|
def remove_hint_html(text):
    """Delete each ``<p>ceci<sub>estduhtml</sub></p>`` paragraph from *text*.

    Args:
        text: HTML text to clean.

    Returns:
        *text* with every occurrence of that exact paragraph removed.
    """
    hint_paragraph = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", flags=re.DOTALL)
    return hint_paragraph.sub("", text)
|