orgmode-to-gemini-blog/utils.py
2024-11-15 01:45:11 +01:00

57 lines
1.5 KiB
Python

#!/bin/python3
import re
# Directory of the org-roam notes. This value is machine-specific and
# should be customized for the local setup.
org_roam_dir: str = '/home/tykayn/Nextcloud/textes/orgmode/org-roam/'
# Regex locating an ":ID:" entry: the marker, optional whitespace, then the
# identifier itself (ASCII letters, digits and hyphens) captured as group 1.
pattern_roam_id_search: str = r':ID:(?:\s+)?([a-zA-Z0-9-]+)'
def get_id_of_roam_note_content(content):
    """Return the identifier that follows the first ``:ID:`` marker in *content*.

    The search uses the module-level ``pattern_roam_id_search`` regex.

    Returns:
        The captured identifier string, or ``None`` when the pattern does
        not match anywhere in *content*.
    """
    found = re.search(pattern_roam_id_search, content)
    return found.group(1) if found else None
def find_first_level1_title(content):
    """Return the text of the first usable heading in org-mode *content*.

    The first line of the form ``* <text>`` is looked up. If its text is
    anything other than the literal ``Article``, that text is returned.
    When it is exactly ``Article``, the text of the first ``** <text>``
    line found in *content* is returned instead.

    Returns:
        The heading text, or ``None`` when there is no level-1 heading, or
        when the level-1 heading is ``Article`` and no level-2 heading
        exists.
    """
    top_heading = re.search(r'^\* (.+)$', content, re.MULTILINE)
    if not top_heading:
        return None

    title = top_heading.group(1)
    if title != 'Article':
        return title

    # The first heading is the "Article" placeholder: fall back to the
    # first level-2 heading.
    sub_heading = re.search(r'^\*\* (.+)$', content, re.MULTILINE)
    if sub_heading:
        return sub_heading.group(1)
    return None
def extract_body_content(html_content):
    """Return what lies between the first ``<body ...>`` tag and ``</body>``.

    The match spans newlines. When no body element is found, a notice is
    printed to standard output and *html_content* is returned unchanged.
    """
    body = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    if not body:
        print('---- extract_body_content : no body found in this html')
        return html_content
    return body.group(1)
def remove_properties_section(text):
    """Strip the generated "Article" properties section from HTML *text*.

    Every span starting at ``<h1 id="article">Article</h1>`` and running
    (across newlines, non-greedily) up to and including the next ``</ul>``
    is deleted. Text without such a span is returned unchanged.
    """
    properties_block = re.compile(
        r"<h1 id=\"article\">Article</h1>.+?</ul>",
        re.DOTALL,
    )
    return properties_block.sub("", text)
def remove_article_head_properties_orgmode(text):
    """Delete ``:PROPERTIES:`` ... ``:END:`` drawers from org-mode *text*.

    Each span from ``:PROPERTIES:`` to the nearest following ``:END:``
    (matched across newlines) is removed; every occurrence is affected.
    Text without such a span is returned unchanged.
    """
    return re.sub(
        pattern=r":PROPERTIES:.+?:END:",
        repl="",
        string=text,
        flags=re.DOTALL,
    )
def remove_hint_html(text):
    """Remove every ``<p>ceci<sub>estduhtml</sub></p>`` marker from *text*.

    Text that does not contain the marker is returned unchanged.
    """
    hint_markup = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", re.DOTALL)
    return hint_markup.sub("", text)