orgmode-to-gemini-blog/utils.py

57 lines
1.5 KiB
Python
Raw Normal View History

2024-11-14 13:32:56 +01:00
#!/bin/python3
import re
# Location of the org-roam notes on disk. The value is specific to the
# author's machine and should be customized for each installation.
org_roam_dir: str = '/home/tykayn/Nextcloud/textes/orgmode/org-roam/'
# Regex locating an ":ID:" property: optional whitespace after the marker,
# then the identifier (letters, digits and hyphens) captured as group 1.
pattern_roam_id_search = r':ID:(?:\s+)?([a-zA-Z0-9-]+)'
2024-11-14 16:22:34 +01:00
2024-11-14 13:32:56 +01:00
def get_id_of_roam_note_content(content):
    """Return the ID found in *content*, or None when there is none.

    The text is scanned with the module-level ``pattern_roam_id_search``
    regex; the result is the first capture group of the first occurrence.
    """
    found = re.search(pattern_roam_id_search, content)
    return found.group(1) if found else None
2024-11-14 16:22:34 +01:00
2024-11-14 13:32:56 +01:00
def find_first_level1_title(content):
    """Return the title of the first level-1 org heading in *content*.

    When that heading is the literal ``Article``, the title of the first
    level-2 heading is returned instead. ``None`` is returned when there is
    no level-1 heading, or when the fallback finds no level-2 heading.
    """
    top_heading = re.search(r'^\* (.+)$', content, re.MULTILINE)
    if not top_heading:
        return None

    title = top_heading.group(1)
    if title != 'Article':
        return title

    # The first heading is 'Article': fall back to the first
    # second-level heading.
    sub_heading = re.search(r'^\*\* (.+)$', content, re.MULTILINE)
    return sub_heading.group(1) if sub_heading else None
2024-11-14 16:22:34 +01:00
def extract_body_content(html_content):
    """Return the markup located between ``<body ...>`` and ``</body>``.

    Only the first body element is considered, and its content may span
    several lines. When no body element is found, a notice is printed and
    *html_content* is returned unchanged.
    """
    body = re.search(r'<body[^>]*?>(.*?)</body>', html_content, re.DOTALL)
    if body is None:
        print('---- extract_body_content : no body found in this html')
        return html_content
    return body.group(1)
2024-11-14 16:22:34 +01:00
def remove_properties_section(text):
    """Strip the "Article" heading block from HTML *text*.

    Every span starting at ``<h1 id="article">Article</h1>`` and running up
    to the nearest following ``</ul>`` (inclusive, across line breaks) is
    removed; the rest of the text is returned untouched.
    """
    properties_block = re.compile(
        r"<h1 id=\"article\">Article</h1>.+?</ul>",
        flags=re.DOTALL,
    )
    return properties_block.sub("", text)
def remove_article_head_properties_orgmode(text):
    """Delete ``:PROPERTIES:`` ... ``:END:`` spans from *text*.

    Each span from ``:PROPERTIES:`` up to the nearest following ``:END:``
    (inclusive, across line breaks) is removed; everything else is kept.
    """
    drawer = re.compile(r":PROPERTIES:.+?:END:", flags=re.DOTALL)
    return drawer.sub("", text)
def remove_hint_html(text):
    """Remove every ``<p>ceci<sub>estduhtml</sub></p>`` paragraph from *text*."""
    hint_marker = re.compile(r"<p>ceci<sub>estduhtml</sub></p>", flags=re.DOTALL)
    return hint_marker.sub("", text)