ploum-fork/publish.py

531 lines
17 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/python
# TODO: links should all be absolute, never relative
import sys
import os
from pathlib import Path
# used to escape the content
import html
# used to produce the time
from datetime import datetime
import textwrap
import unicodedata
# Site identity; fill_globals() substitutes these into the templates.
global_title = "webair.xyz"
global_subtitle = "Theo's web corner"
global_name = "Theo"
base_url = "webair.xyz/"

# Text substituted for $GPGPUBKEY; stays empty when public.gpg is absent.
try:
    global_pubkey = Path("public.gpg").read_text()
except FileNotFoundError:
    print("GPG public key not found!")
    global_pubkey = ""

# Output trees, one per published format.
geminidir = "./public_gemini"
htmldir = "./public_html"
# Prefix that gmi2html() puts in front of links when local=True.
local_url = "/home/ploum/dev/gemlog/"
# Number of posts after which the short index stops listing.
short_limit = 25
# Wrap width used by plaintext().
maxwidth = 72
# Post dates that writehtml() compares against for the extra /post/ copy.
old_post_url = [
    "2005-01-25",
    "2013-05-17",
    "2011-02-07",
    "2012-03-19",
    "2012-01-18",
    "2012-10-15",
]
# Template file names.
html_page_template = "page_template.html"
email_template = "email_template.html"
gemini_page_template = "page_template.gmi"
# Link suffixes that is_image() treats as pictures.
image_extensions = [".png", ".jpg", ".jpeg"]
def is_image(link):
    """Tell whether *link* ends with one of the entries of ``image_extensions``.

    The check is a plain, case-sensitive suffix comparison.
    """
    return any(link.endswith(suffix) for suffix in image_extensions)
def generate_rels(links):
    """Build one ``<link rel="me">`` element per entry of *links*.

    Args:
        links: iterable of URL strings.

    Returns:
        The elements concatenated into one string, each followed by a
        newline; an empty string when *links* is empty.
    """
    # The URL lands inside a double-quoted attribute, hence the escaping.
    return "".join(
        f'<link rel="me" href="{html.escape(link, quote=True)}">\n'
        for link in links
    )
def fill_globals(text):
    """Substitute the site-wide ``$`` placeholders found in *text*.

    The placeholders are handled one after the other, in the order of the
    table below, each being replaced by the matching module-level setting.
    """
    substitutions = (
        ("$AUTHOR", global_name),
        ("$BASE_URL", base_url),
        ("$GLOBAL_TITLE", global_title),
        ("$SUBTITLE", global_subtitle),
        ("$GPGPUBKEY", global_pubkey),
    )
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text
def gmi2html(raw,index=False,signature=None,relative_links=True,local=False):
    """Convert the gemtext string *raw* into an HTML fragment.

    The first ``# `` heading is left out of the result; later ones become
    ``<h1>``. Relative links ending in ``.gmi`` are pointed at the matching
    ``.html`` file. Every block still open when the input ends (list,
    quote, preformatted text, index abstract) is closed.

    Args:
        raw: gemtext source.
        index: when true, everything before the first ``##`` heading is
            wrapped in ``<div class="abstract">`` and link lists opened in
            that part get the ``horizontal`` class.
        signature: optional HTML appended verbatim inside
            ``<div class="signature">``.
        relative_links: when false, relative links are rewritten to
            ``https://`` + ``base_url``.
        local: when true (and *relative_links* is true), relative links are
            prefixed with ``local_url``.

    Returns:
        The HTML fragment as a string.
    """
    def sanitize(text):
        # Normalise composed characters, then make the text safe for HTML.
        return html.escape(unicodedata.normalize('NFC', text))

    inquote = False
    inpre = False
    inul = False
    inindex = index
    content = ""
    title = ""
    if inindex:
        content += "<div class=\"abstract\">\n"
    for line in raw.split("\n"):
        # A list or a quote ends as soon as a line of another kind shows up.
        if inul and not line.startswith("=>") and not line.startswith("* "):
            content += "</ul>\n"
            inul = False
        if inquote and not line.startswith(">"):
            content += "</blockquote>\n"
            inquote = False
        if line.startswith("```"):
            content += "</pre>\n" if inpre else "<pre>"
            inpre = not inpre
        elif inpre:
            content += sanitize(line) + "\n"
        elif line.startswith("* "):
            if not inul:
                content +="<ul>"
                inul = True
            content += "<li>%s</li>\n" %sanitize(line[2:])
        elif line.startswith(">"):
            if not inquote:
                content += "<blockquote>"
                inquote = True
            content += sanitize(line[1:]) + "<br>"
        elif line.startswith("##"):
            if inindex:
                # The abstract stops at the first sub-heading.
                inindex = False
                content += "</div>\n"
            # Count the leading "#"; safe even when the line holds nothing else.
            level = len(line) - len(line.lstrip("#"))
            level = min(6, level) # There's no h7 in html
            content += f"<h{level}>"
            content += sanitize(line.lstrip("# "))
            content += f"</h{level}>\n"
        elif line.startswith("# "):
            # The first title is not added: it is used in the template.
            if not title:
                title = sanitize(line[2:])
            else:
                content += "<h1>"
                content += sanitize(line[2:])
                content += "</h1>\n"
        elif line.startswith("=>"):
            # Split what follows the marker, so "=>url" works like "=> url".
            parts = line[2:].strip().split(maxsplit=1)
            if not parts:
                # A bare "=>" has no target: nothing to render.
                continue
            link = parts[0]
            # Only relative targets are rewritten; absolute URLs and mailto:
            # addresses must reach the page untouched.
            if "://" not in link and not link.startswith("mailto:"):
                if link.endswith(".gmi"):
                    link = link[:-4] + ".html"
                if not relative_links:
                    link = "https://" + base_url + link.lstrip("./")
                elif local:
                    link = local_url + link.lstrip("./")
            if len(parts) == 1:
                description = ""
                name = sanitize(link)
            else:
                name = sanitize(parts[1])
                description = name
            # The target goes into quoted attributes, so it is escaped too.
            href = html.escape(link)
            if is_image(link):
                if inul:
                    content += "</ul>\n"
                    inul = False
                imgtag = f"<img alt='{name}' src='{href}' class='center'>"
                content += f"<a href='{href}'>{imgtag}</a>"
                if description:
                    content += f"<p class='subtitle'>{description}</p>"
            else:
                if not inul:
                    if inindex:
                        content += "<ul class=\"horizontal\">\n"
                    else:
                        content += "<ul>\n"
                    inul = True
                if inindex:
                    content += " "
                content += "<li><a href=\"%s\">%s</a></li>"%(href,name)
                if inindex:
                    content += " "
                content += "\n"
        elif line.strip() :
            content += "<p>%s</p>\n"%sanitize(line)
    # Close whatever is still open once the input is exhausted.
    if inul:
        content += "</ul>\n"
    if inquote:
        content += "</blockquote>\n"
    if inpre:
        content += "</pre>\n"
    if inindex:
        content += "</div>\n"
    if signature:
        content += "\n<div class=\"signature\">" + signature + "</div>"
    return content
def plaintext(content):
    """Render the gemtext string *content* as wrapped plain text.

    Link lines give the link text (when there is one) followed by the URL
    on its own line; relative targets are made absolute with ``base_url``
    and a ``.gmi`` suffix becomes ``.html``. Lines starting with three
    backticks are dropped. Every other line is wrapped to ``maxwidth``
    columns, and each output line of a heading is underlined with ``=``.

    Args:
        content: gemtext source.

    Returns:
        The plain-text rendering as a string.
    """
    chunks = []
    for line in content.split("\n"):
        if line.startswith("=>"):
            # Split what follows the marker, so "=>url" works like "=> url".
            parts = line[2:].split(maxsplit=1)
            if not parts:
                # A bare "=>" has no target: nothing to output.
                continue
            url = parts[0]
            if len(parts) > 1:
                chunks.append(textwrap.fill(parts[1],width=maxwidth) + "\n")
            if "://" not in url and "mailto:" not in url:
                if url.endswith(".gmi"):
                    url = url[:-4] + ".html"
                url = "https://" + base_url + url.lstrip("/")
            chunks.append(url.rstrip() + "\n\n")
        elif line.startswith("```"):
            continue
        else:
            istitle = line.startswith("#")
            wrapped = textwrap.wrap(line.lstrip("# "),width=maxwidth)
            final = len(wrapped) - 1
            for position, piece in enumerate(wrapped):
                # Every wrapped line but the last keeps a trailing space
                # before its newline.
                chunks.append(piece + ("\n" if position == final else " \n"))
                if istitle:
                    chunks.append(len(piece)*"=" + "\n")
            if wrapped:
                # Blank line after each non-empty paragraph.
                chunks.append("\n")
    return "".join(chunks)
# We first create a list of all entries.
# Each entry is a dictionary with the following keys:
# - title
# - date (empty string if it's a page)
# - gmifilename
# - htmlfilename
# - lang
# - folder
# - gem_content
# - html_content
# - gem_url
# - html_url
# - image
def build_post(filepath,lang="fr",signature=True,relative_links=True,local=False):
    """Read one gemtext file and return its entry dictionary.

    Args:
        filepath: path object of the ``.gmi`` file (its ``name`` is used).
        lang: language code stored in the entry; also the folder in which
            ``signature.html`` is looked up.
        signature: when true and the signature file exists, its content is
            handed to gmi2html.
        relative_links, local: passed through to gmi2html.

    Returns:
        A dictionary with the keys lang, gmifilename, htmlfilename, gem_url,
        html_url, gem_content, date, html_content, plus title when the first
        line of the file starts with ``# ``.
    """
    with open(filepath) as source:
        lines = source.readlines()

    # File name without its last extension.
    filename = ".".join(filepath.name.split(".")[:-1])
    gmifilename = filename + ".gmi"
    htmlfilename = filename + ".html"
    post = {
        "lang": lang,
        "gmifilename": gmifilename,
        "htmlfilename": htmlfilename,
        "gem_url": "gemini://" + base_url + gmifilename,
        "html_url": "https://" + base_url + htmlfilename,
    }

    # A leading "# " line is the title and is taken out of the content.
    if lines and lines[0].startswith("# "):
        post["title"] = lines.pop(0).strip("# ").strip()
    content = "".join(lines)
    post["gem_content"] = content

    # Dated posts are named 20YY-MM-DD-something (outdated in 2100).
    name_parts = filename.split("-")
    if filename.startswith("20") and len(name_parts) > 3:
        post["date"] = "-".join(name_parts[:3])
    else:
        post["date"] = ""

    # The HTML version carries the per-language signature when requested.
    signature_content = None
    sigpath = Path(lang + "/signature.html")
    if signature and sigpath.exists():
        with open(sigpath) as sigf:
            signature_content = sigf.read()
    post["html_content"] = gmi2html(
        content,
        signature=signature_content,
        relative_links=relative_links,
        local=local,
    )
    return post
def build_list(allposts,folder,local=False):
    """Append an entry to *allposts* for every post file found in *folder*.

    An empty *folder* means the current directory, which is scanned without
    descending into sub-folders; any other folder is walked recursively.
    Files count as posts when they end in ``.gmi`` and their name contains
    neither "index" nor "template".

    Returns:
        The *allposts* list that was passed in.
    """
    recurse = bool(folder)
    folder = folder + "/" if folder else "./"
    for entry in os.listdir(folder):
        full_name = folder + entry
        path = Path(full_name)
        if recurse and path.is_dir():
            print("Building recursively %s from %s"%(path,folder))
            allposts = build_list(allposts,full_name,local=local)
            continue
        is_post = (
            entry.endswith(".gmi")
            and "index" not in entry
            and "template" not in entry
        )
        if not is_post:
            continue
        # The language is the first two characters of the folder name;
        # folder strings of two characters or fewer (such as "./") get "fr".
        lang = folder[:2] if len(folder) > 2 else "fr"
        post = build_post(path,lang=lang,local=local)
        post["folder"] = folder
        allposts.append(post)
    return allposts
def build_atom_post(p):
    """Render the post *p* as one entry, using ``atom_post_template.xml``.

    The post date (``YYYY-MM-DD``) is written in ISO format with a ``Z``
    suffix; title and HTML content are escaped before insertion.

    Returns:
        The filled-in template as a string.
    """
    with open("atom_post_template.xml") as template_file:
        entry = template_file.read()
    date = datetime.strptime(p["date"],"%Y-%m-%d").isoformat() + "Z"
    content = html.escape(p["html_content"])
    title = html.escape(p["title"])
    fields = (
        ("$DATE", date),
        ("$TITLE", title),
        ("$URL", p["html_url"]),
        ("$CONTENT", content),
        ("$LANG", p["lang"]),
    )
    # Substitutions are applied one after the other, in table order.
    for placeholder, value in fields:
        entry = entry.replace(placeholder, value)
    return fill_globals(entry)
def write_atom_index(allposts,folder,limit=10):
    """Write the feed for *folder* from the newest dated posts.

    The feed goes to ``atom_<folder>.xml`` in ``htmldir``, or to
    ``atom.xml`` when *folder* is empty; in that case the same content is
    also written to ``feed/index.html`` and ``rss/index.html``.

    Args:
        allposts: list of entry dictionaries; sorted in place, newest first.
        folder: folder prefix a post must match to be included ("" = all).
        limit: maximum number of entries in the feed.
    """
    # Imported here because the module only imports the ``datetime`` class.
    from datetime import timezone

    with open("atom_template.xml") as f:
        atom_template = f.read()
    if folder:
        atomname = "atom_" + folder.strip("/").replace("/","_") + ".xml"
        mirrors = []
    else:
        atomname = "atom.xml"
        mirrors = [htmldir + "/feed/", htmldir + "/rss/"]
    atompath = htmldir + "/" + atomname

    allposts.sort(reverse=True,key=postdate)
    atom_posts = []
    for p in allposts:
        if len(atom_posts) >= limit:
            break
        # Entries without a full YYYY-MM-DD date (pages, indexes) are skipped.
        if "date" in p and p["folder"].startswith(folder) and len(p["date"]) >= 10:
            atom_posts.append(build_atom_post(p))
    atom_content = "".join(atom_posts)

    # The "Z" suffix means UTC, so the timestamp must be taken in UTC.
    date = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    lang = "en" if folder.startswith("en") else "fr"
    url = "https://"+base_url
    feedurl = url + atomname
    final = fill_globals(
        atom_template
        .replace("$CONTENT", atom_content)
        .replace("$DATE", date)
        .replace("$URL", url)
        .replace("$LANG", lang)
        .replace("$FEEDURL", feedurl)
    )
    with open(atompath,"w") as f:
        f.write(final)
    for directory in mirrors:
        os.makedirs(directory, exist_ok=True)
        with open(directory + "index.html","w") as f:
            f.write(final)
# Build the index entry for `folder` and append it to `allposts`.
#
# Parameters:
# - allposts: list of entry dictionaries; sorted in place (newest first) and
#   extended with the new index entry.
# - folder: folder whose posts are listed; "" selects every post.
# - short: when true, listing stops once short_limit posts have been added and
#   a link to index_all.gmi is appended; year headings are only added when
#   short is false.
# Returns the same `allposts` list.
#
# NOTE(review): the indentation of this file was lost, so the nesting of some
# statements below cannot be read from the text (see the notes further down).
# Confirm against the upstream file before re-indenting.
def build_index(allposts,folder,short=False):
index = {}
# The index file is named after the folder; the root index is "index" (short)
# or "index_all".
if folder:
indexname = "index_" + folder.strip("/").replace("/","_")
lang = folder[:2]
else:
if short:
indexname = "index"
else:
indexname = "index_all"
lang = "fr"
index["gmifilename"] = indexname + ".gmi"
index["htmlfilename"] = indexname + ".html"
index["lang"] = lang
index["folder"] = folder
content = ""
# Introduction text: <folder>/index.gmi when that path exists, otherwise the
# root index.gmi.
path = Path(folder + "/index.gmi")
if path.exists():
with open(path) as ind:
content += ind.read()
ind.close()
else:
with open("index.gmi") as main:
lines = main.readlines()
for l in lines:
content += l
main.close()
# fr/index.gmi and en/index.gmi are added without their "# " title lines.
# NOTE(review): presumably these reads belong to the else branch above --
# confirm.
with open("fr/index.gmi") as fr:
lines = fr.readlines()
for l in lines:
if not l.startswith("# "):
content += l
fr.close()
with open("en/index.gmi") as en:
lines = en.readlines()
for l in lines:
if not l.startswith("# "):
content += l
en.close()
# NOTE(review): cannot tell from here whether postindex.gmi is read for every
# index or only in the else branch -- confirm.
with open("postindex.gmi") as main:
lines = main.readlines()
for l in lines:
content += l
main.close()
# A leading "# " line becomes the index title and is removed from the content.
if content:
lines = content.split("\n")
if lines[0].strip().startswith("# "):
index["title"] = lines.pop(0).strip().removeprefix("# ")
content = "\n".join(lines) + "\n"
allposts.sort(reverse=True,key=postdate)
# Higher than any real year, so the first listed post opens a year heading.
last_year = 10000
nbr_post = 0
stop = False
print("we have %s posts"%len(allposts))
for p in allposts:
if short and nbr_post >= short_limit:
stop = True
# Only entries with a non-empty date whose folder starts with `folder` are
# listed.
if not stop and "date" in p.keys() and p["date"] and p["folder"].startswith(folder):
date = p["date"]
year = int(date[:4])
if not short and year < last_year:
last_year = year
content += "\n## %s\n\n"%year
if len(date) >= 10:
# NOTE(review): p["title"] raises KeyError for a dated post whose file does
# not start with a "# " line (build_post only sets "title" in that case).
line = "=> %s %s : %s\n"%(p["gmifilename"],date,p["title"])
content += line
nbr_post += 1
# giving a title to year. Not working with different languages
#else:
#content += p["gem_content"]
if short and stop:
content += "\n=> index_all.gmi All posts"
# The gemtext version gets the global placeholders filled here; the HTML
# version is produced from the unfilled content.
index["gem_content"] = fill_globals(content)
index["html_content"] = gmi2html(content,index=True)
index["gem_url"] = "gemini://" + base_url + index["gmifilename"]
index["html_url"] = "https://" + base_url + index["htmlfilename"]
if "title" not in index.keys():
index["title"] = global_title
allposts.append(index)
return allposts
def filltemplate(post,template):
    """Fill the HTML template stored at path *template* with *post*.

    ``$TITLE`` (when the post has a title), ``$CONTENT``, ``$LANG``,
    ``$GEMLINK``, ``$HTMLLINK`` and ``$IMAGE_HEADER`` are replaced, then
    fill_globals handles the site-wide placeholders. ``$SUBTITLE`` is not
    post-specific: fill_globals replaces it with the site subtitle.

    Args:
        post: entry dictionary; html_content, lang, gem_url and html_url
            are required, title and image are optional.
        template: path of the template file.

    Returns:
        The complete page as a string.
    """
    with open(template) as f:
        page = f.read()
    if "image" in post:
        image = "<img src=\"../%s\" class=\"header\">"%post["image"]
    else:
        image = ""
    if "title" in post:
        # Titles are raw text taken from the gemtext source, so they must be
        # escaped before landing in HTML.
        page = page.replace("$TITLE", html.escape(post["title"]))
    return fill_globals(
        page
        .replace("$CONTENT", post["html_content"])
        .replace("$LANG", post["lang"])
        .replace("$GEMLINK", post["gem_url"])
        .replace("$HTMLLINK", post["html_url"])
        .replace("$IMAGE_HEADER", image)
    )
def writehtml(post):
    """Write the HTML page of *post* below ``htmldir``.

    A second copy goes to ``post/`` when the file name ends in
    "index.html" and the post date is listed in ``old_post_url``.
    Missing parent folders are created.
    """
    htmlfilename = post["htmlfilename"]
    targets = [htmldir + "/" + htmlfilename]
    has_old_url = (
        htmlfilename.endswith("index.html")
        and "date" in post
        and post["date"] in old_post_url
    )
    if has_old_url:
        # old posts with old url
        targets.append(htmldir + "/post/" + htmlfilename)
    page = filltemplate(post, html_page_template)
    for target in targets:
        parent = Path(target).parent
        if not parent.exists():
            os.makedirs(parent)
        with open(target, mode="w") as out:
            out.write(page)
def writegmi(post):
    """Write the gemtext page of *post* below ``geminidir``.

    The gemini page template is filled with the post title (when present),
    content, date (empty when the post has none) and both URLs, then with
    the site-wide placeholders. Missing parent folders are created.
    """
    with open(gemini_page_template) as template_file:
        page = template_file.read()
    if "gem_content" not in post:
        print(f"no gem_content for {post}")
    if "title" in post:
        page = page.replace("$TITLE", post["title"])
    date = post.get("date", "")
    page = page.replace("$CONTENT", post["gem_content"])
    page = page.replace("$DATE", date)
    page = page.replace("$GEMLINK", post["gem_url"])
    page = page.replace("$HTMLLINK", post["html_url"])
    final_page = fill_globals(page)

    filename = geminidir + "/" + post["gmifilename"]
    parent = Path(filename).parent
    if not parent.exists():
        os.makedirs(parent)
    with open(filename, mode="w") as out:
        out.write(final_page)
def postdate(p):
    """Sort key: the ``date`` of entry *p*, or "" when it has none."""
    return p.get("date", "")
def copy_static_files(source, dest):
    """Copy the content of folder *source* into folder *dest*.

    Sub-folders are copied recursively and merged into existing ones.
    Top-level entries whose name starts with a dot are skipped, as the
    ``source/*`` shell pattern used previously never matched them. The copy
    is finished when the function returns.

    Args:
        source: folder holding the static files; when it does not exist a
            message is printed and nothing is copied.
        dest: destination folder, created when missing.
    """
    # Imported here so the block does not depend on a new module-level import.
    import shutil

    if not os.path.isdir(source):
        print(f"Static folder {source} not found, nothing copied")
        return
    os.makedirs(dest, exist_ok=True)
    for entry in sorted(os.listdir(source)):
        if entry.startswith("."):
            continue
        origin = os.path.join(source, entry)
        target = os.path.join(dest, entry)
        if os.path.isdir(origin):
            shutil.copytree(origin, target, dirs_exist_ok=True)
        else:
            shutil.copy(origin, target)
# Main call: build every post and index, then write both output trees.
if __name__ == "__main__":
    # Make sure the destination folders exist.
    for outdir in (htmldir, geminidir):
        os.makedirs(outdir, exist_ok=True)

    all_posts = []
    # "local" as first argument makes links point to local_url.
    local = len(sys.argv) > 1 and sys.argv[1] == "local"
    if local:
        print("building locally")

    # Root folder first, then one pass per language folder.
    for folder in ["", "fr", "en"]:
        all_posts = build_list(all_posts, folder, local=local)
        all_posts = build_index(all_posts, folder)
        write_atom_index(all_posts, folder)

    # The global indexes and feed are built again once every post is known.
    all_posts = build_index(all_posts, "", short=True)
    all_posts = build_index(all_posts, "", short=False)
    write_atom_index(all_posts, "")

    all_posts.sort(reverse=True, key=postdate)
    for p in all_posts:
        writehtml(p)
        writegmi(p)
    copy_static_files("static", htmldir)