# ploum-fork/publish.py — static site generator (Gemini + HTML) for webair.xyz
#!/usr/bin/env python3
# TODO :links should all be absolute, never relative
import sys
import os
from pathlib import Path
# used to escape the content
import html
# used to produce the time
from datetime import datetime
import textwrap
import unicodedata
import ra
import racredentials
2023-01-11 21:17:10 +01:00
global_title = "webair.xyz"
2023-08-01 11:34:18 +02:00
global_subtitle = "Theo's web corner"
global_name = "Theo"
2023-01-12 23:40:30 +01:00
base_url = "webair.xyz/"
2023-01-11 21:17:10 +01:00
2023-08-24 17:36:17 +02:00
global_pubkey = ""
try:
with open("public.gpg", "r") as f:
global_pubkey = f.read()
except FileNotFoundError:
print("GPG public key not found!")
global_pubkey = ""
2023-01-11 21:17:10 +01:00
geminidir = "./public_gemini"
2023-01-12 23:40:30 +01:00
htmldir = "./public_html"
2023-01-11 21:17:10 +01:00
local_url = "/home/ploum/dev/gemlog/"
short_limit = 25
maxwidth = 72
old_post_url = ["2005-01-25","2013-05-17","2011-02-07","2012-03-19","2012-01-18","2012-10-15"]
html_page_template = "page_template.html"
email_template = "email_template.html"
gemini_page_template = "page_template.gmi"
raindex_template = "raindex_template.gmi"
raindex_destination = "rareview/retroachievements.gmi"
2023-01-12 22:17:07 +01:00
2023-01-12 23:40:30 +01:00
image_extensions = [".png", ".jpg", ".jpeg"]
def is_image(link):
for ext in image_extensions:
if link.endswith(ext):
return True
return False
2023-01-12 22:17:07 +01:00
2023-02-08 17:18:56 +01:00
def generate_rels(links):
result = ""
for link in links:
result += f'<link rel="me" href="https://soc.webair.xyz/@theo">\n'
return result
2023-01-12 22:17:07 +01:00
def fill_globals(text):
return text.replace("$AUTHOR", global_name)\
.replace("$BASE_URL", base_url)\
.replace("$GLOBAL_TITLE", global_title)\
2023-02-08 17:18:56 +01:00
.replace("$SUBTITLE", global_subtitle)\
2023-08-24 17:36:17 +02:00
.replace("$GPGPUBKEY", global_pubkey)
2023-01-12 22:17:07 +01:00
2023-01-11 21:17:10 +01:00
# Add the html version to the post dictionnary
# Also convert locals links that ends .gmi to .html
# if index = true, a special style is applied before the first subtitle
def gmi2html(raw,index=False,signature=None,relative_links=True,local=False):
lines = raw.split("\n")
inquote = False
inpre = False
inul = False
inindex = index
def sanitize(line):
line = unicodedata.normalize('NFC', line)
return html.escape(line)
content = ""
title = ""
if inindex:
content += "<div class=\"abstract\">\n"
for line in lines:
if inul and not line.startswith("=>") and not line.startswith("* "):
content += "</ul>\n"
inul = False
if inquote and not line.startswith(">"):
content += "</blockquote>\n"
inquote = False
if line.startswith("```"):
if inpre:
content += "</pre>\n"
else:
content += "<pre>"
inpre = not inpre
elif inpre:
content += sanitize(line) + "\n"
elif line.startswith("* "):
if not inul:
content +="<ul>"
inul = True
content += "<li>%s</li>\n" %sanitize(line[2:])
elif line.startswith(">"):
if not inquote:
content += "<blockquote>"
inquote = True
content += sanitize(line[1:]) + "<br>"
elif line.startswith("##"):
if inindex:
inindex = False
content += "</div>\n"
counter = 0
while line[counter] == "#":
counter += 1
counter = min(6, counter) # There's no h7 in html
content += f"<h{counter}>"
content += sanitize(line.lstrip("# "))
content += f"</h{counter}>\n"
elif line.startswith("# "):
#We dont add directly the first title as it is used in the template
if not title:
title = sanitize(line[2:])
else:
content += "<h1>"
content += sanitize(line[2:])
content += "</h1>\n"
elif line.startswith("=>"):
2023-01-12 23:40:30 +01:00
splitted = line.strip().split(maxsplit=2)[1:]
2023-01-11 21:17:10 +01:00
link = splitted[0]
#converting local links
if "://" not in link and link.endswith(".gmi"):
link = link[:-4] + ".html"
if not relative_links and "://" not in link:
link = "https://" + base_url + link.lstrip("./")
elif local:
link = local_url + link.lstrip("./")
if len(splitted) == 1:
description = ""
name = link
else:
name = sanitize(splitted[1])
description = name
2023-01-12 23:40:30 +01:00
if is_image(link):
2023-01-11 21:17:10 +01:00
if inul:
content += "</ul>\n"
inul = False
2023-01-12 23:40:30 +01:00
imgtag = f"<img alt='{name}' src='{link}' class='center'>"
content += f"<a href='{link}'>{imgtag}</a>"
2023-01-11 21:17:10 +01:00
if description:
2023-01-12 23:40:30 +01:00
content += f"<p class='subtitle'>{description}</p>"
2023-01-11 21:17:10 +01:00
else:
if not inul:
if inindex:
content += "<ul class=\"horizontal\">\n"
else:
content += "<ul>\n"
inul = True
#elif inindex :
# content += " - "
if inindex:
content += " "
content += "<li><a href=\"%s\">%s</a></li>"%(link,name)
if inindex:
content += " "
content += "\n"
elif line.strip() :
content += "<p>%s</p>\n"%sanitize(line)
if inul:
content += "</ul>\n"
inul = False
if signature:
content += "\n<div class=\"signature\">" + signature + "</div>"
return content
def plaintext(content):
lines = content.split("\n")
result = ""
for l in lines:
if l.startswith("=>"):
words = l.split(maxsplit=2)
if len(words) > 2:
result += textwrap.fill(words[2],width=maxwidth) + "\n"
url = words[1]
if "://" not in url and "mailto:" not in url:
if url.endswith(".gmi"):
url = url[:-4] + ".html"
url = "https://" + base_url + url.lstrip("/")
result += url.rstrip() + "\n\n"
elif l.startswith("```"):
pass
else:
istitle = l.startswith("#")
newlines = textwrap.wrap(l.lstrip("# "),width=maxwidth)
size = len(newlines)
while len(newlines) > 0:
nl = newlines.pop(0)
space = " \n"
last = len(newlines) == 0
if last : space = "\n"
result += nl + space
if istitle:
result += len(nl)*"=" + "\n"
if size > 0:
result += "\n"
return result
# We first create a list of all entries.
# Each entry is a dictionary with the following keys:
# - title
# - date (if None, it's a page)
# - gmifilename
# - htmlfilename
# - lang
# - folder
# - gem_content
# - html_content
# - gem_url
# - html_url
# - image
def build_post(filepath,lang="fr",signature=True,relative_links=True,local=False):
post = {}
post["lang"] = lang
with open(filepath) as fi:
lines = fi.readlines()
fi.close()
2023-01-12 23:40:30 +01:00
filename = ".".join(filepath.name.split(".")[:-1]) # Remove the extension
post["gmifilename"] = filename + ".gmi"
post["htmlfilename"] = filename + ".html"
2023-01-11 21:17:10 +01:00
post["gem_url"] = "gemini://" + base_url + post["gmifilename"]
post["html_url"] = "https://" + base_url + post["htmlfilename"]
if len(lines) > 0 and lines[0].startswith("# "):
post["title"] = lines.pop(0).strip("# ").strip()
content = "".join(lines)
post["gem_content"] = content
2023-01-12 23:40:30 +01:00
# This code will be outdated in 2100
if filename.startswith("20") and len(filename.split("-")) > 3:
post["date"] = "-".join(filename.split("-")[:3])
2023-01-11 21:17:10 +01:00
else:
post["date"] = ""
# on produit la version html avec la signature
sigpath = Path(post["lang"] + "/signature.html")
if sigpath.exists() and signature:
with open(sigpath) as sigf:
signature_content = sigf.read()
sigf.close()
else:
signature_content = None
2023-01-12 23:40:30 +01:00
2023-01-11 21:17:10 +01:00
post["html_content"] = gmi2html(content,signature=signature_content,relative_links=relative_links,\
local=local)
return post
def build_list(allposts,folder,local=False):
if folder:
folder += "/"
recurs = True
else:
recurs = False
folder = "./"
files = os.listdir(folder)
index_list = ""
# We recursively build nested folder except for root
for f in files:
ff = folder + f
p = Path(ff)
if recurs and p.is_dir():
print("Building recursively %s from %s"%(p,folder))
allposts = build_list(allposts,ff,local=local)
elif f.endswith(".gmi") and "index" not in f and "template" not in f:
if len(folder) > 2:
lang = folder
2023-01-11 21:17:10 +01:00
else:
lang = "fr"
post = build_post(p,lang=lang,local=local)
post["folder"] = folder
allposts.append(post)
return allposts
def build_atom_post(p):
with open("atom_post_template.xml") as f:
template = f.read()
f.close()
date = datetime.strptime(p["date"],"%Y-%m-%d").isoformat() + "Z"
content = html.escape(p["html_content"])
title = html.escape(p["title"])
2023-01-12 22:17:07 +01:00
final = fill_globals(
template
.replace("$DATE", date)
.replace("$TITLE", title)
.replace("$URL", p["html_url"])
.replace("$CONTENT", content)
.replace("$LANG", p["lang"])
)
2023-01-11 21:17:10 +01:00
return final
def write_atom_index(allposts,folder,limit=10):
with open("atom_template.xml") as f:
atom_template = f.read()
f.close()
atom_posts = []
if folder:
atomname = "atom_" + folder.strip("/").replace("/","_") + ".xml"
atom2 = []
else:
atomname = "atom.xml"
atom2 = [htmldir + "/feed/", htmldir + "/rss/"]
atompath = htmldir + "/" + atomname
allposts.sort(reverse=True,key=postdate)
for p in allposts:
if len(atom_posts)< limit and "date" in p.keys() and p["folder"].startswith(folder):
if len(p["date"]) >= 10:
atom_posts.append(build_atom_post(p))
atom_content = ""
for p in atom_posts:
atom_content += p
date = datetime.now().isoformat() + "Z"
if folder.startswith("en"):
lang = "en"
else:
lang = "fr"
url = "https://"+base_url
feedurl = url + atomname
2023-01-12 22:17:07 +01:00
final = fill_globals(
atom_template
.replace("$CONTENT", atom_content)
.replace("$DATE", date)
.replace("$URL", url)
.replace("$LANG", lang)
.replace("$FEEDURL", feedurl)
)
2023-01-11 21:17:10 +01:00
with open(atompath,"w") as f:
f.write(final)
f.close()
for a in atom2:
if not os.path.exists(a):
os.makedirs(a)
a += "index.html"
with open(a,"w") as f:
f.write(final)
f.close()
# Build the index and the corresponding atom.xml file
def build_index(allposts,folder,short=False):
index = {}
if folder:
indexname = "index_" + folder.strip("/").replace("/","_")
lang = folder
2023-01-11 21:17:10 +01:00
else:
if short:
indexname = "index"
else:
indexname = "index_all"
lang = "fr"
index["gmifilename"] = indexname + ".gmi"
index["htmlfilename"] = indexname + ".html"
index["lang"] = lang
index["folder"] = folder
content = ""
path = Path(folder + "/index.gmi")
if path.exists():
with open(path) as ind:
content += ind.read()
ind.close()
else:
with open("index.gmi") as main:
lines = main.readlines()
for l in lines:
content += l
main.close()
with open("fr/index.gmi") as fr:
lines = fr.readlines()
for l in lines:
if not l.startswith("# "):
content += l
fr.close()
with open("en/index.gmi") as en:
lines = en.readlines()
for l in lines:
if not l.startswith("# "):
content += l
en.close()
with open("postindex.gmi") as main:
lines = main.readlines()
for l in lines:
content += l
main.close()
if content:
lines = content.split("\n")
if lines[0].strip().startswith("# "):
index["title"] = lines.pop(0).strip().removeprefix("# ")
content = "\n".join(lines) + "\n"
allposts.sort(reverse=True,key=postdate)
last_year = 10000
nbr_post = 0
stop = False
print("we have %s posts"%len(allposts))
for p in allposts:
if short and nbr_post >= short_limit:
stop = True
if not stop and "date" in p.keys() and p["date"] and p["folder"].startswith(folder):
date = p["date"]
year = int(date[:4])
if not short and year < last_year:
last_year = year
content += "\n## %s\n\n"%year
if len(date) >= 10:
line = "=> %s %s : %s\n"%(p["gmifilename"],date,p["title"])
content += line
nbr_post += 1
# giving a title to year. Not working with different languages
#else:
#content += p["gem_content"]
if short and stop:
content += "\n=> index_all.gmi All posts"
2023-01-12 22:17:07 +01:00
index["gem_content"] = fill_globals(content)
2023-01-11 21:17:10 +01:00
index["html_content"] = gmi2html(content,index=True)
index["gem_url"] = "gemini://" + base_url + index["gmifilename"]
index["html_url"] = "https://" + base_url + index["htmlfilename"]
if "title" not in index.keys():
index["title"] = global_title
allposts.append(index)
return allposts
def filltemplate(post,template):
with open(template) as f:
template = f.read()
f.close()
if "date" in post.keys() and post["lang"] =="en" :
if post["date"]: ladate = " on %s"%post["date"]
else : ladate = ""
subtitle = f"by <a href='/'>{global_name}</a>{ladate}"
elif "date" in post.keys() and post["lang"] =="fr" :
if post["date"]: ladate = " le %s"%post["date"]
else : ladate = ""
subtitle = f"par <a href='/'>{global_name}</a>{ladate}"
else :
subtitle = global_subtitle
if "image" in post.keys():
image = "<img src=\"../%s\" class=\"header\">"%post["image"]
else:
image = ""
if "title" in post.keys():
template = template.replace("$TITLE", post["title"])
2023-01-12 22:17:07 +01:00
final_page = fill_globals(
template
.replace("$CONTENT", post["html_content"])
.replace("$LANG", post["lang"])
.replace("$GEMLINK", post["gem_url"])
.replace("$HTMLLINK", post["html_url"])
.replace("$IMAGE_HEADER", image)
)
2023-01-11 21:17:10 +01:00
return final_page
def writehtml(post):
filenames = []
filenames.append(htmldir + "/" + post["htmlfilename"])
if post["htmlfilename"].endswith("index.html"):
if "date" in post.keys() and post["date"] in old_post_url:
# old posts with old url
filenames.append(htmldir + "/post/" + post["htmlfilename"])
content = filltemplate(post, html_page_template)
for f in filenames:
p = Path(f)
if not p.parent.exists():
os.makedirs(p.parent)
with open(f, mode="w") as ff:
ff.write(content)
ff.close()
def writegmi(post):
with open(gemini_page_template) as f:
template = f.read()
f.close()
if "gem_content" not in post.keys():
print(f"no gem_content for {post}")
if "title" in post.keys():
template = template.replace("$TITLE", post["title"])
if "date" in post.keys():
date = post["date"]
else:
date = ""
2023-01-12 22:17:07 +01:00
final_page = fill_globals(
template
.replace("$CONTENT", post["gem_content"])
.replace("$DATE", date)
.replace("$GEMLINK", post["gem_url"])
.replace("$HTMLLINK", post["html_url"])
)
2023-01-11 21:17:10 +01:00
filename = geminidir + "/" + post["gmifilename"]
p = Path(filename)
if not p.parent.exists():
os.makedirs(p.parent)
with open(filename, mode="w") as f:
f.write(final_page)
f.close()
def postdate(p):
if "date" in p.keys():
return p["date"]
else:
return ""
def copy_static_files(source, dest):
os.popen(f"cp -r {source}/* {dest}/")
def generate_raindex(awards, dest):
content = ""
with open(raindex_template) as f:
content = f.read()
content += "\n"
for award in reversed(awards):
content += f"=> {award.url} {award.type} {award.name}\n"
with open(dest, "w") as f:
f.write(content)
def generate_default_rareviews(awards):
for award in awards:
default_page = ""
with open("default_rareview.gmi", "r") as f:
default_page = f.read()
filepath = Path(award.url)
# Check if a file with same name doesn't exists in current folder
if not os.path.exists(filepath):
# Check if a review was not already written
if not os.path.exists(os.path.join("rareview", filepath)):
# Write a default page
with open(filepath, "w") as f:
f.write(
default_page.replace("$AWARDNAME", award.name)
.replace("$AWARDDATE", award.date)
.replace("$AWARDGAMEURL", award.gameurl)
)
# Build a post with it
post = build_post(filepath, lang="rareview")
writehtml(post)
writegmi(post)
os.remove(filepath)
else:
print(f"Warning, file {filepath} exists in root folder, skipping.")
2023-01-11 21:17:10 +01:00
# Main call
if __name__ == "__main__":
# Check for the destination folders
for dir in [htmldir, geminidir]:
if not os.path.isdir(dir):
os.mkdir(dir)
2023-01-11 21:17:10 +01:00
all_posts = []
local = False
# Generate the RetroAchievements index page
awards = ra.getAwards(racredentials.user, racredentials.key)
generate_raindex(awards, raindex_destination)
# Normal generation
2023-01-11 21:17:10 +01:00
if len(sys.argv) > 1:
local = sys.argv[1] == "local"
print("building locally")
for folder in ["", "fr", "en", "rareview"]:
2023-01-11 21:17:10 +01:00
all_posts = build_list(all_posts, folder, local=local)
all_posts = build_index(all_posts, folder)
write_atom_index(all_posts, folder)
all_posts = build_index(all_posts, "", short=True)
all_posts = build_index(all_posts, "", short=False)
write_atom_index(all_posts, "")
all_posts.sort(reverse=True, key=postdate)
for p in all_posts:
writehtml(p)
writegmi(p)
# Generate default RaReview pages
generate_default_rareviews(awards)
# Delete the generated raindex
os.remove(raindex_destination)
2023-01-11 21:17:10 +01:00
copy_static_files("static", htmldir)