#!/bin/python
# TODO :links should all be absolute, never relative
import sys
import os
from pathlib import Path
# used to escape the content
import html
# used to produce the time
from datetime import datetime
import textwrap
import unicodedata
# Site-wide settings, substituted into the templates by fill_globals().
global_title = "webair.xyz"
global_subtitle = "Alice's web corner"
global_name = "Alice"
# Output folders for the html and gemini versions of the site.
htmldir = "./public_html"
geminidir = "./public_gemini"
# Public hostname (no scheme, trailing slash) of the published site.
base_url = "webair.xyz/"
# Filesystem path used instead of base_url for "local" preview builds.
local_url = "/home/ploum/dev/gemlog/"
# Number of posts listed on the short index page.
short_limit = 25
# Wrap column for the plain-text (e-mail) rendering.
maxwidth = 72
# Dates of wordpress-era posts that must also be published under /post/.
old_post_url = ["2005-01-25","2013-05-17","2011-02-07","2012-03-19","2012-01-18","2012-10-15"]
# Template file names, looked up relative to the working directory.
html_page_template = "page_template.html"
email_template = "email_template.html"
gemini_page_template = "page_template.gmi"
def fill_globals(text):
    """Substitute the site-wide placeholders in *text* and return it."""
    substitutions = {
        "$AUTHOR": global_name,
        "$BASE_URL": base_url,
        "$GLOBAL_TITLE": global_title,
        "$SUBTITLE": global_subtitle,
    }
    for placeholder, value in substitutions.items():
        text = text.replace(placeholder, value)
    return text
# Add the html version to the post dictionary
# Also convert local links that end in .gmi to .html
# if index = true, a special style is applied before the first subtitle
def gmi2html(raw, index=False, signature=None, relative_links=True, local=False):
    """Convert a gemtext string to an HTML fragment.

    raw            -- the gemtext source
    index          -- wrap everything before the first "##" subtitle in an
                      "abstract" div and render link lists horizontally
    signature      -- optional HTML appended inside a "signature" div
    relative_links -- when False, local links are made absolute on base_url
    local          -- when relative_links is True, prefix local links with
                      local_url instead (local preview builds)

    The first "# " title is swallowed: it is rendered by the page template,
    not by the content. Local links ending in .gmi are converted to .html.
    """
    lines = raw.split("\n")
    inquote = False   # currently inside a <blockquote>
    inpre = False     # currently inside a <pre>
    inul = False      # currently inside a <ul>
    inindex = index   # still inside the abstract div
    def sanitize(line):
        # Normalize to NFC, then escape HTML special characters.
        line = unicodedata.normalize('NFC', line)
        return html.escape(line)
    content = ""
    title = ""
    if inindex:
        content += "<div class=\"abstract\">\n"
    for line in lines:
        # Close any open list/quote when the line no longer belongs to it.
        if inul and not line.startswith("=>") and not line.startswith("* "):
            content += "</ul>\n"
            inul = False
        if inquote and not line.startswith(">"):
            content += "</blockquote>\n"
            inquote = False
        if line.startswith("```"):
            if inpre:
                content += "</pre>\n"
            else:
                content += "<pre>"
            inpre = not inpre
        elif inpre:
            content += sanitize(line) + "\n"
        elif line.startswith("* "):
            if not inul:
                content += "<ul>"
                inul = True
            content += "<li>%s</li>\n" % sanitize(line[2:])
        elif line.startswith(">"):
            if not inquote:
                content += "<blockquote>"
                inquote = True
            content += sanitize(line[1:]) + "<br>"
        elif line.startswith("##"):
            # The first subtitle closes the abstract div.
            if inindex:
                inindex = False
                content += "</div>\n"
            counter = 0
            # Bounds check: a line made only of "#" must not crash.
            while counter < len(line) and line[counter] == "#":
                counter += 1
            counter = min(6, counter)  # There's no h7 in html
            content += f"<h{counter}>"
            content += sanitize(line.lstrip("# "))
            content += f"</h{counter}>\n"
        elif line.startswith("# "):
            # We don't add the first title directly as it is used in the template
            if not title:
                title = sanitize(line[2:])
            else:
                content += "<h1>"
                content += sanitize(line[2:])
                content += "</h1>\n"
        elif line.startswith("=>"):
            splitted = line[2:].strip().split(maxsplit=1)
            link = splitted[0]
            # converting local links
            if "://" not in link and link.endswith(".gmi"):
                link = link[:-4] + ".html"
            if not relative_links and "://" not in link:
                link = "https://" + base_url + link.lstrip("./")
            elif local:
                link = local_url + link.lstrip("./")
            if len(splitted) == 1:
                description = ""
                # Escape the bare URL so special characters stay valid HTML.
                name = sanitize(link)
            else:
                name = sanitize(splitted[1])
                description = name
            if link[-4:] in [".jpg", ".png", ".gif"] or link[-5:] == ".jpeg":
                # Images become inline <img> wrapped in a link.
                if inul:
                    content += "</ul>\n"
                    inul = False
                imgtag = "<img alt=\"%s\" src=\"%s\" width=\"450\" class=\"center\">" % (name, link)
                content += "<a href=\"%s\">" % link + imgtag + "</a>"
                if description:
                    content += "<p class=\"subtitle\">" + description + "</p>"
            else:
                if not inul:
                    if inindex:
                        content += "<ul class=\"horizontal\">\n"
                    else:
                        content += "<ul>\n"
                    inul = True
                if inindex:
                    content += " "
                content += "<li><a href=\"%s\">%s</a></li>" % (link, name)
                if inindex:
                    content += " "
                content += "\n"
        elif line.strip():
            content += "<p>%s</p>\n" % sanitize(line)
    # Close everything still open at end of input so the fragment is valid.
    if inpre:
        content += "</pre>\n"
    if inquote:
        content += "</blockquote>\n"
    if inul:
        content += "</ul>\n"
        inul = False
    if inindex:
        content += "</div>\n"
    if signature:
        content += "\n<div class=\"signature\">" + signature + "</div>"
    return content
def plaintext(content, width=None):
    """Render gemtext *content* as wrapped plain text (for the e-mails).

    width -- wrap column; defaults to the module-wide maxwidth.

    Link lines print the description then the absolute https URL, "```"
    fences are dropped, and "#" titles are underlined with "=".
    """
    if width is None:
        width = maxwidth
    lines = content.split("\n")
    result = ""
    for l in lines:
        if l.startswith("=>"):
            words = l.split(maxsplit=2)
            if len(words) > 2:
                # Link description first, wrapped like regular text.
                result += textwrap.fill(words[2], width=width) + "\n"
            if len(words) > 1:  # guard against a bare "=>" line
                url = words[1]
                if "://" not in url and "mailto:" not in url:
                    # Local links become absolute html URLs.
                    if url.endswith(".gmi"):
                        url = url[:-4] + ".html"
                    url = "https://" + base_url + url.lstrip("/")
                result += url.rstrip() + "\n\n"
        elif l.startswith("```"):
            pass  # drop preformatted fences
        else:
            istitle = l.startswith("#")
            newlines = textwrap.wrap(l.lstrip("# "), width=width)
            size = len(newlines)
            while len(newlines) > 0:
                nl = newlines.pop(0)
                space = " \n"
                last = len(newlines) == 0
                if last:
                    space = "\n"
                result += nl + space
                if istitle:
                    # Underline titles with "=" of the same length.
                    result += len(nl) * "=" + "\n"
            if size > 0:
                result += "\n"
    return result
# We first create a list of all entries
# Each entry is a dictionary with the following keys:
# - title
# - date (if empty, it's a page)
# - gmifilename
# - htmlfilename
# - lang
# - folder
# - gem_content
# - html_content
# - gem_url
# - html_url
# - image
def build_post(filepath, lang="fr", signature=True, relative_links=True, local=False):
    """Parse one .gmi source file into a post dictionary.

    filepath       -- Path of the gemtext source file
    lang           -- language code, also used to locate <lang>/signature.html
    signature      -- append the html signature when the file exists
    relative_links -- passed through to gmi2html
    local          -- passed through to gmi2html (local preview links)

    Returns a dict with the keys lang, gmifilename, htmlfilename, gem_url,
    html_url, date, gem_content, html_content, plaintext_content, and
    optionally title and image.
    """
    post = {}
    post["lang"] = lang
    with open(filepath) as fi:
        lines = fi.readlines()
    f = filepath.name
    # Special case for wordpress-imported content, to preserve the URLs:
    # a post whose parent directory is "old" keeps a date-less slug folder.
    old = filepath.parent.parts[-1] == "old"
    if old:
        slug = f[11:-4]  # we remove the date and the extension
        post["gmifilename"] = slug + "/index.gmi"
        post["htmlfilename"] = slug + "/index.html"
    else:
        # normal case
        post["gmifilename"] = f[:-4] + ".gmi"
        post["htmlfilename"] = f[:-4] + ".html"
    post["gem_url"] = "gemini://" + base_url + post["gmifilename"]
    post["html_url"] = "https://" + base_url + post["htmlfilename"]
    if len(lines) > 0 and lines[0].startswith("# "):
        post["title"] = lines.pop(0).strip("# ").strip()
        # special case for header images which are right after the title
        if len(lines) > 0 and lines[0].startswith("=> "):
            if lines[0].strip().endswith((".jpg", ".png", ".jpeg")):
                post["image"] = lines.pop(0).removeprefix("=> ").strip()
    content = "".join(lines)
    post["gem_content"] = content
    # Posts are named YYYY-MM-DD-slug.gmi; pages have no leading date.
    if f.startswith("20") and len(f) >= 14:
        post["date"] = f[:10]
    elif f.startswith("20"):
        post["date"] = f
    else:
        post["date"] = ""
    # Produce the html version, with the signature when available.
    sigpath = Path(post["lang"] + "/signature.html")
    if sigpath.exists() and signature:
        with open(sigpath) as sigf:
            signature_content = sigf.read()
    else:
        signature_content = None
    post["html_content"] = gmi2html(content, signature=signature_content,
                                    relative_links=relative_links, local=local)
    # Plain-text rendering, used for the e-mail output.
    txt = ""
    if "title" in post.keys():
        txt += textwrap.fill(post["title"].upper(), width=maxwidth) + "\n"
    txt += "by %s" % global_name  # was hard-coded to "Ploum"; use the fork's author
    if post["date"]:  # avoid a dangling " on " for undated pages
        txt += " on %s" % post["date"]
    txt += "\n\n"
    txt += post["html_url"] + "\n\n"
    txt += plaintext(content)
    post["plaintext_content"] = txt
    return post
def build_list(allposts, folder, local=False):
    """Recursively collect the posts found under *folder* into allposts.

    folder -- relative folder name; "" means the site root, which is
              scanned non-recursively.
    Returns the (mutated) allposts list.
    """
    if folder:
        folder += "/"
        recurs = True
    else:
        recurs = False
        folder = "./"
    # We recursively build nested folders except for root.
    for f in os.listdir(folder):
        ff = folder + f
        p = Path(ff)
        if recurs and p.is_dir():
            print("Building recursively %s from %s" % (p, folder))
            allposts = build_list(allposts, ff, local=local)
        elif f.endswith(".gmi") and "index" not in f and "template" not in f:
            # The language is the first path component ("fr/", "en/...").
            lang = folder[:2] if len(folder) > 2 else "fr"
            post = build_post(p, lang=lang, local=local)
            post["folder"] = folder
            allposts.append(post)
    return allposts
def build_atom_post(p):
    """Render one post dictionary as an entry for the Atom feed."""
    with open("atom_post_template.xml") as f:
        template = f.read()
    # Atom wants an RFC3339 timestamp; the post only carries a date.
    # NOTE(review): appending "Z" to a naive timestamp claims UTC — confirm.
    date = datetime.strptime(p["date"], "%Y-%m-%d").isoformat() + "Z"
    # The HTML content is embedded escaped inside the XML.
    content = html.escape(p["html_content"])
    title = html.escape(p["title"])
    final = fill_globals(
        template
        .replace("$DATE", date)
        .replace("$TITLE", title)
        .replace("$URL", p["html_url"])
        .replace("$CONTENT", content)
        .replace("$LANG", p["lang"])
    )
    return final
def write_atom_index(allposts, folder, limit=10):
    """Write the Atom feed for *folder* (at most *limit* dated posts).

    The root feed ("" folder) is also duplicated under the legacy
    wordpress locations /feed/ and /rss/.
    """
    with open("atom_template.xml") as f:
        atom_template = f.read()
    atom_posts = []
    if folder:
        atomname = "atom_" + folder.strip("/").replace("/", "_") + ".xml"
        atom2 = []
    else:
        atomname = "atom.xml"
        # Legacy wordpress feed URLs also get a copy of the root feed.
        atom2 = [htmldir + "/feed/", htmldir + "/rss/"]
    atompath = htmldir + "/" + atomname
    allposts.sort(reverse=True, key=postdate)
    for p in allposts:
        # Only fully-dated (YYYY-MM-DD) posts from this folder are fed.
        if len(atom_posts) < limit and "date" in p.keys() and p["folder"].startswith(folder):
            if len(p["date"]) >= 10:
                atom_posts.append(build_atom_post(p))
    atom_content = "".join(atom_posts)
    # utcnow so the trailing "Z" (UTC designator) is actually true.
    date = datetime.utcnow().isoformat() + "Z"
    lang = "en" if folder.startswith("en") else "fr"
    url = "https://" + base_url
    feedurl = url + atomname
    final = fill_globals(
        atom_template
        .replace("$CONTENT", atom_content)
        .replace("$DATE", date)
        .replace("$URL", url)
        .replace("$LANG", lang)
        .replace("$FEEDURL", feedurl)
    )
    with open(atompath, "w") as f:
        f.write(final)
    for a in atom2:
        if not os.path.exists(a):
            os.makedirs(a)
        a += "index.html"
        with open(a, "w") as f:
            f.write(final)
# Build the index page for a folder and append it to the post list
def build_index(allposts, folder, short=False):
    """Build the index entry (gemini + html content) for *folder*.

    folder -- "" for the site root, otherwise a language folder
    short  -- root only: limit the index to short_limit posts and link
              to the complete index_all page

    The resulting index dictionary is appended to allposts and the
    (mutated) list is returned.
    """
    index = {}
    if folder:
        indexname = "index_" + folder.strip("/").replace("/", "_")
        lang = folder[:2]
    else:
        indexname = "index" if short else "index_all"
        lang = "fr"
    index["gmifilename"] = indexname + ".gmi"
    index["htmlfilename"] = indexname + ".html"
    index["lang"] = lang
    index["folder"] = folder
    content = ""
    path = Path(folder + "/index.gmi")
    if path.exists():
        with open(path) as ind:
            content += ind.read()
    else:
        # Root index: concatenate the main index, both language indexes
        # (with their titles stripped) and the post-index preamble.
        with open("index.gmi") as main:
            for l in main.readlines():
                content += l
        with open("fr/index.gmi") as fr:
            for l in fr.readlines():
                if not l.startswith("# "):
                    content += l
        with open("en/index.gmi") as en:
            for l in en.readlines():
                if not l.startswith("# "):
                    content += l
        with open("postindex.gmi") as main:
            for l in main.readlines():
                content += l
    if content:
        lines = content.split("\n")
        # The first "# " line becomes the page title, not content.
        if lines[0].strip().startswith("# "):
            index["title"] = lines.pop(0).strip().removeprefix("# ")
            content = "\n".join(lines) + "\n"
    allposts.sort(reverse=True, key=postdate)
    last_year = 10000
    nbr_post = 0
    stop = False
    print("we have %s posts" % len(allposts))
    for p in allposts:
        if short and nbr_post >= short_limit:
            stop = True
        if not stop and "date" in p.keys() and p["date"] and p["folder"].startswith(folder):
            date = p["date"]
            year = int(date[:4])
            # Insert a year heading when reaching a new (older) year.
            # Not done for the short index. (Year titles are not localized.)
            if not short and year < last_year:
                last_year = year
                content += "\n## %s\n\n" % year
            if len(date) >= 10:
                line = "=> %s %s : %s\n" % (p["gmifilename"], date, p["title"])
                content += line
                nbr_post += 1
    if short and stop:
        content += "\n=> index_all.gmi All posts"
    index["gem_content"] = fill_globals(content)
    index["html_content"] = gmi2html(content, index=True)
    index["gem_url"] = "gemini://" + base_url + index["gmifilename"]
    index["html_url"] = "https://" + base_url + index["htmlfilename"]
    if "title" not in index.keys():
        index["title"] = global_title
    allposts.append(index)
    return allposts
def filltemplate(post, template):
    """Fill the html page *template* (a file name) with a post's data.

    Returns the final page as a string. If the post has no title, the
    $TITLE placeholder is intentionally left for fill_globals/template.
    """
    with open(template) as f:
        template = f.read()
    # Subtitle: localized author/date line for posts, site subtitle for pages.
    if "date" in post.keys() and post["lang"] == "en":
        ladate = " on %s" % post["date"] if post["date"] else ""
        subtitle = f"by <a href='/'>{global_name}</a>{ladate}"
    elif "date" in post.keys() and post["lang"] == "fr":
        ladate = " le %s" % post["date"] if post["date"] else ""
        subtitle = f"par <a href='/'>{global_name}</a>{ladate}"
    else:
        subtitle = global_subtitle
    if "image" in post.keys():
        image = "<img src=\"../%s\" class=\"header\">" % post["image"]
    else:
        image = ""
    if "title" in post.keys():
        template = template.replace("$TITLE", post["title"])
    # $SUBTITLE must be replaced here, before fill_globals substitutes the
    # generic site subtitle — otherwise the computed subtitle is never used.
    final_page = fill_globals(
        template
        .replace("$SUBTITLE", subtitle)
        .replace("$CONTENT", post["html_content"])
        .replace("$LANG", post["lang"])
        .replace("$GEMLINK", post["gem_url"])
        .replace("$HTMLLINK", post["html_url"])
        .replace("$IMAGE_HEADER", image)
    )
    return final_page
def writehtml(post):
    """Write the html version of *post*, creating folders as needed.

    Old wordpress posts are additionally duplicated under /post/ so
    their historical URLs keep working.
    """
    targets = [htmldir + "/" + post["htmlfilename"]]
    if post["htmlfilename"].endswith("index.html") and post.get("date") in old_post_url:
        # old posts with old url
        targets.append(htmldir + "/post/" + post["htmlfilename"])
    content = filltemplate(post, html_page_template)
    for target in targets:
        destination = Path(target)
        destination.parent.mkdir(parents=True, exist_ok=True)
        with open(target, mode="w") as out:
            out.write(content)
def make_email(file, lang, html=True):
    """Build the e-mail rendering of *file*: html page or plain text.

    Note: the html parameter shadows the html module inside this function.
    """
    post = build_post(file, lang, signature=False, relative_links=False)
    if not html:
        return post["plaintext_content"]
    return filltemplate(post, email_template)
def writegmi(post):
    """Write the gemini version of *post* under geminidir."""
    with open(gemini_page_template) as f:
        template = f.read()
    if "gem_content" not in post.keys():
        # Diagnostic only: the replace below will still raise KeyError.
        print(f"no gem_content for {post}")
    if "title" in post.keys():
        template = template.replace("$TITLE", post["title"])
    date = post["date"] if "date" in post.keys() else ""
    final_page = fill_globals(
        template
        .replace("$CONTENT", post["gem_content"])
        .replace("$DATE", date)
        .replace("$GEMLINK", post["gem_url"])
        .replace("$HTMLLINK", post["html_url"])
    )
    filename = geminidir + "/" + post["gmifilename"]
    p = Path(filename)
    if not p.parent.exists():
        os.makedirs(p.parent)
    with open(filename, mode="w") as f:
        f.write(final_page)
def postdate(p):
    """Sort key for posts: the post's date, or "" when it has none."""
    return p.get("date", "")
def copy_static_files(source, dest):
    """Copy the static assets from *source* into *dest* (merging trees).

    Replaces the former os.popen("cp -r source/* dest/") call, which
    spawned a shell asynchronously (the script could exit before the copy
    finished) and broke on paths with spaces or shell metacharacters.
    Unlike the shell glob, this also copies dotfiles.
    """
    import shutil  # local import: keeps this fix self-contained
    shutil.copytree(source, dest, dirs_exist_ok=True)
# Main call
if __name__ == "__main__":
    # Make sure the destination folders exist.
    for outdir in [htmldir, geminidir]:  # renamed: "dir" shadowed the builtin
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
    all_posts = []
    local = False
    if len(sys.argv) > 1:
        local = sys.argv[1] == "local"
        if local:  # only announce a local build when one was requested
            print("building locally")
    # Build posts, index and feed for the root and each language folder.
    for folder in ["", "fr", "en"]:
        all_posts = build_list(all_posts, folder, local=local)
        all_posts = build_index(all_posts, folder)
        write_atom_index(all_posts, folder)
    # The root gets both the short index and the complete one.
    all_posts = build_index(all_posts, "", short=True)
    all_posts = build_index(all_posts, "", short=False)
    write_atom_index(all_posts, "")
    all_posts.sort(reverse=True, key=postdate)
    for p in all_posts:
        writehtml(p)
        writegmi(p)
    copy_static_files("static", htmldir)