diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..076b027 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +DEST=/var/gemini/fosdem/ + +all: updateinstall + +updateinstall: update install + +update: + wget --quiet --output-document schedule.xml https://fosdem.org/2021/schedule/xml + ./schedule2gemtext.py + +install: + rsync -a -v -p --exclude=Makefile --exclude="*~" *.gmi ${DEST} diff --git a/README.md b/README.md index ae68497..061b030 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,24 @@ # fosdem2gemini -Convert the FOSDEM schedule to gemtext (text/gemini) format, to be served with Gemini. \ No newline at end of file +Convert the [FOSDEM](https://fosdem.org/) schedule to gemtext +(`text/gemini`) format, to be served via +[Gemini](https://en.wikipedia.org/wiki/Gemini_(protocol)). + +## Installation + +Download the program. Be sure you have the +[lxml](https://lxml.de/) library. Edit sample-index.gmi to suit your +needs and rename it index.gmi. Then run the program periodically, for +instance through cron: + +``` +# FOSDEM +26 1,5,9,13,17,21 * * * (cd /path/to/FOSDEM; make all) +``` + +## Technical details + +The FOSDEM schedule is publicly available (from the schedule +management system Pentabarf) as an XML file (which we can retrieve with, +for instance, `wget https://fosdem.org/2021/schedule/xml`). The use of +a structured format allows us to convert it easily. diff --git a/sample-index.gmi b/sample-index.gmi new file mode 100644 index 0000000..4e5240f --- /dev/null +++ b/sample-index.gmi @@ -0,0 +1,14 @@ +# FOSDEM schedule + +A non-official Gemini version of FOSDEM schedule for the 2021 "COVID edition". + +=> allevents.gmi All events, sorted by date and time +=> allrooms.gmi All "rooms", with their events + +=> https://fosdem.org/ FOSDEM official page +=> https://fosdem.org/2021/schedule/ FOSDEM official schedule + +The schedule here is automatically produced from the FOSDEM "Pentabarf" files. 
#!/usr/bin/python3
"""Convert the FOSDEM Pentabarf XML schedule into gemtext pages.

Reads FILE from the current directory and writes, next to it, one
``event-<id>.gmi`` page per event plus the ALLEVENTS and ALLROOMS
listings.
"""

# NOTE(review): in the collapsed diff this script was recovered from, the
# text immediately before it was the tail of the sample-index.gmi hunk
# ("This service is maintained by Stéphane Bortzmeyer .") followed by the
# diff header for schedule2gemtext.py.

import sys
import xml.etree.ElementTree as ET

# https://fosdem.org/2021/schedule/xml
FILE = "schedule.xml"
INDEX = "index.gmi"
ALLEVENTS = "allevents.gmi"
ALLROOMS = "allrooms.gmi"

# Schedule data, filled in by main(). Kept at module level because
# events_of() reads `events`.
days = {}
rooms = {}
events = {}

# HTML heading tag -> gemtext heading marker.
_HEADING_MARKERS = {"h1": "#", "h2": "##", "h3": "###"}

# Tags after which a blank line is emitted.
_BLOCK_TAGS = ("div", "p", "ul", "ol")

# First component of a room name -> kind of room.
_ROOM_TYPES = {
    "S": "Stand",
    "D": "Room",
    "F": "Room",
    "M": "Room",
    "B": "Room",
    "K": "Keynote",
    "I": "",  # Infodesk
    "L": "",  # Lightning talks
}

_EVENT_PAGE = """# FOSDEM event "%s"

%s
Type %s
%s
Starts on day %s (%s) at %s (Brussels time, UTC+1) in room %s (duration %s)
Matrix room %s
%s

=> . FOSDEM schedule page
"""

_EVENT_ENTRY = """### %s
%s
%s
Starts at %s (Brussels time, UTC+1) in room %s

=> event-%s.gmi Details on the event

"""

_ROOM_FOOTER = """

Matrix room : %s

=> https://fosdem.org/2021/schedule/room/%s/ Description of this room on FOSDEM site
"""


def html2gemini(html):
    """Render the children of the element *html* as gemtext.

    Only the tags handled below are converted; any other tag is reported
    on stderr and its content is dropped. Text of nested inline elements
    and tail text are not rendered.
    """
    parts = []
    for child in html:
        tag = child.tag
        text = child.text or ""
        if tag == "p":
            parts.append(text)
        elif tag in ("div", "ul", "ol"):
            parts.append(html2gemini(child))
        elif tag == "hr":
            parts.append("\n")
        elif tag in _HEADING_MARKERS:
            # A gemtext heading is a whole line: end it so that the next
            # element does not get glued onto the heading.
            parts.append("%s %s\n" % (_HEADING_MARKERS[tag], text))
        elif tag == "li":
            if child.text is not None:
                parts.append("\n* %s\n" % child.text)
        elif tag == "code":
            # `text` is "" for an empty <code/>, which used to raise.
            parts.append(text)
        elif tag == "pre":
            # Keep the element's own text as well as its children.
            parts.append("\n```\n%s\n```\n" % (text + html2gemini(child)))
        else:
            print("Unknown tag %s" % tag, file=sys.stderr)
        if tag in _BLOCK_TAGS:
            parts.append("\n\n")
    return "".join(parts)


def cleanroom(name):
    """Return a display name for room *name*.

    The first dot-separated component (the room kind) is dropped, the
    next one is capitalised and the rest are appended, separated by
    spaces. A name without any dot is used as a whole instead of raising.
    """
    components = name.split(".")
    words = components[1:] or components
    first = words[0]
    return " ".join([first[:1].upper() + first[1:]] + words[1:])


def typeroom(name):
    """Return the kind of room ("Stand", "Room", "Keynote", "" or "Unknown")."""
    return _ROOM_TYPES.get(name.split(".")[0], "Unknown")


def matrixroom(name):
    """Return the Matrix room alias built from the room name."""
    suffix = "-stand" if typeroom(name) == "Stand" else ""
    return "#%s%s:fosdem.org" % (cleanroom(name).lower(), suffix)


def shortroom(name):
    """Return the room name lower-cased with its dots removed."""
    return name.replace(".", "").lower()


def events_of(room):
    """Return the ids of the events held in *room*, in insertion order."""
    return [event_id for event_id, event in events.items()
            if event["room"] == room]


def _child_text(element, tag):
    """Return the text of child *tag*, or None when it is missing or empty."""
    return element.findtext(tag) or None


def _parse_event(event, room_name, day_index, html_cleaner):
    """Build the record stored in `events` for one <event> element."""
    raw_abstract = _child_text(event, "abstract")
    if raw_abstract is None:
        abstract = None
    else:
        abstract = html_cleaner.clean_html(raw_abstract)
    return {
        "room": room_name,
        "day": day_index,
        "start": _child_text(event, "start"),
        "duration": _child_text(event, "duration"),
        "title": _child_text(event, "title"),
        "subtitle": _child_text(event, "subtitle"),
        "type": _child_text(event, "type"),
        "abstract": abstract,
        # Skip <person> elements without text instead of failing on them.
        "speakers": " and ".join(
            person.text for person in event.findall("persons/person")
            if person.text),
    }


def _write_event_page(event_id, parse_html):
    """Write event-<id>.gmi for one event already stored in `events`."""
    event = events[event_id]
    if event["subtitle"] is None:
        subtitle = ""
    else:
        subtitle = "\n%s\n" % event["subtitle"]
    if event["abstract"] is None:
        abstract = ""
    else:
        abstract = "\n%s\n" % html2gemini(parse_html(event["abstract"]))
    # Render first, open last: a failure above leaves no truncated page.
    with open("event-%s.gmi" % event_id, "w", encoding="utf-8") as page:
        print(_EVENT_PAGE % (
            event["title"], event["speakers"], event["type"], subtitle,
            event["day"], days[event["day"]]["date"], event["start"],
            cleanroom(event["room"]), event["duration"],
            matrixroom(event["room"]), abstract), file=page)


def _write_all_events():
    """Write ALLEVENTS: room and keynote events ordered by day and time."""
    ordered = sorted(
        events,
        key=lambda x: "%s %s" % (events[x]["day"], events[x]["start"]))
    with open(ALLEVENTS, "w", encoding="utf-8") as listing:
        print("# All events at FOSDEM\n", file=listing)
        current_day = None
        for event_id in ordered:
            event = events[event_id]
            if typeroom(event["room"]) not in ("Room", "Keynote"):
                continue
            if event["day"] != current_day:
                current_day = event["day"]
                print("## Day %s (%s)\n\n"
                      % (current_day, days[current_day]["date"]),
                      file=listing)
            if event["subtitle"] is None:
                subtitle = ""
            else:
                subtitle = "%s\n" % event["subtitle"]
            if event["speakers"]:
                speakers = "By %s" % event["speakers"]
            else:
                speakers = ""
            print(_EVENT_ENTRY % (
                event["title"], speakers, subtitle, event["start"],
                cleanroom(event["room"]), event_id), file=listing)
        print("\n\n=> . FOSDEM schedule page\n", file=listing)


def _write_all_rooms():
    """Write ALLROOMS: every room, sorted by display name, with its events."""
    with open(ALLROOMS, "w", encoding="utf-8") as listing:
        print("# All rooms at FOSDEM\n", file=listing)
        for room in sorted(rooms, key=cleanroom):
            print('## %s "%s"\n\n' % (typeroom(room), cleanroom(room)),
                  file=listing)
            for event_id in events_of(room):
                print("=> event-%s.gmi %s"
                      % (event_id, events[event_id]["title"]), file=listing)
            print(_ROOM_FOOTER % (matrixroom(room), shortroom(room)),
                  file=listing)
        print("\n=> . FOSDEM schedule page\n", file=listing)


def main():
    """Parse FILE and write every gemtext page."""
    # Imported here so that the helpers above stay importable on a
    # machine without lxml. https://lxml.de/
    from lxml.html.clean import Cleaner
    from lxml.html.soupparser import fromstring

    # https://lxml.de/api/lxml.html.clean.Cleaner-class.html
    html_cleaner = Cleaner(scripts=True, javascript=True, embedded=True,
                           meta=True, page_structure=True, links=True,
                           remove_unknown_tags=True,
                           frames=True, annoying_tags=True, add_nofollow=True,
                           style=False)
    root = ET.parse(FILE).getroot()
    for day in root.findall("day"):
        day_index = day.attrib["index"]
        days[day_index] = {"date": day.attrib["date"]}
        for room in day.findall("room"):
            room_name = room.attrib["name"]
            rooms[room_name] = {}
            for event in room.findall("event"):
                event_id = event.attrib["id"]
                events[event_id] = _parse_event(
                    event, room_name, day_index, html_cleaner)
                _write_event_page(event_id, fromstring)
    _write_all_events()
    _write_all_rooms()


if __name__ == "__main__":
    main()