#!/usr/bin/python3
"""Turn the FOSDEM schedule XML into a set of Gemini (gemtext) pages.

When run as a script, reads FILE and writes, in the current directory, one
``event-<id>.gmi`` page per event plus the ALLEVENTS and ALLROOMS overview
pages.  The helper functions have no import-time side effects.
"""

import sys
import xml.etree.ElementTree as ET

# https://fosdem.org/2021/schedule/xml
FILE = "schedule.xml"
INDEX = "index.gmi"
ALLEVENTS = "allevents.gmi"
ALLROOMS = "allrooms.gmi"

# Schedule data, filled by load_schedule().
days = {}    # day index -> {"date": ...}
rooms = {}   # room name -> {}
events = {}  # event id -> dict describing the event

# Gemtext heading prefix for each supported HTML heading tag.
_HEADING_PREFIXES = {"h1": "#", "h2": "##", "h3": "###"}

# Tags after which a blank line is emitted.
_BLOCK_TAGS = ("div", "p", "ul", "ol")

# First component of a room name -> kind of room.
_ROOM_TYPES = {
    "S": "Stand",
    "D": "Room",
    "F": "Room",
    "M": "Room",
    "B": "Room",
    "K": "Keynote",
    "I": "",  # Infodesk
    "L": "",  # Lightning talks
}

# Gemtext is line-oriented: headings and "=>" links must start a line.
_EVENT_PAGE = """# FOSDEM event "%s"

%s

Type %s
%s
Starts on day %s (%s) at %s (Brussels time, UTC+1) in room %s (duration %s)

Matrix room %s
%s
=> . FOSDEM schedule page
"""

_EVENT_ENTRY = """### %s
%s
%s
Starts at %s (Brussels time, UTC+1) in room %s
=> event-%s.gmi Details on the event
"""

_ROOM_FOOTER = """
Matrix room : %s
=> https://fosdem.org/2021/schedule/room/%s/ Description of this room on FOSDEM site
"""

_PAGE_FOOTER = """
=> . FOSDEM schedule page
"""


def html2gemini(html):
    """Convert the children of the element *html* to gemtext.

    Handles p, div, hr, h1-h3, ul, ol, li, code and pre; any other tag is
    reported on stderr and skipped.  Elements without text produce no text
    (instead of the string "None" or a TypeError).

    NOTE(review): only the text directly inside p/li/code/hN is emitted;
    text inside inline child elements, and text directly inside pre, is
    still dropped.
    """
    parts = []
    for child in html:
        tag = child.tag
        text = child.text
        if tag == "p":
            if text is not None:
                parts.append(text)
        elif tag in ("div", "ul", "ol"):
            parts.append(html2gemini(child))
        elif tag == "hr":
            parts.append("\n")
        elif tag in _HEADING_PREFIXES:
            if text is not None:
                # End the line, otherwise the text that follows would be
                # glued to the heading.
                parts.append("%s %s\n" % (_HEADING_PREFIXES[tag], text))
        elif tag == "li":
            if text is not None:
                parts.append("\n* %s\n" % text)
        elif tag == "code":
            if text is not None:
                parts.append(text)
        elif tag == "pre":
            parts.append("\n```\n%s\n```\n" % html2gemini(child))
        else:
            print("Unknown tag %s" % tag, file=sys.stderr)
        if tag in _BLOCK_TAGS:
            parts.append("\n\n")
    return "".join(parts)


def cleanroom(name):
    """Return a display name for the dotted room name *name*.

    The first component is dropped, the second gets an upper-case initial
    and the remaining ones are appended, separated by spaces.  A name
    without any dot is used as a whole instead of raising IndexError.
    """
    components = name.split(".")
    words = components[1:] or components
    first_word = words[0][:1].upper() + words[0][1:]
    return " ".join([first_word] + words[1:])


def typeroom(name):
    """Return the kind of room given by the first component of *name*.

    One of "Stand", "Room", "Keynote", "" or "Unknown".
    """
    return _ROOM_TYPES.get(name.split(".")[0], "Unknown")


def matrixroom(name):
    """Return the Matrix room alias built from the room name *name*."""
    ptype = "-stand" if typeroom(name) == "Stand" else ""
    return "#%s%s:fosdem.org" % (cleanroom(name).lower(), ptype)


def shortroom(name):
    """Return *name* lower-cased and without its dots."""
    return name.replace(".", "").lower()


def events_of(room):
    """Return the ids of the events in `events` whose room is *room*."""
    return [event for event in events if events[event]["room"] == room]


def _text_of(element, tag):
    """Return the text of the child *tag* of *element*, None if absent."""
    node = element.find(tag)
    return None if node is None else node.text


def load_schedule(root, clean_html):
    """Fill `days`, `rooms` and `events` from the schedule element *root*.

    *clean_html* is called on every non-empty abstract and its result is
    stored as the event's abstract.
    """
    for day in root.findall("day"):
        day_index = day.attrib["index"]
        days[day_index] = {"date": day.attrib["date"]}
        for room in day.findall("room"):
            room_name = room.attrib["name"]
            rooms[room_name] = {}
            for event in room.findall("event"):
                # Persons without a name are skipped.
                names = [person.text
                         for person in event.findall("persons/person")
                         if person.text]
                abstract = _text_of(event, "abstract")
                if abstract is not None:
                    abstract = clean_html(abstract)
                events[event.attrib["id"]] = {
                    "room": room_name,
                    "day": day_index,
                    "start": _text_of(event, "start"),
                    "duration": _text_of(event, "duration"),
                    "title": _text_of(event, "title"),
                    "subtitle": _text_of(event, "subtitle"),
                    "type": _text_of(event, "type"),
                    "abstract": abstract,
                    "speakers": " and ".join(names),
                }


def render_event_page(event_id, parse_html):
    """Return the gemtext page of the event *event_id*.

    *parse_html* turns the stored abstract (HTML text) into an element
    whose children html2gemini() can walk.
    """
    info = events[event_id]
    if info["subtitle"] is None:
        subtitle = ""
    else:
        subtitle = "\n%s\n" % info["subtitle"]
    if info["abstract"] is None:
        abstract = ""
    else:
        abstract = "\n%s\n" % html2gemini(parse_html(info["abstract"]))
    return _EVENT_PAGE % (
        info["title"], info["speakers"], info["type"], subtitle,
        info["day"], days[info["day"]]["date"], info["start"],
        cleanroom(info["room"]), info["duration"],
        matrixroom(info["room"]), abstract)


def render_all_events(sortedevents):
    """Return the gemtext page listing the events *sortedevents*, in order.

    Only events in a "Room" or "Keynote" room are listed; a day heading is
    emitted each time the day changes.
    """
    chunks = ["# All events at FOSDEM\n"]
    day = None
    for event in sortedevents:
        info = events[event]
        if typeroom(info["room"]) not in ("Room", "Keynote"):
            continue
        if info["day"] != day:
            day = info["day"]
            chunks.append("## Day %s (%s)\n" % (day, days[day]["date"]))
        subtitle = "" if info["subtitle"] is None else "%s\n" % info["subtitle"]
        speakers = "By %s" % info["speakers"] if info["speakers"] else ""
        chunks.append(_EVENT_ENTRY % (
            info["title"], speakers, subtitle, info["start"],
            cleanroom(info["room"]), event))
    chunks.append(_PAGE_FOOTER)
    return "\n".join(chunks)


def render_all_rooms(sortedrooms):
    """Return the gemtext page listing the rooms *sortedrooms*, in order."""
    chunks = ["# All rooms at FOSDEM\n"]
    for room in sortedrooms:
        chunks.append('## %s "%s"\n' % (typeroom(room), cleanroom(room)))
        for event in events_of(room):
            chunks.append("=> event-%s.gmi %s" % (event, events[event]["title"]))
        chunks.append(_ROOM_FOOTER % (matrixroom(room), shortroom(room)))
    chunks.append(_PAGE_FOOTER)
    return "\n".join(chunks)


def _write_page(path, text):
    """Write *text* and a final newline to *path*, encoded as UTF-8."""
    with open(path, "w", encoding="utf-8") as page:
        print(text, file=page)


if __name__ == "__main__":
    # lxml is only needed for the conversion run itself; importing it here
    # keeps the helpers above importable without it.
    # https://lxml.de/
    from lxml.html.clean import Cleaner
    from lxml.html.soupparser import fromstring

    tree = ET.parse(FILE)
    root = tree.getroot()

    # https://lxml.de/api/lxml.html.clean.Cleaner-class.html
    html_cleaner = Cleaner(scripts=True, javascript=True, embedded=True,
                           meta=True, page_structure=True, links=True,
                           remove_unknown_tags=True, frames=True,
                           annoying_tags=True, add_nofollow=True,
                           style=False)

    load_schedule(root, html_cleaner.clean_html)

    for event_id in events:
        _write_page("event-%s.gmi" % event_id,
                    render_event_page(event_id, fromstring))

    sortedevents = sorted(
        events,
        key=lambda x: "%s %s" % (events[x]["day"], events[x]["start"]))
    _write_page(ALLEVENTS, render_all_events(sortedevents))

    sortedrooms = sorted(rooms, key=cleanroom)
    _write_page(ALLROOMS, render_all_rooms(sortedrooms))