First code import

This commit is contained in:
Stephane Bortzmeyer 2021-03-27 14:23:16 +01:00
parent 5d706a10b8
commit 454d05cb7a
4 changed files with 283 additions and 1 deletions

12
Makefile Normal file
View File

@ -0,0 +1,12 @@
# Destination directory for the generated pages (presumably the directory
# served by the Gemini server -- adjust to your installation).
DEST=/var/gemini/fosdem/

# None of these targets creates a file bearing its own name. Declaring them
# phony guarantees they always run, even if a file called "update" or
# "install" happens to exist in this directory.
.PHONY: all updateinstall update install

all: updateinstall

updateinstall: update install

# Download the current schedule and regenerate the gemtext pages.
update:
	wget --quiet --output-document schedule.xml https://fosdem.org/2021/schedule/xml
	./schedule2gemtext.py

# Copy the gemtext pages to the destination directory.
install:
	rsync -a -v -p --exclude=Makefile --exclude="*~" *.gmi ${DEST}

View File

@ -1,3 +1,24 @@
# fosdem2gemini
Convert the FOSDEM schedule to gemtext (text/gemini) format, to be served with Gemini.
Convert the [FOSDEM](https://fosdem.org/) schedule to gemtext
(`text/gemini`) format, to be served via
[Gemini](https://en.wikipedia.org/wiki/Gemini_(protocol)).
## Installation
Download the program. Be sure you have the
[lxml](https://lxml.de/) library. Edit sample-index.gmi to suit your
needs and rename it index.gmi. Then run the program periodically, for
instance through cron:
```
# FOSDEM
26 1,5,9,13,17,21 * * * (cd /path/to/FOSDEM; make all)
```
## Technical details
The FOSDEM schedule is publicly available (from the schedule
management system Pentabarf) as an XML file (which we can retrieve with,
for instance, `wget https://fosdem.org/2021/schedule/xml`). The use of
a structured format allows us to convert it easily.

14
sample-index.gmi Normal file
View File

@ -0,0 +1,14 @@
# FOSDEM schedule
An unofficial Gemini version of the FOSDEM schedule for the 2021 "COVID edition".
=> allevents.gmi All events, sorted by date and time
=> allrooms.gmi All "rooms", with their events
=> https://fosdem.org/ FOSDEM official page
=> https://fosdem.org/2021/schedule/ FOSDEM official schedule
The schedule here is automatically produced from the FOSDEM "Pentabarf" files.
This service is maintained by Stéphane Bortzmeyer <stephane+fosdem@bortzmeyer.org>.

235
schedule2gemtext.py Executable file
View File

@ -0,0 +1,235 @@
#!/usr/bin/python3
# The schedule data is published at:
# https://fosdem.org/2021/schedule/xml
# Local copy of the schedule XML; this is the file the script parses.
FILE = "schedule.xml"
# Name of the index page (not referenced in the part of the script shown here).
INDEX = "index.gmi"
# Output file for the page listing all events.
ALLEVENTS = "allevents.gmi"
# Output file for the page listing all rooms.
ALLROOMS = "allrooms.gmi"
import xml.etree.ElementTree as ET
import sys
# https://lxml.de/
from lxml.html.clean import Cleaner
from lxml.html.soupparser import fromstring
def html2gemini(html):
    """Convert the children of an HTML element to gemtext.

    Walks the direct children of *html* (any element object exposing
    ``tag``, ``text`` and iteration over its children) and renders the
    handful of tags the converter knows about:

    * ``p``: the paragraph text followed by a newline;
    * ``div``, ``ul``, ``ol``: rendered recursively, followed by a newline;
    * ``hr``: a blank line;
    * ``h1``/``h2``/``h3``: a gemtext heading on a line of its own;
    * ``li``: a gemtext list item;
    * ``code``: its text, verbatim;
    * ``pre``: its children, wrapped in a preformatted block.

    Any other tag is reported on standard error and skipped. Only the
    leading text of each element is used; text that follows a nested
    element is not included.

    Elements without text are skipped instead of raising ``TypeError``
    (``code``) or producing the literal string "None" (headings).

    Returns the gemtext as a string (empty if nothing was rendered).
    """
    heading_marks = {"h1": "#", "h2": "##", "h3": "###"}
    containers = ("div", "ul", "ol")
    # Tags after which a line break is always emitted.
    newline_after = ("div", "p", "ul", "ol")

    parts = []
    for child in html:
        tag = child.tag
        text = child.text
        if tag == "p":
            if text is not None:
                parts.append(text)
        elif tag in containers:
            parts.append(html2gemini(child))
        elif tag == "hr":
            parts.append("\n")
        elif tag in heading_marks:
            if text is not None:
                # A gemtext heading is a whole line: terminate it so the
                # following content does not end up inside the heading.
                parts.append("%s %s\n" % (heading_marks[tag], text))
        elif tag == "li":
            if text is not None:
                parts.append("\n* %s\n" % text)
        elif tag == "code":
            if text is not None:
                parts.append(text)
        elif tag == "pre":
            parts.append("\n```\n%s\n```\n" % html2gemini(child))
        else:
            print("Unknown tag %s" % tag, file=sys.stderr)
        if tag in newline_after:
            parts.append("\n")
    return "".join(parts)
def cleanroom(name):
    """Return a human-readable room name.

    *name* is a dot-separated room identifier. The first component (the
    room-type prefix) is dropped, the first letter of the second component
    is upper-cased, and any remaining components are appended, separated
    by single spaces.

    Raises IndexError if *name* has no second component or if that
    component is empty.
    """
    parts = name.split(".")
    label = parts[1]
    words = [label[0].upper() + label[1:]]
    words.extend(parts[2:])
    return " ".join(words)
def typeroom(name):
    """Return the kind of room denoted by the prefix of a room identifier.

    The prefix is the part of *name* before the first ".". It maps to
    "Stand", "Room" or "Keynote"; the infodesk and lightning-talk prefixes
    map to the empty string, and any other prefix yields "Unknown".
    """
    kinds = {
        "S": "Stand",
        "D": "Room",
        "F": "Room",
        "M": "Room",
        "B": "Room",
        "K": "Keynote",
        "I": "",  # Infodesk
        "L": "",  # Lightning talks
    }
    prefix = name.split(".")[0]
    return kinds.get(prefix, "Unknown")
def matrixroom(name):
    """Return the Matrix room alias built from a room identifier.

    The alias is "#" + the lower-cased cleanroom() name + ":fosdem.org",
    with "-stand" inserted before the colon when typeroom() reports a
    stand.
    """
    suffix = "-stand" if typeroom(name) == "Stand" else ""
    return "#" + cleanroom(name).lower() + suffix + ":fosdem.org"
def shortroom(name):
    """Return the room identifier with its dots removed, in lower case.

    The script uses this value as the room's path component in the URL of
    the room page on the FOSDEM site.
    """
    return name.replace(".", "").lower()
def events_of(room):
    """Return the ids of the events taking place in *room*.

    Reads the module-level ``events`` table; ids are returned in that
    table's iteration order.
    """
    return [
        event_id
        for event_id, details in events.items()
        if details["room"] == room
    ]
# Tables filled while walking the schedule:
#   days   -- day index -> {"date": ...}
#   rooms  -- room name -> {}
#   events -- event id  -> details of the event
days = {}
rooms = {}
events = {}

# Load the schedule; `root` is the top-level element of the document.
tree = ET.parse(FILE)
root = tree.getroot()

# Sanitizer applied to the HTML abstracts before they are converted.
# Option reference: https://lxml.de/api/lxml.html.clean.Cleaner-class.html
html_cleaner = Cleaner(
    scripts=True,
    javascript=True,
    embedded=True,
    meta=True,
    page_structure=True,
    links=True,
    remove_unknown_tags=True,
    frames=True,
    annoying_tags=True,
    add_nofollow=True,
    style=False,
)
# Walk the schedule (day -> room -> event), record everything in the
# module-level tables `days`, `rooms` and `events`, and write one gemtext
# page per event ("event-<id>.gmi").
for day in root.findall("day"):
    days[day.attrib["index"]] = {"date": day.attrib["date"]}
    for room in day.findall("room"):
        rooms[room.attrib["name"]] = {}
        for event in room.findall("event"):
            # Not called `id`, to avoid shadowing the builtin.
            event_id = event.attrib["id"]
            speakers = " and ".join(
                speaker.text for speaker in event.findall("persons/person"))
            # The abstract is HTML; sanitize it before storing it.
            abstract = event.find("abstract").text
            if abstract is not None:
                abstract = html_cleaner.clean_html(abstract)
            details = {"room": room.attrib["name"],
                       "day": day.attrib["index"],
                       "start": event.find("start").text,
                       "duration": event.find("duration").text,
                       "title": event.find("title").text,
                       "subtitle": event.find("subtitle").text,
                       "type": event.find("type").text,
                       "abstract": abstract,
                       "speakers": speakers
                       }
            events[event_id] = details
            # Optional parts of the page: empty when the schedule has no
            # value for them.
            if details["subtitle"] is None:
                subtitle = ""
            else:
                subtitle = """
%s
""" % details["subtitle"]
            if details["abstract"] is None:
                abstract = ""
            else:
                abstract_root = fromstring(details["abstract"])
                abstract = """
%s
""" % html2gemini(abstract_root)
            # All the text is computed before the file is opened, and the
            # file is closed even if writing fails. The encoding is explicit
            # so that the output does not depend on the locale the script
            # runs under (e.g. from cron).
            with open("event-%s.gmi" % event_id, "w",
                      encoding="utf-8") as eventfile:
                print("""# FOSDEM event "%s"
%s
Type %s
%s
Starts on day %s (%s) at %s (Brussels time, UTC+1) in room %s (duration %s)
Matrix room %s
%s
=> . FOSDEM schedule page
""" % (details["title"], speakers, details["type"],
       subtitle, details["day"], days[details["day"]]["date"],
       details["start"], cleanroom(details["room"]), details["duration"],
       matrixroom(details["room"]),
       abstract),
                      file=eventfile)
# Write the page listing all events, grouped by day and ordered by start
# time. The sort key is a plain string comparison of "<day> <start>".
# NOTE(review): this assumes day indexes and start times compare correctly
# as text (single-digit days, zero-padded times) -- confirm against the
# schedule data.
sortedevents = sorted(
    events,
    key=lambda x: "%s %s" % (events[x]["day"], events[x]["start"]))
# The file is closed even if writing fails, and the encoding is explicit so
# that the output does not depend on the locale the script runs under.
with open(ALLEVENTS, "w", encoding="utf-8") as allevents:
    print("""# All events at FOSDEM
""", file=allevents)
    current_day = None
    for event in sortedevents:
        details = events[event]
        # Only regular rooms and keynotes are listed here.
        if typeroom(details["room"]) not in ["Room", "Keynote"]:
            continue
        # Emit a day heading each time the day changes.
        if details["day"] != current_day:
            current_day = details["day"]
            print("""## Day %s (%s)
""" % (details["day"], days[details["day"]]["date"]), file=allevents)
        if details["subtitle"] is None:
            subtitle = ""
        else:
            subtitle = """%s
""" % details["subtitle"]
        if details["speakers"] == "":
            speakers = ""
        else:
            speakers = "By %s" % details["speakers"]
        print("""### %s
%s
%s
Starts at %s (Brussels time, UTC+1) in room %s
=> event-%s.gmi Details on the event
""" % (details["title"], speakers,
       subtitle,
       details["start"], cleanroom(details["room"]), event),
              file=allevents)
    print("""
=> . FOSDEM schedule page
""", file=allevents)
# Write the page listing all rooms, ordered by their display name, each
# with links to its events, its Matrix room and its page on the FOSDEM site.
sortedrooms = sorted(rooms, key=cleanroom)
# The file is closed even if writing fails, and the encoding is explicit so
# that the output does not depend on the locale the script runs under.
with open(ALLROOMS, "w", encoding="utf-8") as allrooms:
    print("""# All rooms at FOSDEM
""", file=allrooms)
    for room in sortedrooms:
        print("""## %s "%s"
""" % (typeroom(room), cleanroom(room)),
              file=allrooms)
        for event in events_of(room):
            print("""=> event-%s.gmi %s""" % (event, events[event]["title"]),
                  file=allrooms)
        print("""
Matrix room : %s
=> https://fosdem.org/2021/schedule/room/%s/ Description of this room on FOSDEM site
""" % (matrixroom(room), shortroom(room)), file=allrooms)
    print("""
=> . FOSDEM schedule page
""", file=allrooms)