#!/usr/bin/env python3
"""Convert the RFC index (rfc-index.xml) into a Gemini (gemtext) index page.

Reads the XML file at INPUT, collects the number and title of every
``rfc-entry`` element, and writes to OUTPUT one gemtext link line per RFC,
highest number first, below a short header.
"""

import sys
import re
import time
import xml.etree.ElementTree as ET

INPUT = "/var/gemini/rfc-mirror/rfc-index.xml"
OUTPUT = "/var/gemini/rfc-mirror/rfc-index.gmi.tmp"

# Every element of the index lives in this XML namespace.
NAMESPACE = "{http://www.rfc-editor.org/rfc-index}"
# Compiled once; captures the numeric part of identifiers such as "RFC0791".
DOC_ID_PATTERN = re.compile(r"^RFC([0-9]+)")


def collect_rfcs(root):
    """Return ``{number: {'title': title}}`` for the rfc-entry children of *root*.

    Children with any other tag are ignored.  An entry with no doc-id child
    is skipped silently; an entry whose doc-id does not start with "RFC"
    followed by digits is reported on stderr and skipped.  ``title`` is
    None when the entry has no title child.
    """
    rfcs = {}
    for entry in root:
        if entry.tag != NAMESPACE + "rfc-entry":
            continue
        # findtext() looks the child up by tag, so the result does not depend
        # on whether <title> comes before or after <doc-id>.  It yields None
        # for a missing element and "" for an empty one, so the regex below
        # is never handed None.
        doc_id = entry.findtext(NAMESPACE + "doc-id")
        if doc_id is None:
            continue
        match = DOC_ID_PATTERN.match(doc_id)
        if match is None:
            print(" without a RFC: %s" % doc_id, file=sys.stderr)
            continue
        rfcs[int(match.group(1))] = {
            "title": entry.findtext(NAMESPACE + "title"),
        }
    return rfcs


def write_index(rfcs, output, built_at=None):
    """Write the gemtext index for *rfcs* to the text file object *output*.

    *rfcs* has the shape returned by collect_rfcs().  *built_at* is the
    timestamp string shown in the header; when None, the current UTC time
    is used.
    """
    if built_at is None:
        built_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    print("# Index of RFCs", file=output)
    # A gemtext link is only recognised when "=>" starts the line, so the
    # link below must stay on a line of its own.
    print("""
This index was built on %s from the rfc-index.xml file found at rsync::ftp.rfc-editor.org::rfcs-text-only.

=> https://forge.chapril.org/bortzmeyer/rfc2gemini Source code of the conversion program
""" % built_at, file=output)
    for num in sorted(rfcs, reverse=True):
        title = rfcs[num].get("title")
        if title:
            print("=> /rfc-mirror/rfc%s.txt RFC %s \"%s\"" % (num, num, title),
                  file=output)
        else:
            # No usable title: still publish the link rather than crash.
            print("=> /rfc-mirror/rfc%s.txt RFC %s" % (num, num), file=output)


def main():
    """Parse INPUT and write the generated index to OUTPUT."""
    # Parse first, so a missing or malformed input never truncates OUTPUT.
    tree = ET.parse(INPUT)
    rfcs = collect_rfcs(tree.getroot())
    # The context manager closes the file even if writing fails; the encoding
    # is explicit so the result does not depend on the locale.
    with open(OUTPUT, "w", encoding="utf-8") as output:
        write_index(rfcs, output)


if __name__ == "__main__":
    main()