Web-LangTag/registries/lsr2atom.py

#!/usr/bin/env python3

# Version label of this script; reported in the feed's <generator> element.
__version__ = "BETA"
# Site domain; used to build the feed/entry links and the tag: URI below.
domain = "langtag.net"
# Base "tag:" URI. It serves as the feed id, as the category scheme, and as
# the prefix of every entry id.
tag_prefix = "tag:%s,2007-05:LSR" % domain

# Standard library
import sys
import urllib.request, urllib.parse, urllib.error
# Now a standard part of Python >= 2.5
import xml.etree.ElementTree as ET
import datetime

# External libraries
import psycopg2

# Maximum number of rows fetched per record type (bound to the SQL LIMIT).
# NOTE(review): this module-level name shadows the builtin max(); it is kept
# as-is because process_type() reads it by this name.
max = 10

# Database driver module used to open the connection.
db_module = psycopg2

def process_type(tree, type="language"):
    """Append Atom entries for the most recently added records of one type.

    Queries the ``<type>s_with_descr`` relation through the module-level
    ``cursor``, newest ``added`` first and limited to the module-level
    ``max`` rows, then appends one ``<entry>`` child to *tree* per row.

    Args:
        tree: Element that receives the new ``<entry>`` children.
        type: Record type name (for example ``"language"``). It is used in
            the relation name, the entry id, the category and the link.

    Returns:
        None. *tree* is modified in place.
    """
    # NOTE: the relation name is interpolated directly into the SQL text, so
    # callers must only pass trusted type names. The LIMIT value, in
    # contrast, is bound by the database driver.
    select_clause = "SELECT code,description, added FROM %ss_with_descr" % type
    request = select_clause + " ORDER BY added DESC LIMIT %(max)s"
    cursor.execute(request, {"max": max})

    for code, description, added in cursor.fetchall():
        label = type.capitalize()
        heading = "%s: %s" % (label, description)
        identifier = tag_prefix + "/" + urllib.parse.quote_plus(
            "%s %s" % (type, code))
        # Only the date is known, so the time of day is fixed at midnight.
        timestamp = added.strftime("%Y-%m-%dT00:00:00Z")
        target = "http://www.%s/registries/registry-html/%s/%s.html" % \
                 (domain, type, code)

        entry = ET.SubElement(tree, "entry")
        ET.SubElement(entry, "title").text = heading
        ET.SubElement(entry, "id").text = identifier
        ET.SubElement(entry, "published").text = timestamp
        # TODO: records in the LSR are sometimes updated but it is not
        # obvious to see it, since there is only an "Added" field. Until
        # then "updated" simply mirrors "published".
        ET.SubElement(entry, "updated").text = timestamp
        ET.SubElement(
            entry, "category",
            {"scheme": tag_prefix, "term": type, "label": label})
        ET.SubElement(entry, "link", {"rel": "alternate", "href": target})
        body = ET.SubElement(entry, "content", {"type": "text"})
        body.text = """
        %s

        %s

        %s

        Added on %s
        """ % (type, code, description, added.strftime("%Y-%m-%d"))
        # TODO: an alternate Content in HTML?

# ---------------------------------------------------------------------------
# Script body: build the Atom feed header, add the entries of every record
# type from the database, then write the document to standard output.
# ---------------------------------------------------------------------------

feed = ET.Element("feed")
feed.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
title = ET.SubElement(feed, "title")
title.text = "Language Tag Registry syndication feed"
updated = ET.SubElement(feed, "updated")
# The trailing "Z" declares UTC, so the clock must be read in UTC as well
# (a naive local time would be off by the host's UTC offset).
updated.text = datetime.datetime.now(datetime.timezone.utc).strftime(
    "%Y-%m-%dT%H:%M:00Z")
link_html = ET.SubElement(feed, "link")
link_html.attrib["rel"] = "alternate"
link_html.attrib["type"] = "text/html"
link_html.attrib["href"] = "http://www.%s/" % domain
link_self = ET.SubElement(feed, "link")
link_self.attrib["rel"] = "self"
link_self.attrib["type"] = "application/atom+xml"
link_self.attrib["href"] = "http://www.%s/registries/lsr.atom" % domain
author = ET.SubElement(feed, "author")
name = ET.SubElement(author, "name")
name.text = "Stephane Bortzmeyer"
email = ET.SubElement(author, "email")
email.text = "webmaster@langtag.net"
feed_id = ET.SubElement(feed, "id")
feed_id.text = tag_prefix
generator = ET.SubElement(feed, "generator")
generator.text = "%s %s running with Python %s" % \
             ("lsr2atom", __version__, sys.version.split()[0])

# process_type() reads the module-level "cursor", so it has to be bound
# before the calls below. The try/finally pairs make sure the cursor and the
# connection are released even when building the feed fails.
connection = db_module.connect("dbname=lsr")
try:
    cursor = connection.cursor()
    try:
        for record_type in ("language", "variant", "script", "region",
                            "extlang"):
            process_type(feed, record_type)
    finally:
        cursor.close()
finally:
    connection.close()

# The XML declaration promises UTF-8, so emit UTF-8 bytes directly instead
# of relying on the locale-dependent encoding of the text-mode stdout.
sys.stdout.buffer.write(b"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n")
sys.stdout.buffer.write(ET.tostring(feed, encoding="utf-8"))
sys.stdout.buffer.write(b"\n")