Web-LangTag/registries/lsr2atom.py

#!/usr/bin/env python3

# Release label of this script; it is reported in the feed's <generator>.
__version__ = "BETA"
# Site domain from which the feed links and the tag: URI are derived.
domain = "langtag.net"
# Base "tag:" URI: used as the feed id, as the category scheme and as the
# prefix of every entry id.
tag_prefix = "tag:{},2007-05:LSR".format(domain)

# Standard library
import sys
import urllib.request, urllib.parse, urllib.error
# Now a standard part of Python >= 2.5
import xml.etree.ElementTree as ET
import datetime

# External libraries
import psycopg2

# Upper bound on the number of records fetched per registry type; it is
# passed as the SQL LIMIT value by process_type().
# NOTE(review): this name shadows the builtin max() for the whole module.
max = 30

# Database driver module used to open the connection to the registry database.
db_module = psycopg2

def process_type(tree, type="language"):
    """Append one Atom "entry" element per recent record of the given type.

    The relation "<type>s_with_descr" is queried through the module-level
    ``cursor`` for its most recently added rows (newest first, at most
    ``max`` of them), and each row becomes an "entry" child of ``tree``.

    tree -- ElementTree element that receives the new "entry" children
    type -- singular record type name; it selects the relation to query and
            appears in the entry title, id, category, link and content

    Returns None; ``tree`` is modified in place.
    """
    relation = "%ss_with_descr" % type
    # Only the relation name is spliced into the statement text; the row
    # limit remains a bound parameter filled in by the database driver.
    query = "SELECT code,description, added FROM " + relation + \
            " ORDER BY added DESC LIMIT %(max)s"
    cursor.execute(query, {'max': max})
    label = type.capitalize()
    for code, description, added in cursor.fetchall():
        entry = ET.SubElement(tree, "entry")
        ET.SubElement(entry, "title").text = "%s: %s" % (label, description)
        ET.SubElement(entry, "id").text = \
            tag_prefix + "/" + urllib.parse.quote_plus("%s %s" % (type, code))
        # Only a date is formatted here, so the time part is pinned to midnight.
        day_stamp = added.strftime("%Y-%m-%dT00:00:00Z")
        ET.SubElement(entry, "published").text = day_stamp
        # TODO: records in the LSR are sometimes updated but it is not obvious to see it,
        # since there is only an "Added" field.
        ET.SubElement(entry, "updated").text = day_stamp
        ET.SubElement(entry, "category",
                      {"scheme": tag_prefix, "term": type, "label": label})
        page_url = "http://www.%s/registries/registry-html/%s/%s.html" % \
                   (domain, type, code)
        ET.SubElement(entry, "link", {"rel": "alternate", "href": page_url})
        body = ET.SubElement(entry, "content", {"type": "text"})
        body.text = """
        %s

        %s

        %s

        Added on %s
        """ % (type, code, description, added.strftime("%Y-%m-%d"))
        # TODO: an alternate Content in HTML?
        
# ---- Main script: build the Atom feed and write it to standard output ----

# process_type() reads the module-level name "cursor", so it has to be bound
# before the first call further down.
connection = db_module.connect("dbname=lsr")
cursor = connection.cursor()

# Feed-level metadata.
feed = ET.Element("feed")
feed.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
title = ET.SubElement(feed, "title")
title.text = "Language Tag Registry syndication feed"
updated = ET.SubElement(feed, "updated")
# The format string ends with a literal "Z" (UTC designator), so the value
# must come from the UTC clock; a naive local now() would be wrong by the
# server's UTC offset. The seconds field stays fixed at 00, as in the
# original format string.
updated.text = datetime.datetime.now(datetime.timezone.utc).strftime(
    "%Y-%m-%dT%H:%M:00Z")
link_html = ET.SubElement(feed, "link")
link_html.attrib["rel"] = "alternate"
link_html.attrib["type"] = "text/html"
link_html.attrib["href"] = "http://www.%s/" % domain
link_self = ET.SubElement(feed, "link")
link_self.attrib["rel"] = "self"
link_self.attrib["type"] = "application/atom+xml"
link_self.attrib["href"] = "http://www.%s/registries/lsr.atom" % domain
author = ET.SubElement(feed, "author")
name = ET.SubElement(author, "name")
name.text = "Stephane Bortzmeyer"
email = ET.SubElement(author, "email")
email.text = "webmaster@langtag.net"
feed_id = ET.SubElement(feed, "id")
feed_id.text = tag_prefix
generator = ET.SubElement(feed, "generator")
generator.text = "%s %s running with Python %s" % \
             ("lsr2atom", __version__, sys.version.split()[0])

# One batch of entries per registry record type, in this fixed order.
# The database handles are released even when one of the queries fails.
try:
    for registry_type in ("language", "variant", "script", "region", "extlang"):
        process_type(feed, registry_type)
finally:
    cursor.close()
    connection.close()

# The XML declaration announces UTF-8, so write UTF-8 bytes directly instead
# of letting print() re-encode the text with the locale's stdout encoding
# (which may not be UTF-8 and may fail on non-ASCII descriptions).
sys.stdout.buffer.write(b"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n")
sys.stdout.buffer.write(ET.tostring(feed, encoding="UTF-8"))
sys.stdout.buffer.write(b"\n")
Initial import 2023-06-09 10:02:30 +02:00			`#!/usr/bin/env python3`

			`__version__ = "BETA"`
			`domain = "langtag.net"`
			`tag_prefix = "tag:%s,2007-05:LSR" % domain`

Syndication feed fixed 2023-08-03 16:40:18 +02:00			`# Standard library`
Initial import 2023-06-09 10:02:30 +02:00			`import sys`
			`import urllib.request, urllib.parse, urllib.error`
Syndication feed fixed 2023-08-03 16:40:18 +02:00			`# Now a standard part of Python >= 2.5`
			`import xml.etree.ElementTree as ET`
			`import datetime`

			`# External libraries`
Initial import 2023-06-09 10:02:30 +02:00			`import psycopg2`

More entries in the feed 2023-08-04 10:46:52 +02:00			`max = 30`
Initial import 2023-06-09 10:02:30 +02:00
			`db_module = psycopg2`

			`def process_type(tree, type="language"):`
			`request = ("SELECT code,description, added FROM %ss_with_descr" % type) + \`
			`" ORDER BY added DESC LIMIT %(max)s"`
			`cursor.execute(`
			`request,`
			`{'max': max})`
			`for tuplee in cursor.fetchall():`
			`code = tuplee[0]`
			`description = tuplee[1]`
			`added = tuplee[2]`
			`utype = type.capitalize()`
			`entry = ET.SubElement(tree, "entry")`
			`title = ET.SubElement(entry, "title")`
			`title.text = "%s: %s" % (utype, description)`
			`entry_id = ET.SubElement(entry, "id")`
			`entry_id.text = tag_prefix + "/" + urllib.parse.quote_plus("%s %s" % (type, code))`
			`published = ET.SubElement(entry, "published")`
			`published.text = added.strftime("%Y-%m-%dT00:00:00Z")`
			`# TODO: records in the LSR are sometimes updated but it is not obvious to see it,`
			`# since there is only an "Added" field.`
			`updated = ET.SubElement(entry, "updated")`
			`updated.text = published.text`
			`category = ET.SubElement(entry, "category")`
			`category.attrib["scheme"] = tag_prefix`
			`category.attrib["term"] = type`
			`category.attrib["label"] = utype`
			`link = ET.SubElement(entry, "link")`
			`link.attrib["rel"] = "alternate"`
			`link.attrib["href"] = "http://www.%s/registries/registry-html/%s/%s.html" % \`
			`(domain, type, code)`
			`content = ET.SubElement(entry, "content")`
			`content.attrib["type"] = "text"`
			`content.text = """`
			`%s`

			`%s`

			`%s`

			`Added on %s`
			`""" % (type, code, description, added.strftime("%Y-%m-%d"))`
			`# TODO: an alternate Content in HTML?`

			`connection = db_module.connect("dbname=lsr")`
			`cursor = connection.cursor()`

			`feed = ET.Element("feed")`
			`feed.attrib["xmlns"] = "http://www.w3.org/2005/Atom"`
			`title = ET.SubElement(feed, "title")`
			`title.text = "Language Tag Registry syndication feed"`
			`updated = ET.SubElement(feed, "updated")`
Syndication feed fixed 2023-08-03 16:40:18 +02:00			`updated.text = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:00Z")`
Initial import 2023-06-09 10:02:30 +02:00			`link_html = ET.SubElement(feed, "link")`
			`link_html.attrib["rel"] = "alternate"`
			`link_html.attrib["type"] = "text/html"`
			`link_html.attrib["href"] = "http://www.%s/" % domain`
			`link_self = ET.SubElement(feed, "link")`
			`link_self.attrib["rel"] = "self"`
			`link_self.attrib["type"] = "application/atom+xml"`
			`link_self.attrib["href"] = "http://www.%s/registries/lsr.atom" % domain`
			`author = ET.SubElement(feed, "author")`
			`name = ET.SubElement(author, "name")`
			`name.text = "Stephane Bortzmeyer"`
			`email = ET.SubElement(author, "email")`
			`email.text = "webmaster@langtag.net"`
			`feed_id = ET.SubElement(feed, "id")`
			`feed_id.text = tag_prefix`
			`generator = ET.SubElement(feed, "generator")`
			`generator.text = "%s %s running with Python %s" % \`
			`("lsr2atom", __version__, sys.version.split()[0])`

			`process_type(feed, "language")`
			`process_type(feed, "variant")`
			`process_type(feed, "script")`
			`process_type(feed, "region")`
			`process_type(feed, "extlang")`
			`cursor.close()`
			`connection.close()`
Syndication feed fixed 2023-08-03 16:40:18 +02:00			`print("<?xml version=\"1.0\" encoding=\"utf-8\"?>")`
			`print(ET.tostring(feed, encoding="UTF-8").decode())`