Web-LangTag/registries/lsr2atom.py

101 lines
3.4 KiB
Python
Raw Normal View History

2023-06-09 10:02:30 +02:00
#!/usr/bin/env python3
# Version label reported in the feed's <generator> element.
__version__ = "BETA"
# Domain name used to build the feed's URLs and the tag: URI prefix below.
domain = "langtag.net"
# "tag:" URI prefix used as the feed id, as the category scheme, and as the
# base of every entry id.
tag_prefix = "tag:%s,2007-05:LSR" % domain
2023-08-03 16:40:18 +02:00
# Standard library
2023-06-09 10:02:30 +02:00
import sys
import urllib.request, urllib.parse, urllib.error
2023-08-03 16:40:18 +02:00
# Now a standard part of Python >= 2.5
import xml.etree.ElementTree as ET
import datetime
# External libraries
2023-06-09 10:02:30 +02:00
import psycopg2
2023-08-04 10:46:52 +02:00
# Maximum number of rows fetched per record type; passed as the LIMIT
# parameter of the query in process_type().
# NOTE(review): this shadows the builtin max(). process_type() reads this
# module-level name, so it cannot be renamed here in isolation.
max = 30
2023-06-09 10:02:30 +02:00
# Database driver module used to open the connection.
# NOTE(review): presumably aliased so another DB-API module can be swapped
# in by changing this single line -- confirm.
db_module = psycopg2
def process_type(tree, type="language"):
    """Append one Atom ``entry`` element to *tree* per recent record.

    The rows are read through the module-level ``cursor`` from the
    ``<type>s_with_descr`` relation, most recently added first, and capped
    at the module-level ``max``.

    tree -- ElementTree element that receives the new ``entry`` children.
    type -- singular record kind (for instance "language"); it selects the
            relation queried and is reused in the entry id, category and
            link of every generated entry.

    Returns None; *tree* is modified in place.
    """
    query = ("SELECT code,description, added FROM %ss_with_descr" % type) + \
        " ORDER BY added DESC LIMIT %(max)s"
    cursor.execute(query, {'max': max})
    for code, description, added in cursor.fetchall():
        label = type.capitalize()
        entry = ET.SubElement(tree, "entry")

        heading = ET.SubElement(entry, "title")
        heading.text = "%s: %s" % (label, description)

        # The id is the feed's tag: prefix followed by the URL-quoted
        # "<type> <code>" pair.
        identifier = ET.SubElement(entry, "id")
        identifier.text = tag_prefix + "/" + urllib.parse.quote_plus("%s %s" % (type, code))

        # Only the date part of "added" is used; the time is fixed at midnight.
        first_seen = ET.SubElement(entry, "published")
        first_seen.text = added.strftime("%Y-%m-%dT00:00:00Z")
        # TODO: records in the LSR are sometimes updated but it is not
        # obvious to see it, since there is only an "Added" field.
        last_seen = ET.SubElement(entry, "updated")
        last_seen.text = first_seen.text

        ET.SubElement(entry, "category", {
            "scheme": tag_prefix,
            "term": type,
            "label": label,
        })
        ET.SubElement(entry, "link", {
            "rel": "alternate",
            "href": "http://www.%s/registries/registry-html/%s/%s.html" %
                    (domain, type, code),
        })

        body = ET.SubElement(entry, "content", {"type": "text"})
        body.text = """
%s
%s
%s
Added on %s
""" % (type, code, description, added.strftime("%Y-%m-%d"))
        # TODO: an alternate Content in HTML?
# ---------------------------------------------------------------------------
# Main script: build the Atom feed for the registry and print it on stdout.
# ---------------------------------------------------------------------------

# Feed-level metadata.
feed = ET.Element("feed")
feed.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
title = ET.SubElement(feed, "title")
title.text = "Language Tag Registry syndication feed"
updated = ET.SubElement(feed, "updated")
# The timestamp carries a literal "Z" (UTC) suffix, so it must be taken from
# UTC and not from naive local time.
updated.text = datetime.datetime.now(datetime.timezone.utc).strftime(
    "%Y-%m-%dT%H:%M:00Z")
link_html = ET.SubElement(feed, "link")
link_html.attrib["rel"] = "alternate"
link_html.attrib["type"] = "text/html"
link_html.attrib["href"] = "http://www.%s/" % domain
link_self = ET.SubElement(feed, "link")
link_self.attrib["rel"] = "self"
link_self.attrib["type"] = "application/atom+xml"
link_self.attrib["href"] = "http://www.%s/registries/lsr.atom" % domain
author = ET.SubElement(feed, "author")
name = ET.SubElement(author, "name")
name.text = "Stephane Bortzmeyer"
email = ET.SubElement(author, "email")
email.text = "webmaster@langtag.net"
feed_id = ET.SubElement(feed, "id")
feed_id.text = tag_prefix
generator = ET.SubElement(feed, "generator")
generator.text = "%s %s running with Python %s" % \
    ("lsr2atom", __version__, sys.version.split()[0])

# One batch of entries per record type. `cursor` has to remain a module-level
# name because process_type() reads it as a global.
connection = db_module.connect("dbname=lsr")
try:
    cursor = connection.cursor()
    try:
        for record_type in ("language", "variant", "script", "region", "extlang"):
            process_type(feed, record_type)
    finally:
        # Release the cursor even when a query or entry construction fails.
        cursor.close()
finally:
    connection.close()

# Emit the XML declaration followed by the serialized feed.
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>")
print(ET.tostring(feed, encoding="UTF-8").decode())