diff --git a/INSTALL b/INSTALL deleted file mode 100644 index 3632321..0000000 --- a/INSTALL +++ /dev/null @@ -1,4 +0,0 @@ -Pre-requisites on Debian: - -apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev - diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..9c6c7af --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,22 @@ +# Installation + +GaBuZoMeu is written in [Haskell](https://www.haskell.org/) so you will need an Haskell compiler. Then just type `make all`. + +To see if everything is fine, `make test`. + +## Pre-requisites on Debian operating system + +``` +apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev +``` + +## Bug reports + +On [the forge](https://forge.chapril.org/bortzmeyer/GaBuZoMeu/issues). + +## Creating the database + +If you want to create a database to insert the language subtag +registry (with the `registry2postgresql` or `registry2sqlite` +programs), see the creation scripts under `./SQL/`. + diff --git a/README.md b/README.md index 33fe53b..9f89519 100644 --- a/README.md +++ b/README.md @@ -5,4 +5,20 @@ language tag registry (see RFC 5646). ## Installation -See INSTALL +See `INSTALL.md`. + +## Using with the official language tag registry + +``` +wget https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry +``` + +## Licence + +Free software (BSD 3-clause), see `LICENSE`. + +## Author + +Stéphane Bortzmeyer +stephane+gabuzomeu@bortzmeyer.org + diff --git a/SQL/PostgreSQL/create-db-subtag.sql b/SQL/PostgreSQL/create-db-subtag.sql new file mode 100644 index 0000000..1842c62 --- /dev/null +++ b/SQL/PostgreSQL/create-db-subtag.sql @@ -0,0 +1,186 @@ +DROP VIEW Languages_with_descr; +DROP VIEW Languages_with_all_descr; +DROP VIEW Extlangs_with_descr; +DROP VIEW Extlangs_with_all_descr; +DROP VIEW Scripts_with_descr; +DROP VIEW Scripts_with_all_descr; +DROP VIEW Variants_with_descr; +DROP VIEW Variants_with_all_descr; +DROP VIEW Regions_with_descr; +DROP VIEW Regions_with_all_descr; +DROP TABLE Descriptions_languages; +DROP TABLE Descriptions_extlangs; +DROP TABLE Descriptions_scripts; +DROP TABLE Descriptions_regions; +DROP TABLE Descriptions_variants; +--DROP TABLE Descriptions_extensions; +DROP TABLE Descriptions; +DROP TABLE Prefixes; +DROP TABLE Languages; +DROP TABLE Extlangs; +DROP TABLE Scripts; +DROP TABLE Regions; +DROP TABLE Variants; +DROP TABLE Extensions; + +DROP AGGREGATE concatenate(text); + +CREATE TABLE Scripts ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + added DATE, + comments TEXT); + +CREATE TABLE Languages ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + suppressscript TEXT REFERENCES Scripts(code), + preferredvalue TEXT, -- Yes, it should REFERENCES Languages(code), but + -- there is a chicken-and-egg problem. + added DATE, + comments TEXT); + +CREATE TABLE Extlangs ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + added DATE, + prefix TEXT, -- references Languages(code)? Can a prefix be longer (a language and a script, for instance)? + comments TEXT); + +CREATE TABLE Regions ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + added DATE, + deprecated DATE, + comments TEXT); + +CREATE TABLE Variants ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + -- TODO: prefixes + added DATE, + comments TEXT); + +CREATE TABLE Extensions ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + added DATE, + comments TEXT); + +CREATE TABLE Descriptions ( + id SERIAL PRIMARY KEY NOT NULL, + description TEXT NOT NULL); + +CREATE TABLE Descriptions_Languages ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + lang TEXT NOT NULL REFERENCES Languages(code)); + +CREATE TABLE Descriptions_Extlangs ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + extlang TEXT NOT NULL REFERENCES Extlangs(code)); + +CREATE TABLE Descriptions_Scripts ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + script TEXT NOT NULL REFERENCES Scripts(code)); + +CREATE TABLE Descriptions_Regions ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + region TEXT NOT NULL REFERENCES Regions(code)); + +CREATE TABLE Descriptions_Variants ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + variant TEXT NOT NULL REFERENCES Variants(code)); + +CREATE TABLE Prefixes ( + variant TEXT NOT NULL REFERENCES Variants(code), + prefix TEXT NOT NULL REFERENCES Languages(code) +); + +CREATE OR REPLACE FUNCTION concat2(text, text) RETURNS text AS ' + SELECT CASE WHEN $1 IS NULL OR $1 = '''' THEN $2 + WHEN $2 IS NULL OR $2 = '''' THEN $1 + ELSE $1 || '' / '' || $2 + END; + ' + LANGUAGE SQL; + +CREATE AGGREGATE concatenate ( + sfunc = concat2, + basetype = text, + stype = text, + initcond = '' +); + +CREATE VIEW Languages_with_all_descr AS + SELECT l.code, l.added, l.suppressscript, d.description + FROM languages l,descriptions d,descriptions_languages WHERE + descriptions_languages.lang = l.code AND + descriptions_languages.description = d.id; + +CREATE VIEW Languages_with_descr AS + SELECT l.code, l.added, l.suppressscript, + concatenate(d.description) as description FROM languages l, descriptions d, + descriptions_languages WHERE + descriptions_languages.lang = l.code AND + descriptions_languages.description = d.id + GROUP BY l.code, l.added, l.suppressscript; + +CREATE VIEW Variants_with_all_descr AS + SELECT v.code, v.added, d.description + FROM variants v,descriptions d,descriptions_variants WHERE + descriptions_variants.variant = v.code AND + descriptions_variants.description = d.id; + +CREATE VIEW Variants_with_descr AS + SELECT v.code, v.added, + concatenate(d.description) as description FROM + variants v, descriptions d, + descriptions_variants WHERE + descriptions_variants.variant = v.code AND + descriptions_variants.description = d.id + GROUP BY v.code, v.added; + +CREATE VIEW Scripts_with_all_descr AS + SELECT s.code, s.added, d.description + FROM scripts s,descriptions d,descriptions_scripts WHERE + descriptions_scripts.script = s.code AND + descriptions_scripts.description = d.id; + +CREATE VIEW Scripts_with_descr AS + SELECT s.code, s.added, + concatenate(d.description) as description FROM scripts s, descriptions d, + descriptions_scripts WHERE + descriptions_scripts.script = s.code AND + descriptions_scripts.description = d.id + GROUP BY s.code, s.added; + +CREATE VIEW Regions_with_all_descr AS + SELECT r.code, r.added, d.description + FROM regions r,descriptions d,descriptions_regions WHERE + descriptions_regions.region = r.code AND + descriptions_regions.description = d.id; + +CREATE VIEW Regions_with_descr AS + SELECT r.code, r.added, + concatenate(d.description) as description FROM regions r, descriptions d, + descriptions_regions WHERE + descriptions_regions.region = r.code AND + descriptions_regions.description = d.id + GROUP BY r.code, r.added; + +CREATE VIEW Extlangs_with_all_descr AS + SELECT l.code, l.added, d.description + FROM extlangs l,descriptions d,descriptions_extlangs WHERE + descriptions_extlangs.extlang = l.code AND + descriptions_extlangs.description = d.id; + +CREATE VIEW Extlangs_with_descr AS + SELECT l.code, l.added, + concatenate(d.description) as description FROM extlangs l, descriptions d, + descriptions_extlangs WHERE + descriptions_extlangs.extlang = l.code AND + descriptions_extlangs.description = d.id + GROUP BY l.code, l.added; + +-- You can get all the records with stuff like: +-- select 'language', code, added from languages where added > '2005-10-16' union select 'script', code, added from scripts where added > '2005-10-16'; diff --git a/SQL/PostgreSQL/insert-test.sql b/SQL/PostgreSQL/insert-test.sql new file mode 100644 index 0000000..96ef1ce --- /dev/null +++ b/SQL/PostgreSQL/insert-test.sql @@ -0,0 +1,24 @@ +BEGIN; +INSERT INTO Languages (code, added) VALUES ('pl', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('Polish'); +INSERT INTO Descriptions_languages (description, lang) + SELECT currval('Descriptions_id_seq'), 'pl'; +INSERT INTO Languages (code, added) VALUES ('en', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('English'); +INSERT INTO Descriptions_languages (description, lang) + SELECT currval('Descriptions_id_seq'), 'en'; +INSERT INTO Languages (code, added) VALUES ('fr', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('French'); +INSERT INTO Descriptions_languages (description, lang) + SELECT currval('Descriptions_id_seq'), 'fr'; +INSERT INTO Descriptions (description) VALUES ('Francais'); +INSERT INTO Descriptions_languages (description, lang) + SELECT currval('Descriptions_id_seq'), 'fr'; +INSERT INTO Languages (code, added) VALUES ('es', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('Spanish'); +INSERT INTO Descriptions_languages (description, lang) + SELECT currval('Descriptions_id_seq'), 'es'; + +COMMIT; + + diff --git a/SQL/SQLite/create-db-subtag.sql b/SQL/SQLite/create-db-subtag.sql new file mode 100644 index 0000000..572000a --- /dev/null +++ b/SQL/SQLite/create-db-subtag.sql @@ -0,0 +1,85 @@ +-- Process with: + +-- sqlite lsr < create-db-subtag.sql + +DROP TABLE Descriptions_languages; +DROP TABLE Descriptions_scripts; +DROP TABLE Descriptions_regions; +DROP TABLE Descriptions_variants; +DROP TABLE Descriptions; +DROP TABLE Prefixes; +DROP TABLE Languages; +DROP TABLE Extlangs; +DROP TABLE Scripts; +DROP TABLE Regions; +DROP TABLE Variants; + +-- A few things to remember about SQLite: +-- +-- 1) If you declare a column as "INTEGER PRIMARY KEY", it will +-- autoincrement (http://www.sqlite.org/faq.html#q1) +-- +-- 2) Types and constraints are almost always ignored +-- (http://www.sqlite.org/datatype3.html or +-- http://www.sqlite.org/faq.html#q2) + +CREATE TABLE Scripts ( + id INTEGER PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, + added DATE, + comments TEXT); + +CREATE TABLE Languages ( + id INTEGER PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, + suppressscript TEXT REFERENCES Scripts(code), + preferredvalue TEXT, -- Yes, it should REFERENCES Languages(code), but + -- there is a chicken-and-egg problem. + added DATE, + comments TEXT); + +CREATE TABLE Extlangs ( + id SERIAL PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint? + added DATE, + prefix TEXT, -- references Languages(code)? Can a prefix be longer (a language and a script, for instance)? + comments TEXT); + +CREATE TABLE Regions ( + id INTEGER PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, + added DATE, + deprecated DATE, + comments TEXT); + +CREATE TABLE Variants ( + id INTEGER PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL, + added DATE, + comments TEXT); + +CREATE TABLE Descriptions ( + id INTEGER NOT NULL PRIMARY KEY, + description TEXT NOT NULL); + +CREATE TABLE Descriptions_Languages ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + lang TEXT NOT NULL REFERENCES Languages(code)); + +CREATE TABLE Descriptions_Scripts ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + script TEXT NOT NULL REFERENCES Scripts(code)); + +CREATE TABLE Descriptions_Variants ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + variant TEXT NOT NULL REFERENCES Variants(code)); + +CREATE TABLE Descriptions_Regions ( + description INTEGER NOT NULL REFERENCES Descriptions(id), + region TEXT NOT NULL REFERENCES Regions(code)); + +CREATE TABLE Prefixes ( + variant TEXT NOT NULL REFERENCES Variants(code), + prefix TEXT NOT NULL REFERENCES Languages(code) +); + diff --git a/SQL/SQLite/insert-test.sql b/SQL/SQLite/insert-test.sql new file mode 100644 index 0000000..e3c36ee --- /dev/null +++ b/SQL/SQLite/insert-test.sql @@ -0,0 +1,12 @@ +INSERT INTO Languages (id, code, added) VALUES (NULL, 'en', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('English'); +INSERT INTO Descriptions_languages (description, lang) + SELECT max(rowid), 'en' FROM Descriptions; +INSERT INTO Languages (id, code, added) VALUES (NULL, 'fr', '2007-01-24'); +INSERT INTO Descriptions (description) VALUES ('French'); +INSERT INTO Descriptions_languages (description, lang) + SELECT max(rowid), 'fr' FROM Descriptions; +INSERT INTO Descriptions (description) VALUES ('Francais'); +INSERT INTO Descriptions_languages (description, lang) + SELECT max(rowid), 'fr' FROM Descriptions; +