* More documentation

* Database creation scripts
This commit is contained in:
Stephane Bortzmeyer 2023-06-09 09:21:21 +02:00
parent 1e2ca40440
commit c881cde89c
7 changed files with 346 additions and 5 deletions

View File

@ -1,4 +0,0 @@
Pre-requisites on Debian:
apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev

22
INSTALL.md Normal file
View File

@ -0,0 +1,22 @@
# Installation
GaBuZoMeu is written in [Haskell](https://www.haskell.org/) so you will need an Haskell compiler. Then just type `make all`.
To see if everything is fine, `make test`.
## Pre-requisites on Debian operating system
```
apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev
```
## Bug reports
On [the forge](https://forge.chapril.org/bortzmeyer/GaBuZoMeu/issues).
## Creating the database
If you want to create a database to insert the language subtag
registry (with the `registry2postgresql` or `registry2sqlite`
programs), see the creation scripts under `./SQL/`.

View File

@ -5,4 +5,20 @@ language tag registry (see RFC 5646).
## Installation
See INSTALL
See `INSTALL.md`.
## Using with the official language tag registry
```
wget https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
```
## Licence
Free software (BSD 3-clause), see `LICENSE`.
## Author
Stéphane Bortzmeyer
stephane+gabuzomeu@bortzmeyer.org

View File

@ -0,0 +1,186 @@
DROP VIEW Languages_with_descr;
DROP VIEW Languages_with_all_descr;
DROP VIEW Extlangs_with_descr;
DROP VIEW Extlangs_with_all_descr;
DROP VIEW Scripts_with_descr;
DROP VIEW Scripts_with_all_descr;
DROP VIEW Variants_with_descr;
DROP VIEW Variants_with_all_descr;
DROP VIEW Regions_with_descr;
DROP VIEW Regions_with_all_descr;
DROP TABLE Descriptions_languages;
DROP TABLE Descriptions_extlangs;
DROP TABLE Descriptions_scripts;
DROP TABLE Descriptions_regions;
DROP TABLE Descriptions_variants;
--DROP TABLE Descriptions_extensions;
DROP TABLE Descriptions;
DROP TABLE Prefixes;
DROP TABLE Languages;
DROP TABLE Extlangs;
DROP TABLE Scripts;
DROP TABLE Regions;
DROP TABLE Variants;
DROP TABLE Extensions;
DROP AGGREGATE concatenate(text);
CREATE TABLE Scripts (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
added DATE,
comments TEXT);
CREATE TABLE Languages (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
suppressscript TEXT REFERENCES Scripts(code),
preferredvalue TEXT, -- Yes, it should REFERENCES Languages(code), but
-- there is a chicken-and-egg problem.
added DATE,
comments TEXT);
CREATE TABLE Extlangs (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
added DATE,
prefix TEXT, -- references Languages(code)? Can a prefix be longer (a language and a script, for instance)?
comments TEXT);
CREATE TABLE Regions (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
added DATE,
deprecated DATE,
comments TEXT);
CREATE TABLE Variants (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
-- TODO: prefixes
added DATE,
comments TEXT);
CREATE TABLE Extensions (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
added DATE,
comments TEXT);
CREATE TABLE Descriptions (
id SERIAL PRIMARY KEY NOT NULL,
description TEXT NOT NULL);
CREATE TABLE Descriptions_Languages (
description INTEGER NOT NULL REFERENCES Descriptions(id),
lang TEXT NOT NULL REFERENCES Languages(code));
CREATE TABLE Descriptions_Extlangs (
description INTEGER NOT NULL REFERENCES Descriptions(id),
extlang TEXT NOT NULL REFERENCES Extlangs(code));
CREATE TABLE Descriptions_Scripts (
description INTEGER NOT NULL REFERENCES Descriptions(id),
script TEXT NOT NULL REFERENCES Scripts(code));
CREATE TABLE Descriptions_Regions (
description INTEGER NOT NULL REFERENCES Descriptions(id),
region TEXT NOT NULL REFERENCES Regions(code));
CREATE TABLE Descriptions_Variants (
description INTEGER NOT NULL REFERENCES Descriptions(id),
variant TEXT NOT NULL REFERENCES Variants(code));
CREATE TABLE Prefixes (
variant TEXT NOT NULL REFERENCES Variants(code),
prefix TEXT NOT NULL REFERENCES Languages(code)
);
CREATE OR REPLACE FUNCTION concat2(text, text) RETURNS text AS '
SELECT CASE WHEN $1 IS NULL OR $1 = '''' THEN $2
WHEN $2 IS NULL OR $2 = '''' THEN $1
ELSE $1 || '' / '' || $2
END;
'
LANGUAGE SQL;
CREATE AGGREGATE concatenate (
sfunc = concat2,
basetype = text,
stype = text,
initcond = ''
);
CREATE VIEW Languages_with_all_descr AS
SELECT l.code, l.added, l.suppressscript, d.description
FROM languages l,descriptions d,descriptions_languages WHERE
descriptions_languages.lang = l.code AND
descriptions_languages.description = d.id;
CREATE VIEW Languages_with_descr AS
SELECT l.code, l.added, l.suppressscript,
concatenate(d.description) as description FROM languages l, descriptions d,
descriptions_languages WHERE
descriptions_languages.lang = l.code AND
descriptions_languages.description = d.id
GROUP BY l.code, l.added, l.suppressscript;
CREATE VIEW Variants_with_all_descr AS
SELECT v.code, v.added, d.description
FROM variants v,descriptions d,descriptions_variants WHERE
descriptions_variants.variant = v.code AND
descriptions_variants.description = d.id;
CREATE VIEW Variants_with_descr AS
SELECT v.code, v.added,
concatenate(d.description) as description FROM
variants v, descriptions d,
descriptions_variants WHERE
descriptions_variants.variant = v.code AND
descriptions_variants.description = d.id
GROUP BY v.code, v.added;
CREATE VIEW Scripts_with_all_descr AS
SELECT s.code, s.added, d.description
FROM scripts s,descriptions d,descriptions_scripts WHERE
descriptions_scripts.script = s.code AND
descriptions_scripts.description = d.id;
CREATE VIEW Scripts_with_descr AS
SELECT s.code, s.added,
concatenate(d.description) as description FROM scripts s, descriptions d,
descriptions_scripts WHERE
descriptions_scripts.script = s.code AND
descriptions_scripts.description = d.id
GROUP BY s.code, s.added;
CREATE VIEW Regions_with_all_descr AS
SELECT r.code, r.added, d.description
FROM regions r,descriptions d,descriptions_regions WHERE
descriptions_regions.region = r.code AND
descriptions_regions.description = d.id;
CREATE VIEW Regions_with_descr AS
SELECT r.code, r.added,
concatenate(d.description) as description FROM regions r, descriptions d,
descriptions_regions WHERE
descriptions_regions.region = r.code AND
descriptions_regions.description = d.id
GROUP BY r.code, r.added;
CREATE VIEW Extlangs_with_all_descr AS
SELECT l.code, l.added, d.description
FROM extlangs l,descriptions d,descriptions_extlangs WHERE
descriptions_extlangs.extlang = l.code AND
descriptions_extlangs.description = d.id;
CREATE VIEW Extlangs_with_descr AS
SELECT l.code, l.added,
concatenate(d.description) as description FROM extlangs l, descriptions d,
descriptions_extlangs WHERE
descriptions_extlangs.extlang = l.code AND
descriptions_extlangs.description = d.id
GROUP BY l.code, l.added;
-- You can get all the records with stuff like:
-- select 'language', code, added from languages where added > '2005-10-16' union select 'script', code, added from scripts where added > '2005-10-16';

View File

@ -0,0 +1,24 @@
BEGIN;
INSERT INTO Languages (code, added) VALUES ('pl', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('Polish');
INSERT INTO Descriptions_languages (description, lang)
SELECT currval('Descriptions_id_seq'), 'pl';
INSERT INTO Languages (code, added) VALUES ('en', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('English');
INSERT INTO Descriptions_languages (description, lang)
SELECT currval('Descriptions_id_seq'), 'en';
INSERT INTO Languages (code, added) VALUES ('fr', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('French');
INSERT INTO Descriptions_languages (description, lang)
SELECT currval('Descriptions_id_seq'), 'fr';
INSERT INTO Descriptions (description) VALUES ('Francais');
INSERT INTO Descriptions_languages (description, lang)
SELECT currval('Descriptions_id_seq'), 'fr';
INSERT INTO Languages (code, added) VALUES ('es', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('Spanish');
INSERT INTO Descriptions_languages (description, lang)
SELECT currval('Descriptions_id_seq'), 'es';
COMMIT;

View File

@ -0,0 +1,85 @@
-- Process with:
-- sqlite lsr < create-db-subtag.sql
DROP TABLE Descriptions_languages;
DROP TABLE Descriptions_scripts;
DROP TABLE Descriptions_regions;
DROP TABLE Descriptions_variants;
DROP TABLE Descriptions;
DROP TABLE Prefixes;
DROP TABLE Languages;
DROP TABLE Extlangs;
DROP TABLE Scripts;
DROP TABLE Regions;
DROP TABLE Variants;
-- A few things to remember about SQLite:
--
-- 1) If you declare a column as "INTEGER PRIMARY KEY", it will
-- autoincrement (http://www.sqlite.org/faq.html#q1)
--
-- 2) Types and constraints are almost always ignored
-- (http://www.sqlite.org/datatype3.html or
-- http://www.sqlite.org/faq.html#q2)
CREATE TABLE Scripts (
id INTEGER PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL,
added DATE,
comments TEXT);
CREATE TABLE Languages (
id INTEGER PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL,
suppressscript TEXT REFERENCES Scripts(code),
preferredvalue TEXT, -- Yes, it should REFERENCES Languages(code), but
-- there is a chicken-and-egg problem.
added DATE,
comments TEXT);
CREATE TABLE Extlangs (
id SERIAL PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL, -- TODO: a length and content constraint?
added DATE,
prefix TEXT, -- references Languages(code)? Can a prefix be longer (a language and a script, for instance)?
comments TEXT);
CREATE TABLE Regions (
id INTEGER PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL,
added DATE,
deprecated DATE,
comments TEXT);
CREATE TABLE Variants (
id INTEGER PRIMARY KEY NOT NULL,
code TEXT UNIQUE NOT NULL,
added DATE,
comments TEXT);
CREATE TABLE Descriptions (
id INTEGER NOT NULL PRIMARY KEY,
description TEXT NOT NULL);
CREATE TABLE Descriptions_Languages (
description INTEGER NOT NULL REFERENCES Descriptions(id),
lang TEXT NOT NULL REFERENCES Languages(code));
CREATE TABLE Descriptions_Scripts (
description INTEGER NOT NULL REFERENCES Descriptions(id),
script TEXT NOT NULL REFERENCES Scripts(code));
CREATE TABLE Descriptions_Variants (
description INTEGER NOT NULL REFERENCES Descriptions(id),
variant TEXT NOT NULL REFERENCES Variants(code));
CREATE TABLE Descriptions_Regions (
description INTEGER NOT NULL REFERENCES Descriptions(id),
region TEXT NOT NULL REFERENCES Regions(code));
CREATE TABLE Prefixes (
variant TEXT NOT NULL REFERENCES Variants(code),
prefix TEXT NOT NULL REFERENCES Languages(code)
);

View File

@ -0,0 +1,12 @@
INSERT INTO Languages (id, code, added) VALUES (NULL, 'en', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('English');
INSERT INTO Descriptions_languages (description, lang)
SELECT max(rowid), 'en' FROM Descriptions;
INSERT INTO Languages (id, code, added) VALUES (NULL, 'fr', '2007-01-24');
INSERT INTO Descriptions (description) VALUES ('French');
INSERT INTO Descriptions_languages (description, lang)
SELECT max(rowid), 'fr' FROM Descriptions;
INSERT INTO Descriptions (description) VALUES ('Francais');
INSERT INTO Descriptions_languages (description, lang)
SELECT max(rowid), 'fr' FROM Descriptions;