forked from bortzmeyer/GaBuZoMeu
Initial import
This commit is contained in:
parent
c9247066e2
commit
2fc541c0a7
129
Grammar.hs
Normal file
129
Grammar.hs
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
{-# LANGUAGE NoMonomorphismRestriction,FlexibleContexts #-}
|
||||||
|
|
||||||
|
module Grammar (module Text.ParserCombinators.Parsec, tag, testTag,
|
||||||
|
getTag) where
|
||||||
|
|
||||||
|
import Text.ParserCombinators.Parsec
|
||||||
|
import Types
|
||||||
|
import Parsers
|
||||||
|
|
||||||
|
-- "Tags for Identifying Languages" RFC 5646
|
||||||
|
-- We use the names of the productions of the grammar in the RFC
|
||||||
|
|
||||||
|
dash = char '-' <?> "dash"
|
||||||
|
|
||||||
|
charI = char 'i' <|> char 'I'
|
||||||
|
charX = char 'x' <|> char 'X'
|
||||||
|
|
||||||
|
alphaNumNotX c =
|
||||||
|
c >= 'A' && c <= 'W' ||
|
||||||
|
c >= 'Y' && c <= 'Z' ||
|
||||||
|
c >= 'a' && c <= 'w' ||
|
||||||
|
c >= 'y' && c <= 'z' ||
|
||||||
|
c >= '0' && c <= '9'
|
||||||
|
|
||||||
|
languageTag :: GenParser Char () Tag
|
||||||
|
languageTag =do
|
||||||
|
value <- try (langtag) <|> try (privateuse) <|> irregular
|
||||||
|
return value
|
||||||
|
|
||||||
|
langtag = do
|
||||||
|
theLanguage <- language
|
||||||
|
theExtlang <- facultative (try (do {dash ; value <- extlang ; notFollowedBy alphaNum;
|
||||||
|
return value}));
|
||||||
|
theScript <- facultative (try (do {dash ; value <- script ; notFollowedBy alphaNum;
|
||||||
|
return value}));
|
||||||
|
theRegion <- facultative (try (do {dash ; value <- region ; notFollowedBy alphaNum;
|
||||||
|
return value}));
|
||||||
|
theVariants <- many (try (do {dash ; value <- variant ; notFollowedBy alphaNum;
|
||||||
|
return value}));
|
||||||
|
theExtensions <- many (try (do {dash ; value <- extension; notFollowedBy alphaNum;
|
||||||
|
return value}))
|
||||||
|
-- TODO: use a context-sensitive grammar (see Parsec
|
||||||
|
-- documentation) to enforce "2.2.6. Extension Subtags" which
|
||||||
|
-- says that an extension cannot be repeated.
|
||||||
|
optional (do {dash; privateuse})
|
||||||
|
eof
|
||||||
|
return (Tag theLanguage theExtlang theScript theRegion theVariants theExtensions)
|
||||||
|
|
||||||
|
language = do
|
||||||
|
value <- do
|
||||||
|
try (do {lang <- countBetween 2 3 letter; notFollowedBy alphaNum; return lang})
|
||||||
|
-- Shortest ISO 639 code
|
||||||
|
-- TODO: returns the extended, too!
|
||||||
|
<|>
|
||||||
|
try (do {lang <- count 4 letter; notFollowedBy alphaNum; return lang}) -- reserved for future use.
|
||||||
|
<|> (countBetween 5 8 letter) -- registered language subtag. TODO: return the value!
|
||||||
|
return value
|
||||||
|
|
||||||
|
-- RFC, section 2.2.2: "Although the ABNF production 'extlang' permits
|
||||||
|
-- up to three extended language tags"
|
||||||
|
extlang = do
|
||||||
|
value <- count 3 letter
|
||||||
|
return value
|
||||||
|
|
||||||
|
script = do
|
||||||
|
value <- count 4 letter -- ISO 15924 code
|
||||||
|
return value
|
||||||
|
|
||||||
|
region = do
|
||||||
|
count 2 letter -- ISO 3166 code
|
||||||
|
<|> count 3 digit -- United Nations M.49 code
|
||||||
|
|
||||||
|
variant = do
|
||||||
|
do {first <- digit ; rest <- countBetween 3 7 alphaNum; return (first:rest)}
|
||||||
|
<|> do {first <- letter ; rest <- countBetween 4 7 alphaNum; return (first:rest)}
|
||||||
|
|
||||||
|
extension = do
|
||||||
|
theSingleton <- singleton
|
||||||
|
theValues <- many1 (try (do {dash ; extensionName}))
|
||||||
|
return (theSingleton, theValues)
|
||||||
|
|
||||||
|
extensionName = do
|
||||||
|
countBetween 2 8 alphaNum
|
||||||
|
|
||||||
|
singleton = satisfy (alphaNumNotX)
|
||||||
|
|
||||||
|
privateuse = do
|
||||||
|
charX
|
||||||
|
values <- many1 (do {dash; countBetween 1 8 alphaNum})
|
||||||
|
eof
|
||||||
|
return (Priv values)
|
||||||
|
|
||||||
|
-- Irregular tags only
|
||||||
|
registeredIrregular = try (istring "en-GB-oed") <|> try (istring "i-ami") <|> try (istring "i-bnn") <|> try (istring "i-default")
|
||||||
|
<|> try (istring "i-enochian") <|> try (istring "i-hak") <|> try (istring "i-klingon") <|> try (istring "i-lux")
|
||||||
|
<|> try (istring "i-mingo") <|> try (istring "i-navajo") <|> try (istring "i-pwn") <|> try (istring "i-tao")
|
||||||
|
<|> try (istring "i-tay") <|> try (istring "i-tsu") <|> try (istring "sgn-BE-fr") <|>try ( istring "sgn-BE-nl")
|
||||||
|
<|> try (istring "sgn-CH-de") <|> try (istring "no-bok")
|
||||||
|
<|> try (istring"no-nyn")
|
||||||
|
<|> try (istring"zh-cmn")
|
||||||
|
<|> try (istring"zh-cmn-Hans")
|
||||||
|
<|> try (istring"zh-cmn-Hant")
|
||||||
|
<|> try (istring"zh-gan")
|
||||||
|
<|> try (istring"zh-min")
|
||||||
|
<|> try (istring"zh-min-nan")
|
||||||
|
<|> try (istring"zh-wuu")
|
||||||
|
<|> try (istring"zh-yue")
|
||||||
|
|
||||||
|
irregular = do
|
||||||
|
value <- registeredIrregular
|
||||||
|
return (GF value)
|
||||||
|
|
||||||
|
tag = do
|
||||||
|
value <- languageTag
|
||||||
|
return value
|
||||||
|
|
||||||
|
-- Utilities
|
||||||
|
testTag :: String -> Bool
|
||||||
|
testTag input
|
||||||
|
= case (parse languageTag "" input) of
|
||||||
|
Left err -> False
|
||||||
|
Right x -> True
|
||||||
|
|
||||||
|
getTag :: String -> Either String Tag
|
||||||
|
getTag input =
|
||||||
|
case (parse tag "" input) of
|
||||||
|
Left err -> Left (input ++ " is NOT well-formed: " ++ (show err) ++
|
||||||
|
"\n")
|
||||||
|
Right theTag -> Right theTag
|
4
INSTALL
Normal file
4
INSTALL
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Pre-requisites on Debian:
|
||||||
|
|
||||||
|
apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev
|
||||||
|
|
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
|||||||
Copyright (c) <year> <owner>.
|
Copyright (c) 2006. Stéphane Bortzmeyer
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
38
Makefile
Normal file
38
Makefile
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
GHC=ghc
|
||||||
|
MODULES=Grammar.hs Types.hs
|
||||||
|
REGISTRY_MODULES=Registry/Grammar.hs Registry/Types.hs Registry/Registry.hs Registry/Utils.hs
|
||||||
|
TESTS=broken-tags.txt well-formed-tags.txt
|
||||||
|
ALL_MODULES=${MODULES}
|
||||||
|
REGISTRY=http://www.iana.org/assignments/language-subtag-registry
|
||||||
|
REGISTRYFILE=language-subtag-registry
|
||||||
|
ALL_PROGRAMS=check-wf check-valid tests-from-files display-tag registry2xml registry2txt registry2postgresql check-registry
|
||||||
|
TARBALL=/tmp/gabuzomeu.tar
|
||||||
|
|
||||||
|
default: check-wf check-valid check-registry
|
||||||
|
|
||||||
|
all: ${ALL_PROGRAMS}
|
||||||
|
|
||||||
|
%: %.hs ${ALL_MODULES}
|
||||||
|
${GHC} -o $@ --make $<
|
||||||
|
|
||||||
|
%: Registry/%.hs ${MODULES}
|
||||||
|
${GHC} -o $@ --make $<
|
||||||
|
|
||||||
|
test: test-tags test-regs
|
||||||
|
|
||||||
|
test-tags: tests-from-files ${ALL_MODULES} ${TESTS}
|
||||||
|
./$<
|
||||||
|
|
||||||
|
test-regs: test-registries ${ALL_MODULES} ${REGISTRY_MODULES} ${TESTS}
|
||||||
|
./$<
|
||||||
|
|
||||||
|
${REGISTRYFILE}: check-registry
|
||||||
|
wget -O $@ ${REGISTRY}
|
||||||
|
./check-registry ${REGISTRYFILE}
|
||||||
|
touch ${REGISTRYFILE}
|
||||||
|
|
||||||
|
dist: clean
|
||||||
|
(cd ..; tar cvf ${TARBALL} --exclude hnop --exclude ltr*.txt GaBuZoMeu && gzip --best --force --verbose ${TARBALL})
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f list-grandfathered-tags ${REGISTRYFILE} ${ALL_PROGRAMS} \#*\# *~ *.o *.hi *~ Registry/*.o Registry/*.hi Registry/*~
|
32
Parsers.hs
Normal file
32
Parsers.hs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
module Parsers where
|
||||||
|
|
||||||
|
import Text.ParserCombinators.Parsec
|
||||||
|
import Data.Maybe (catMaybes)
|
||||||
|
import Data.Char (toUpper)
|
||||||
|
import Control.Monad (liftM)
|
||||||
|
|
||||||
|
-- Parsing functions not in Text.ParserCombinators.Parsec
|
||||||
|
|
||||||
|
-- There is also a good ine in hsemail, manyNtoM
|
||||||
|
countBetween m n p
|
||||||
|
| n < m = error "First bound must be lower or equal than second bound"
|
||||||
|
| otherwise = do
|
||||||
|
xs <- try (count m p)
|
||||||
|
ys <- try (count (n - m) ((option Nothing) (do
|
||||||
|
y <- p
|
||||||
|
return (Just y))))
|
||||||
|
return (xs ++ catMaybes ys)
|
||||||
|
|
||||||
|
facultative p = liftM Just p <|> return Nothing
|
||||||
|
|
||||||
|
-- Case-insensitive parsers stloen from hsemail
|
||||||
|
-- Case-insensitive variant of Parsec's 'char' function.
|
||||||
|
ichar :: Char -> CharParser st Char
|
||||||
|
ichar c = satisfy (\x -> toUpper x == toUpper c)
|
||||||
|
-- Case-insensitive variant of Parsec's 'string' function.
|
||||||
|
istring :: String -> CharParser st String
|
||||||
|
istring cs = mapM ichar cs <?> cs
|
||||||
|
|
||||||
|
--
|
||||||
|
-- End of parsing functions
|
||||||
|
|
@ -1,3 +1,8 @@
|
|||||||
# GaBuZoMeu
|
# GaBuZoMeu
|
||||||
|
|
||||||
GaBuZoMeu is a set of programs to parse and check language tags and language tag registry (see RFC 5646).
|
GaBuZoMeu is a set of programs to parse and check language tags and
|
||||||
|
language tag registry (see RFC 5646).
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
See INSTALL
|
||||||
|
164
Registry/Grammar.hs
Normal file
164
Registry/Grammar.hs
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
module Registry.Grammar where
|
||||||
|
|
||||||
|
import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline)
|
||||||
|
import Parsers
|
||||||
|
import Registry.Types
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Char(ord, chr)
|
||||||
|
import Numeric
|
||||||
|
import Prelude hiding (lookup)
|
||||||
|
|
||||||
|
-- Too low-level to be in Registry.Types
|
||||||
|
|
||||||
|
data Field = Field {field'name::String, field'value::String} deriving Show
|
||||||
|
|
||||||
|
colon = char ':' <?> "colon"
|
||||||
|
dash = char '-' <?> "dash"
|
||||||
|
space = char ' ' <?> "space"
|
||||||
|
newline = many1 (try (string "\n\r") <|> string "\n" <|> string "\r") <?> "new line"
|
||||||
|
spaces = many space
|
||||||
|
|
||||||
|
doublepercent = do {string "%%"; newline} <?> "double percent line"
|
||||||
|
|
||||||
|
extract :: Field -> (String, String)
|
||||||
|
extract f = (field'name f, field'value f)
|
||||||
|
|
||||||
|
-- TODO: make it case-insensitive
|
||||||
|
member :: String -> [(String, String)] -> Bool
|
||||||
|
member key [] = False
|
||||||
|
member key dict = let head = dict !! 0 in
|
||||||
|
if key == fst head then
|
||||||
|
True
|
||||||
|
else
|
||||||
|
member key (tail dict)
|
||||||
|
|
||||||
|
-- TODO: make it case-insensitive
|
||||||
|
lookup :: String -> [(String, String)] -> Maybe String
|
||||||
|
lookup key [] = Nothing
|
||||||
|
lookup key dict = let head = dict !! 0 in
|
||||||
|
if key == fst head then
|
||||||
|
Just (snd head)
|
||||||
|
else
|
||||||
|
lookup key (tail dict)
|
||||||
|
|
||||||
|
lookups :: String -> [(String, String)] -> [String]
|
||||||
|
lookups key [] = []
|
||||||
|
lookups key dict = let head = dict !! 0 in
|
||||||
|
if key == fst head then
|
||||||
|
(snd head) : lookups key (tail dict)
|
||||||
|
else
|
||||||
|
lookups key (tail dict)
|
||||||
|
|
||||||
|
suppressScript r = lookup "Suppress-Script" r
|
||||||
|
|
||||||
|
recordFactory :: [(String, String)] -> Either String Record
|
||||||
|
recordFactory r =
|
||||||
|
if member "Type" r then
|
||||||
|
let thetype = fromJust (lookup "Type" r) in
|
||||||
|
-- TODO: test that the field exist. fromJust will raise an exception but we can
|
||||||
|
-- provide a better error message if we test
|
||||||
|
case thetype of
|
||||||
|
"language" -> Right (Lang (Language
|
||||||
|
(fromJust (lookup "Subtag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))
|
||||||
|
(suppressScript r)
|
||||||
|
(lookup "Preferred-Value" r)
|
||||||
|
(lookup "Macrolanguage" r)
|
||||||
|
(lookup "Deprecated" r)
|
||||||
|
(lookup "Scope" r)
|
||||||
|
))
|
||||||
|
"extlang" -> Right (Extl (Extlang
|
||||||
|
(fromJust (lookup "Subtag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))
|
||||||
|
(suppressScript r)
|
||||||
|
(fromJust (lookup "Prefix" r))
|
||||||
|
(lookup "Preferred-Value" r)
|
||||||
|
(lookup "Macrolanguage" r)
|
||||||
|
(lookup "Scope" r)
|
||||||
|
))
|
||||||
|
"script" -> Right (Scr (Script
|
||||||
|
(fromJust (lookup "Subtag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))))
|
||||||
|
"region" -> Right (Reg (Region
|
||||||
|
(fromJust (lookup "Subtag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))))
|
||||||
|
"variant" -> Right (Var (Variant
|
||||||
|
(fromJust (lookup "Subtag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))
|
||||||
|
(lookups "Prefix" r)))
|
||||||
|
"redundant" -> Right (Red (Redundant
|
||||||
|
(fromJust (lookup "Tag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))))
|
||||||
|
"grandfathered" -> Right (GF (Grandfathered
|
||||||
|
(fromJust (lookup "Tag" r))
|
||||||
|
(lookups "Description" r)
|
||||||
|
(fromJust (lookup "Added" r))))
|
||||||
|
_ -> Left ("Unknown type \"" ++ thetype ++ "\"")
|
||||||
|
else
|
||||||
|
if member "File-Date" r then
|
||||||
|
Right (Date (fromJust (lookup "File-Date" r)))
|
||||||
|
else
|
||||||
|
Left "Invalid record: no Type or File-Date"
|
||||||
|
|
||||||
|
record = do
|
||||||
|
fields <- many1 field <?> "record"
|
||||||
|
let therecord = map extract fields
|
||||||
|
return (recordFactory therecord)
|
||||||
|
|
||||||
|
fieldname = do
|
||||||
|
start <- alphaNum
|
||||||
|
rest <- many (alphaNum <|> dash)
|
||||||
|
return ([start] ++ rest)
|
||||||
|
|
||||||
|
fieldvalue = many (unicodechar <|> space <|> try (do {newline; space})) <?> "field value"
|
||||||
|
|
||||||
|
unicodechar = satisfy (\thechar ->
|
||||||
|
let c = (ord thechar) in
|
||||||
|
(c >= 0x21 && c <= 0x10ffff))
|
||||||
|
<?> "Character"
|
||||||
|
|
||||||
|
field = do
|
||||||
|
name <- fieldname
|
||||||
|
spaces
|
||||||
|
colon
|
||||||
|
spaces
|
||||||
|
value <- fieldvalue
|
||||||
|
newline
|
||||||
|
return (Field name value)
|
||||||
|
<?> "field"
|
||||||
|
|
||||||
|
registry = do
|
||||||
|
allrecords <- record `sepBy` doublepercent
|
||||||
|
eof
|
||||||
|
return allrecords
|
||||||
|
|
||||||
|
isRight (Right a) = True
|
||||||
|
isRight (Left a) = False
|
||||||
|
|
||||||
|
fromRight (Right a) = a
|
||||||
|
fromRight (Left a) = error "Not a right value"
|
||||||
|
|
||||||
|
isLeft (Left a) = True
|
||||||
|
isLeft (Right a) = False
|
||||||
|
|
||||||
|
fromLeft (Left a) = a
|
||||||
|
fromLeft (Right a) = error "Not a left value"
|
||||||
|
|
||||||
|
parse :: String -> CheckResult
|
||||||
|
parse input =
|
||||||
|
case (Parsec.parse Registry.Grammar.registry "" input) of
|
||||||
|
Left err -> SyntaxError ("Registry is not legal: " ++ (show err))
|
||||||
|
Right reg -> let ok = and (map isRight reg) in
|
||||||
|
if ok then
|
||||||
|
Success (map fromRight reg)
|
||||||
|
else
|
||||||
|
let firstErr = (filter isLeft reg) !! 0 in
|
||||||
|
SyntaxError ("Registry is not legal: " ++
|
||||||
|
fromLeft firstErr)
|
||||||
|
-- TODO: the line number
|
229
Registry/Registry.hs
Normal file
229
Registry/Registry.hs
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
module Registry.Registry where
|
||||||
|
|
||||||
|
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||||
|
import qualified System.IO
|
||||||
|
import Data.Maybe
|
||||||
|
import Registry.Types
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Grammar (registry, member, parse)
|
||||||
|
import Types
|
||||||
|
import Grammar
|
||||||
|
|
||||||
|
defaultInfile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
type Result = (Bool, String)
|
||||||
|
|
||||||
|
-- Combines a former result with a new result, yielding a result.
|
||||||
|
combine :: Result -> Result -> Result
|
||||||
|
combine (True, _) (False, s) = (False, s)
|
||||||
|
combine (False, s1) (False, s2) = (False, s1 ++ "; " ++ s2)
|
||||||
|
combine (False, s) (True, _) = (False, s)
|
||||||
|
combine (True, _) (True, _) = (True, "")
|
||||||
|
|
||||||
|
readRegistry filename = do
|
||||||
|
input <- System.IO.readFile filename
|
||||||
|
return (getRegistry input)
|
||||||
|
|
||||||
|
getRegistry input =
|
||||||
|
let reg = Registry.Grammar.parse input in
|
||||||
|
if not (checkOK reg) then
|
||||||
|
error "Illegal registry"
|
||||||
|
else
|
||||||
|
registryOf reg
|
||||||
|
|
||||||
|
isValidLanguage l dict =
|
||||||
|
if length (filter (onlyThisValue "language" l) dict) == 1 then
|
||||||
|
(True, "")
|
||||||
|
else
|
||||||
|
(False, "Unknown language " ++ l)
|
||||||
|
|
||||||
|
isValidScript Nothing _ = (True, "")
|
||||||
|
isValidScript s dict =
|
||||||
|
if length (filter (onlyThisValue "script" (fromJust s)) dict) == 1 then
|
||||||
|
(True, "")
|
||||||
|
else
|
||||||
|
(False, "Unknown script " ++ (fromJust s))
|
||||||
|
|
||||||
|
isValidRegion Nothing _ = (True, "")
|
||||||
|
isValidRegion r dict =
|
||||||
|
if length (filter (onlyThisValue "region" (fromJust r)) dict) == 1 then
|
||||||
|
(True, "")
|
||||||
|
else
|
||||||
|
(False, "Unknown region " ++ (fromJust r))
|
||||||
|
|
||||||
|
isValidVariants [] _ = (True, "")
|
||||||
|
isValidVariants v dict = foldl combine (True, "") (map (isValidVariant dict) v)
|
||||||
|
|
||||||
|
isValidVariant dict v =
|
||||||
|
if length (filter (onlyThisValue "variant" v) dict) == 1 then
|
||||||
|
(True, "")
|
||||||
|
else
|
||||||
|
(False, "Unknown variant " ++ v)
|
||||||
|
|
||||||
|
isValidExtension _ _ = (True, "")
|
||||||
|
-- TODO: a better test
|
||||||
|
|
||||||
|
-- Semantic testing
|
||||||
|
|
||||||
|
-- Only one extension by the same singleton
|
||||||
|
correctNumber :: [Singleton] -> Singleton -> Bool
|
||||||
|
correctNumber l s = length (filter (\c -> (c == s)) l) == 1
|
||||||
|
|
||||||
|
getSingleton :: Types.Extension -> Singleton
|
||||||
|
getSingleton (singleton, names) = singleton
|
||||||
|
|
||||||
|
norepeat :: Extensions -> Bool
|
||||||
|
norepeat extensions =
|
||||||
|
let singletons = map getSingleton extensions in
|
||||||
|
and (map (correctNumber singletons) singletons)
|
||||||
|
|
||||||
|
semanticTests (Tag lang extlang script region variants extensions) =
|
||||||
|
if norepeat extensions then
|
||||||
|
(True, "")
|
||||||
|
else
|
||||||
|
(False, "Repeated extension singleton")
|
||||||
|
semanticTests _ = (True, "") -- No tests for privateuse and grandfathered
|
||||||
|
|
||||||
|
isValid :: Registry -> Tag -> Result
|
||||||
|
isValid reg (Tag l el s r v e) =
|
||||||
|
let languages = filter (onlyThisType "language") reg in
|
||||||
|
let scripts = filter (onlyThisType "script") reg in
|
||||||
|
let regions = filter (onlyThisType "region") reg in
|
||||||
|
let variants = filter (onlyThisType "variant") reg in
|
||||||
|
let extensions = filter (onlyThisType "extension") reg in
|
||||||
|
semanticTests (Tag l el s r v e) `combine`
|
||||||
|
(isValidLanguage l languages) `combine` (isValidScript s scripts) `combine`
|
||||||
|
(isValidRegion r regions) `combine` (isValidVariants v variants) `combine`
|
||||||
|
(isValidExtension e extensions)
|
||||||
|
|
||||||
|
isValid reg (Types.GF tag) =
|
||||||
|
(True, "")
|
||||||
|
-- It is passed well-formedness (syntactic) tests, it is good
|
||||||
|
|
||||||
|
isValid reg (Priv p) =
|
||||||
|
(True, "") -- By definition, private tags are always valid
|
||||||
|
|
||||||
|
testTag :: Registry -> String -> Bool
|
||||||
|
testTag reg tag =
|
||||||
|
case (Parsec.parse Grammar.tag "" tag) of
|
||||||
|
Left err -> False
|
||||||
|
Right t -> if fst (semanticTests t) then
|
||||||
|
fst (isValid reg t)
|
||||||
|
else
|
||||||
|
False
|
||||||
|
|
||||||
|
checkOK (Success p) = True
|
||||||
|
checkOK _ = False
|
||||||
|
|
||||||
|
messageOf (Success p) = error "Check was a success, no message available"
|
||||||
|
messageOf (SyntaxError s) = s
|
||||||
|
messageOf (SemanticError s) = s
|
||||||
|
|
||||||
|
registryOf (Success p) = p
|
||||||
|
registryOf _ = error "Check was a failure, no registry available"
|
||||||
|
|
||||||
|
semanticCheck :: Registry -> CheckResult
|
||||||
|
semanticCheck r =
|
||||||
|
-- TODO: test that the extlangs exist
|
||||||
|
oneDateTest r `combineC` prefixesExist r `combineC`
|
||||||
|
scriptsExist r `combineC` noDuplicates r `combineC` preferredValuesExist r
|
||||||
|
`combineC` macrolanguagesExist r
|
||||||
|
|
||||||
|
oneDateTest reg =
|
||||||
|
if (length (filter onlyDate reg) == 1) then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError "One and only one Field-Date record authorized"
|
||||||
|
|
||||||
|
macrolanguageExist reg language =
|
||||||
|
let macrolang = lang'macroLanguage (languageFrom language) in
|
||||||
|
if isJust macrolang then
|
||||||
|
length (filter (onlyThisValue "language" (fromJust macrolang))
|
||||||
|
(filter onlyLang reg)) > 0
|
||||||
|
else
|
||||||
|
True
|
||||||
|
|
||||||
|
macrolanguagesExist reg =
|
||||||
|
let languages = filter onlyLang reg in
|
||||||
|
let missing = filter (not.macrolanguageExist reg) languages in
|
||||||
|
if length missing == 0 then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError ("Some languages have a Macrolanguage which does not appear in the registry: " ++
|
||||||
|
(show missing))
|
||||||
|
|
||||||
|
-- Extracts the langage subtag, the first one
|
||||||
|
languageOf tag =
|
||||||
|
takeWhile (/= '-') tag
|
||||||
|
|
||||||
|
prefixExist reg variant =
|
||||||
|
-- TODO: We test only that the language part of the prefix is OK, we should test
|
||||||
|
-- the rest
|
||||||
|
and (map (\r -> length (filter (onlyThisValue "language" (languageOf r))
|
||||||
|
(filter onlyLang reg)) > 0)
|
||||||
|
(variant'prefix (variantFrom variant)))
|
||||||
|
|
||||||
|
prefixesExist reg =
|
||||||
|
let variants = filter onlyVariant reg in
|
||||||
|
let missing = filter (not.prefixExist reg) variants in
|
||||||
|
if length missing == 0 then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError ("Some variants have a Prefix which does not appear in the registry: " ++
|
||||||
|
(show missing))
|
||||||
|
|
||||||
|
scriptExist reg language =
|
||||||
|
let script = lang'script (languageFrom language) in
|
||||||
|
if isJust script then
|
||||||
|
length (filter (onlyThisValue "script" (fromJust script))
|
||||||
|
(filter onlyScript reg)) > 0
|
||||||
|
else
|
||||||
|
True
|
||||||
|
|
||||||
|
scriptsExist reg =
|
||||||
|
let languages = filter onlyLang reg in
|
||||||
|
let missing = filter (not.scriptExist reg) languages in
|
||||||
|
if length missing == 0 then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError ("Some languages have a Suppress-script which does not appear in the registry: " ++
|
||||||
|
(show missing))
|
||||||
|
|
||||||
|
preferredLangValueExist reg language =
|
||||||
|
let preferredValue = lang'preferredValue (languageFrom language) in
|
||||||
|
if isJust preferredValue then
|
||||||
|
length (filter (onlyThisValue "language" (fromJust preferredValue))
|
||||||
|
(filter onlyLang reg)) > 0
|
||||||
|
else
|
||||||
|
True
|
||||||
|
|
||||||
|
preferredValuesExist reg =
|
||||||
|
-- TODO: test also regions and scripts
|
||||||
|
let languages = filter onlyLang reg in
|
||||||
|
let missing = filter (not.preferredLangValueExist reg) languages in
|
||||||
|
if length missing == 0 then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError ("Some records have a Preferred-value which does not appear in the registry: " ++
|
||||||
|
(show missing))
|
||||||
|
|
||||||
|
otherRecord reg category val =
|
||||||
|
tail (filter (onlyThisValue category val) (filter (onlyThisType category) reg))
|
||||||
|
|
||||||
|
duplicate reg category =
|
||||||
|
case category of
|
||||||
|
"language" ->
|
||||||
|
filter (\t -> length t > 0) (map (otherRecord reg category)
|
||||||
|
(map (lang'subtag.languageFrom)
|
||||||
|
(filter (onlyThisType category) reg)))
|
||||||
|
_ -> [] -- TODO: implements them, too
|
||||||
|
|
||||||
|
noDuplicates reg =
|
||||||
|
let duplicates = concat (map (duplicate reg)
|
||||||
|
["language", "script", "variant", "region", "grandfathered"]) in
|
||||||
|
if length duplicates == 0 then
|
||||||
|
Success reg
|
||||||
|
else
|
||||||
|
SemanticError ("Duplicate values: " ++ (show duplicates))
|
||||||
|
|
||||||
|
-- TODO: tests Preferred-value
|
59
Registry/Types.hs
Normal file
59
Registry/Types.hs
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
module Registry.Types where
|
||||||
|
|
||||||
|
-- TODO: import Time and use CalendarTime
|
||||||
|
type DateTime = String
|
||||||
|
|
||||||
|
-- TODO for all: Comments
|
||||||
|
|
||||||
|
data Language = Language {lang'subtag::String, lang'descr::[String],
|
||||||
|
lang'added::DateTime, lang'script::Maybe String,
|
||||||
|
lang'preferredValue::Maybe String,
|
||||||
|
lang'macroLanguage::Maybe String,
|
||||||
|
lang'deprecated::Maybe DateTime,
|
||||||
|
lang'scope::Maybe String}
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
data Extlang = Extlang {extlang'subtag::String, extlang'descr::[String],
|
||||||
|
extlang'added::DateTime, extlang'script::Maybe String,
|
||||||
|
extlang'prefix::String, -- An extlang can have only one prefix
|
||||||
|
extlang'preferredValue::Maybe String,
|
||||||
|
extlang'macroLanguage::Maybe String,
|
||||||
|
extlang'scope::Maybe String}
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
data Script = Script {script'subtag::String, script'descr::[String],
|
||||||
|
script'added::DateTime} deriving Show
|
||||||
|
|
||||||
|
data Region = Region {region'subtag::String, region'descr::[String],
|
||||||
|
region'added::DateTime} deriving Show
|
||||||
|
-- TODO: Deprecated and preferredValue
|
||||||
|
|
||||||
|
data Variant = Variant {variant'subtag::String, variant'descr::[String],
|
||||||
|
variant'added::DateTime, variant'prefix::[String]} deriving Show
|
||||||
|
|
||||||
|
data Redundant = Redundant {redundant'tag::String, redundant'descr::[String],
|
||||||
|
redundant'added::DateTime} deriving Show
|
||||||
|
|
||||||
|
data Grandfathered = Grandfathered {gf'tag::String, gf'descr::[String],
|
||||||
|
gf'added::DateTime} deriving Show
|
||||||
|
|
||||||
|
-- TODO: implements them
|
||||||
|
data Extension = Extension {ext'added::DateTime} deriving Show
|
||||||
|
|
||||||
|
data Record = Reg Region | Lang Language | Extl Extlang | Scr Script | Var Variant |
|
||||||
|
Red Redundant | GF Grandfathered | Ext Extension | Date DateTime deriving Show
|
||||||
|
|
||||||
|
type Registry = [Record]
|
||||||
|
|
||||||
|
data CheckResult = Success Registry | SyntaxError String | SemanticError String
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
combineC (Success r1) (Success r2) = Success r1
|
||||||
|
combineC (Success r) (SyntaxError e) = SyntaxError e
|
||||||
|
combineC (SyntaxError e) (Success r) = SyntaxError e
|
||||||
|
combineC (Success r) (SemanticError e) = SemanticError e
|
||||||
|
combineC (SemanticError e) (Success r) = SemanticError e
|
||||||
|
combineC (SyntaxError e1) (SemanticError e2) = SyntaxError e1
|
||||||
|
combineC (SemanticError e2) (SyntaxError e1) = SyntaxError e1
|
||||||
|
combineC (SyntaxError e1) (SyntaxError e2) = SyntaxError (e1 ++ " ; " ++ e2)
|
||||||
|
combineC (SemanticError e1) (SemanticError e2) = SemanticError (e1 ++ " ; " ++ e2)
|
216
Registry/Utils.hs
Normal file
216
Registry/Utils.hs
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
module Registry.Utils where
|
||||||
|
|
||||||
|
import Registry.Grammar
|
||||||
|
import Registry.Types
|
||||||
|
|
||||||
|
import qualified Data.List as List
|
||||||
|
import Data.Char (toUpper)
|
||||||
|
|
||||||
|
toUpperS :: String -> String
|
||||||
|
toUpperS = map toUpper
|
||||||
|
|
||||||
|
onlyDate (Date d) = True
|
||||||
|
onlyDate _ = False
|
||||||
|
|
||||||
|
onlyRecord (Date d) = False
|
||||||
|
onlyRecord _ = True
|
||||||
|
|
||||||
|
onlyGF (GF r) = True
|
||||||
|
onlyGF _ = False
|
||||||
|
|
||||||
|
onlyLang (Lang r) = True
|
||||||
|
onlyLang _ = False
|
||||||
|
|
||||||
|
onlyExtlang (Extl r) = True
|
||||||
|
onlyExtlang _ = False
|
||||||
|
|
||||||
|
onlyScript (Scr r) = True
|
||||||
|
onlyScript _ = False
|
||||||
|
|
||||||
|
onlyRegion (Reg r) = True
|
||||||
|
onlyRegion _ = False
|
||||||
|
|
||||||
|
onlyVariant (Var r) = True
|
||||||
|
onlyVariant _ = False
|
||||||
|
|
||||||
|
onlyRedundant (Red r) = True
|
||||||
|
onlyRedundant _ = False
|
||||||
|
|
||||||
|
onlyExtension (Red r) = True
|
||||||
|
onlyExtension _ = False
|
||||||
|
|
||||||
|
languageFrom (Lang r) = r
|
||||||
|
languageFrom x = error "Not a language"
|
||||||
|
|
||||||
|
extlangFrom (Extl r) = r
|
||||||
|
extlangFrom x = error "Not an extlang (extended language)"
|
||||||
|
|
||||||
|
scriptFrom (Scr r) = r
|
||||||
|
scriptFrom _ = error "Not a script"
|
||||||
|
|
||||||
|
regionFrom (Reg r) = r
|
||||||
|
regionFrom _ = error "Not a region"
|
||||||
|
|
||||||
|
variantFrom (Var v) = v
|
||||||
|
variantFrom _ = error "Not a variant"
|
||||||
|
|
||||||
|
redundantFrom (Red r) = r
|
||||||
|
redundantFrom _ = error "Not a redundant"
|
||||||
|
|
||||||
|
gfFrom (GF r) = r
|
||||||
|
gfFrom _ = error "Not a grandfathered"
|
||||||
|
|
||||||
|
onlyThisType :: String -> Record -> Bool
|
||||||
|
onlyThisType "language" = onlyLang
|
||||||
|
onlyThisType "extlang" = onlyExtlang
|
||||||
|
onlyThisType "grandfathered" = onlyGF
|
||||||
|
onlyThisType "script" = onlyScript
|
||||||
|
onlyThisType "region" = onlyRegion
|
||||||
|
onlyThisType "variant" = onlyVariant
|
||||||
|
onlyThisType "redundant" = onlyRedundant
|
||||||
|
onlyThisType "extension" = onlyExtension
|
||||||
|
onlyThisType t = error ("Unknown type in the registry: " ++ t)
|
||||||
|
|
||||||
|
onlyThisValue :: String -> String -> Record -> Bool
|
||||||
|
-- Remember that language tags are case-insensitive
|
||||||
|
onlyThisValue "language" v = (\r -> toUpperS (lang'subtag r) == toUpperS (v)).languageFrom
|
||||||
|
onlyThisValue "extlang" v = (\r -> toUpperS (extlang'subtag r) == toUpperS (v)).extlangFrom
|
||||||
|
onlyThisValue "grandfathered" v = (\r -> toUpperS (gf'tag r) == toUpperS (v)).gfFrom
|
||||||
|
onlyThisValue "script" v = (\r -> toUpperS (script'subtag r) == toUpperS (v)).scriptFrom
|
||||||
|
onlyThisValue "region" v = (\r -> toUpperS (region'subtag r) == toUpperS (v)).regionFrom
|
||||||
|
onlyThisValue "variant" v = (\r -> toUpperS (variant'subtag r) == toUpperS (v)).variantFrom
|
||||||
|
onlyThisValue "redundant" v = (\r -> toUpperS (redundant'tag r) == toUpperS (v)).redundantFrom
|
||||||
|
-- TODO: extensions
|
||||||
|
onlyThisValue t _ = error ("Unknown type of this value in the registry: " ++ t)
|
||||||
|
|
||||||
|
toSubtag (Lang l) = (lang'subtag l)
|
||||||
|
toSubtag (Scr s) = (script'subtag s)
|
||||||
|
toSubtag (GF t) = (gf'tag t)
|
||||||
|
toSubtag (Reg r) = (region'subtag r)
|
||||||
|
toSubtag (Var v) = (variant'subtag v)
|
||||||
|
toSubtag (Red r) = (redundant'tag r)
|
||||||
|
|
||||||
|
maybeToString Nothing = ""
|
||||||
|
maybeToString (Just s) = s
|
||||||
|
dateToString d = d
|
||||||
|
descrToString a = concat (List.intersperse " / " a)
|
||||||
|
prefixesToString a = concat (List.intersperse " / " a)
|
||||||
|
toString (Lang l) = (lang'subtag l) ++ "\t" ++ dateToString (lang'added l) ++ "\t" ++
|
||||||
|
descrToString (lang'descr l) ++ "\t" ++
|
||||||
|
(maybeToString (lang'script l)) ++ "\n"
|
||||||
|
toString (GF t) = (gf'tag t) ++ "\n"
|
||||||
|
toString (Scr s) = (script'subtag s) ++ "\t" ++ dateToString (script'added s) ++ "\t" ++
|
||||||
|
descrToString (script'descr s) ++ "\n"
|
||||||
|
toString (Reg r) = (region'subtag r) ++ "\t" ++ dateToString (region'added r) ++ "\t" ++
|
||||||
|
descrToString (region'descr r) ++ "\n"
|
||||||
|
toString (Var v) = (variant'subtag v) ++ "\t" ++ dateToString (variant'added v) ++ "\t" ++
|
||||||
|
descrToString (variant'descr v) ++ "\t" ++
|
||||||
|
prefixesToString (variant'prefix v) ++ "\n"
|
||||||
|
toString (Red r) = (redundant'tag r) ++ "\t" ++ dateToString (redundant'added r) ++ "\t" ++
|
||||||
|
descrToString (redundant'descr r) ++ "\n"
|
||||||
|
|
||||||
|
-- TODO: escape non-XML chars
|
||||||
|
dateToXML d = "<date>" ++ d ++ "</date>"
|
||||||
|
addedToXML d = "<added>" ++ d ++ "</added>"
|
||||||
|
deprecatedToXML Nothing = ""
|
||||||
|
deprecatedToXML (Just d) = "<deprecated>" ++ d ++ "</deprecated>"
|
||||||
|
sscriptToXML Nothing = ""
|
||||||
|
sscriptToXML (Just s) = "<suppress-script>" ++ s ++ "</suppress-script>"
|
||||||
|
prefToXML Nothing = ""
|
||||||
|
prefToXML (Just s) = "<preferred-value>" ++ s ++ "</preferred-value>"
|
||||||
|
macrolToXML Nothing = ""
|
||||||
|
macrolToXML (Just l) = "<macrolanguage>" ++ l ++ "</macrolanguage>"
|
||||||
|
scopeToXML Nothing = ""
|
||||||
|
scopeToXML (Just s) = "<scope>" ++ s ++ "</scope>"
|
||||||
|
onedescrToXML s = "<description>" ++ s ++ "</description>"
|
||||||
|
descrToXML a = concat (map onedescrToXML a)
|
||||||
|
oneprefixToXML s = "<prefix>" ++ s ++ "</prefix>"
|
||||||
|
prefixesToXML a = concat (map oneprefixToXML a)
|
||||||
|
toXML (Date d) = dateToXML d
|
||||||
|
toXML (Lang l) = "<language><subtag>" ++ (lang'subtag l) ++ "</subtag>" ++
|
||||||
|
addedToXML (lang'added l) ++
|
||||||
|
descrToXML (lang'descr l) ++
|
||||||
|
(sscriptToXML (lang'script l)) ++
|
||||||
|
deprecatedToXML (lang'deprecated l) ++
|
||||||
|
prefToXML (lang'preferredValue l) ++
|
||||||
|
macrolToXML (lang'macroLanguage l) ++
|
||||||
|
scopeToXML (lang'scope l) ++
|
||||||
|
"</language>\n"
|
||||||
|
toXML (Extl l) = "<extlang><subtag>" ++ (extlang'subtag l) ++ "</subtag>" ++
|
||||||
|
addedToXML (extlang'added l) ++
|
||||||
|
descrToXML (extlang'descr l) ++
|
||||||
|
(sscriptToXML (extlang'script l)) ++
|
||||||
|
macrolToXML (extlang'macroLanguage l) ++
|
||||||
|
scopeToXML (extlang'scope l) ++
|
||||||
|
"</extlang>\n"
|
||||||
|
toXML (GF t) = "<grandfathered><tag>" ++ (gf'tag t) ++ "</tag>" ++
|
||||||
|
addedToXML (gf'added t) ++
|
||||||
|
descrToXML (gf'descr t) ++ "</grandfathered>\n"
|
||||||
|
toXML (Scr s) = "<script><subtag>" ++ (script'subtag s) ++ "</subtag>" ++
|
||||||
|
addedToXML (script'added s) ++
|
||||||
|
descrToXML (script'descr s) ++ "</script>\n"
|
||||||
|
toXML (Reg r) = "<region><subtag>" ++ (region'subtag r) ++ "</subtag>" ++
|
||||||
|
addedToXML (region'added r) ++
|
||||||
|
descrToXML (region'descr r) ++ "</region>\n"
|
||||||
|
toXML (Var v) = "<variant><subtag>" ++ (variant'subtag v) ++ "</subtag>" ++
|
||||||
|
addedToXML (variant'added v) ++
|
||||||
|
descrToXML (variant'descr v) ++
|
||||||
|
prefixesToXML (variant'prefix v) ++ "</variant>\n"
|
||||||
|
toXML (Red r) = "<redundant><tag>" ++ (redundant'tag r) ++ "</tag>" ++
|
||||||
|
addedToXML (redundant'added r) ++
|
||||||
|
descrToXML (redundant'descr r) ++ "</redundant>\n"
|
||||||
|
toXML (Ext e) = error "No support for extensions"
|
||||||
|
|
||||||
|
-- TODO: escape non-HTML chars
|
||||||
|
html_head s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head><link rel=\"stylesheet\" type=\"text/css\" href=\"/ltru.css\" /><title>" ++ s ++ "</title></head><body><h1>" ++
|
||||||
|
s ++ "</h1>"
|
||||||
|
html_tail = "<hr/><p><a href=\"/\">langtag.net Home</a></p></body></html>"
|
||||||
|
linkLanguage langcode =
|
||||||
|
if (length langcode) == 3 then
|
||||||
|
"<a href=\"http://www.sil.org/iso639-3/documentation.asp?id=" ++
|
||||||
|
langcode ++ "\">See the SIL entry</a> (if present)."
|
||||||
|
else if (length langcode) == 2 then
|
||||||
|
"<a href=\"http://www.loc.gov/standards/iso639-2/php/langcodes_name.php?iso_639_1=" ++
|
||||||
|
langcode ++ "\">See the LOC entry</a> (if present)."
|
||||||
|
else
|
||||||
|
""
|
||||||
|
dateToHTML d = "Date: " ++ d
|
||||||
|
addedToHTML d = "Added on " ++ d
|
||||||
|
sscriptToHTML Nothing = ""
|
||||||
|
sscriptToHTML (Just s) = " Suppress the script: <code><a href=\"../script/" ++ s ++
|
||||||
|
".html\">" ++ s ++ "</a></code>"
|
||||||
|
onedescrToHTML s = s
|
||||||
|
descrToHTML a = concat (List.intersperse " / " (map onedescrToHTML a))
|
||||||
|
-- TODO: a prefix is not always a simple language, it can be composed of several subtags
|
||||||
|
oneprefixToHTML s = " Possible prefix: <code><a href=\"../language/" ++ s ++ ".html\">" ++ s ++ "</a></code>"
|
||||||
|
prefixesToHTML a = concat (map oneprefixToHTML a)
|
||||||
|
toHTML (Date d) = "<p>Date: " ++ dateToHTML d ++ "</p>"
|
||||||
|
toHTML (Lang l) = (html_head ((lang'descr l) !! 0)) ++ "<p><code>" ++
|
||||||
|
(lang'subtag l) ++ "</code>. " ++
|
||||||
|
addedToHTML (lang'added l) ++ ". Description: " ++
|
||||||
|
descrToHTML (lang'descr l) ++ ". " ++
|
||||||
|
linkLanguage (lang'subtag l) ++
|
||||||
|
(sscriptToHTML (lang'script l)) ++ "</p>" ++ html_tail ++ "\n"
|
||||||
|
toHTML (GF t) = "<p>" ++ (gf'tag t) ++
|
||||||
|
addedToHTML (gf'added t) ++
|
||||||
|
descrToHTML (gf'descr t) ++ "</p>\n"
|
||||||
|
toHTML (Scr s) = (html_head ((script'descr s) !! 0)) ++ "<p><code>" ++
|
||||||
|
(script'subtag s) ++ "</code>." ++
|
||||||
|
addedToHTML (script'added s) ++ ". Description: " ++
|
||||||
|
descrToHTML (script'descr s) ++ "</p>" ++ html_tail ++ "\n"
|
||||||
|
toHTML (Reg r) = (html_head ((region'descr r) !! 0)) ++ "<p>" ++
|
||||||
|
(region'subtag r) ++ " " ++
|
||||||
|
addedToHTML (region'added r) ++
|
||||||
|
". Description: " ++
|
||||||
|
descrToHTML (region'descr r) ++ "</p>" ++
|
||||||
|
html_tail ++ "\n"
|
||||||
|
toHTML (Var v) = (html_head ((variant'descr v) !! 0)) ++ "<p><code>" ++
|
||||||
|
(variant'subtag v) ++ "</code>. " ++
|
||||||
|
addedToHTML (variant'added v) ++ ". Description: " ++
|
||||||
|
descrToHTML (variant'descr v) ++ ". " ++
|
||||||
|
prefixesToHTML (variant'prefix v) ++ "</p>" ++ html_tail ++ "\n"
|
||||||
|
toHTML (Red r) = (html_head ((redundant'descr r) !! 0)) ++
|
||||||
|
"<p>" ++ (redundant'tag r) ++
|
||||||
|
addedToHTML (redundant'added r) ++
|
||||||
|
descrToHTML (redundant'descr r) ++ "</p>" ++
|
||||||
|
html_tail ++ "\n"
|
88
Registry/check-registry.hs
Normal file
88
Registry/check-registry.hs
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
|
||||||
|
import qualified System.IO
|
||||||
|
import qualified System.Environment
|
||||||
|
import qualified Data.List as List
|
||||||
|
import qualified System.Exit as Exit
|
||||||
|
import qualified System.Console.GetOpt as GetOpt
|
||||||
|
|
||||||
|
import Registry.Grammar
|
||||||
|
import Registry.Types
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
nothing = putStr ""
|
||||||
|
|
||||||
|
-- Options handling
|
||||||
|
data Flag
|
||||||
|
= Verbose | Help deriving Show
|
||||||
|
options :: [GetOpt.OptDescr Flag]
|
||||||
|
options =
|
||||||
|
[ GetOpt.Option ['v'] ["verbose"] (GetOpt.NoArg Verbose) "Detailed output",
|
||||||
|
GetOpt.Option ['h'] ["help"] (GetOpt.NoArg Help) "Help message"
|
||||||
|
]
|
||||||
|
data OptionStore = ProgramOptions {verbose::Bool, help::Bool}
|
||||||
|
header = do
|
||||||
|
myself <- System.Environment.getProgName
|
||||||
|
return ("Usage: " ++ myself ++ " [-v] registry-file")
|
||||||
|
fatal detailed msg = do
|
||||||
|
myheader <- header
|
||||||
|
if detailed then
|
||||||
|
System.IO.hPutStrLn System.IO.stderr (GetOpt.usageInfo myheader options)
|
||||||
|
else
|
||||||
|
System.IO.hPutStrLn System.IO.stderr (myheader)
|
||||||
|
if msg /= "" then
|
||||||
|
System.IO.hPutStrLn System.IO.stderr msg
|
||||||
|
else
|
||||||
|
nothing
|
||||||
|
Exit.exitWith (Exit.ExitFailure 1)
|
||||||
|
-- Defaults
|
||||||
|
emptyOptionStore :: OptionStore
|
||||||
|
emptyOptionStore =
|
||||||
|
ProgramOptions {verbose = False, help = False}
|
||||||
|
processFlags :: [Flag] -> OptionStore
|
||||||
|
processFlags [] = emptyOptionStore
|
||||||
|
processFlags (first : rest) =
|
||||||
|
case first of
|
||||||
|
Verbose -> restOpts { verbose = True }
|
||||||
|
Help -> restOpts { help = True }
|
||||||
|
where restOpts = processFlags rest
|
||||||
|
programOpts :: [String] -> IO ([Flag], [String])
|
||||||
|
programOpts argv =
|
||||||
|
case GetOpt.getOpt GetOpt.Permute options argv of
|
||||||
|
(o,n,[] ) -> return(o,n)
|
||||||
|
(_,_,errs) -> fatal True (concat errs)
|
||||||
|
getOptionStore :: ([Flag], [String]) -> OptionStore
|
||||||
|
getOptionStore (f, n) = processFlags f
|
||||||
|
--
|
||||||
|
|
||||||
|
main = do
|
||||||
|
myargs <- System.Environment.getArgs
|
||||||
|
results <- programOpts myargs
|
||||||
|
let opts = getOptionStore results
|
||||||
|
let args = snd results
|
||||||
|
if help opts then
|
||||||
|
fatal True ""
|
||||||
|
else
|
||||||
|
nothing
|
||||||
|
if length (args) /= 1 then
|
||||||
|
fatal False "One and only one argument accepted"
|
||||||
|
else
|
||||||
|
nothing
|
||||||
|
let registryfilename = args !! 0
|
||||||
|
input <- System.IO.readFile registryfilename
|
||||||
|
let syntaxTree = parse input
|
||||||
|
if (checkOK syntaxTree) then do
|
||||||
|
let result = semanticCheck (registryOf syntaxTree)
|
||||||
|
if (checkOK result) then do
|
||||||
|
if verbose opts then
|
||||||
|
putStrLn (registryfilename ++ " is OK") -- TODO: display the date
|
||||||
|
else
|
||||||
|
nothing
|
||||||
|
Exit.exitWith Exit.ExitSuccess
|
||||||
|
else do
|
||||||
|
putStrLn ("Semantic error in " ++ registryfilename ++ ": " ++
|
||||||
|
(messageOf result))
|
||||||
|
Exit.exitWith (Exit.ExitFailure 3)
|
||||||
|
else do
|
||||||
|
putStrLn ("Syntax error in " ++ registryfilename ++ ": " ++
|
||||||
|
(messageOf syntaxTree))
|
||||||
|
Exit.exitWith (Exit.ExitFailure 2)
|
37
Registry/registry2mulhtml.hs
Normal file
37
Registry/registry2mulhtml.hs
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import qualified IO
|
||||||
|
import qualified System
|
||||||
|
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Grammar (registry)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
outdir = "./registry-html"
|
||||||
|
|
||||||
|
display reg thistype thisvalue =
|
||||||
|
-- We should always retrieve only one value, so we can take the head
|
||||||
|
toHTML ((filter (onlyThisValue thistype thisvalue)
|
||||||
|
(filter (onlyThisType thistype) reg)) !! 0)
|
||||||
|
|
||||||
|
toHTMLFiles reg thetype =
|
||||||
|
mapM (toHTMLfile reg thetype) (filter (onlyThisType thetype) reg)
|
||||||
|
|
||||||
|
toHTMLfile reg thistype thisvalue = do
|
||||||
|
let outfile = outdir ++ "/" ++ thistype ++ "/" ++ (toSubtag thisvalue) ++ ".html"
|
||||||
|
f <- IO.openFile (outfile) IO.WriteMode
|
||||||
|
IO.hPutStr f (display reg thistype (toSubtag thisvalue))
|
||||||
|
IO.hClose f
|
||||||
|
|
||||||
|
foreach [] func = return ()
|
||||||
|
foreach a func = do
|
||||||
|
func (head a)
|
||||||
|
foreach (tail a) func
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- IO.openFile (infile) IO.ReadMode
|
||||||
|
input <- IO.hGetContents f
|
||||||
|
let theregistry = getRegistry input
|
||||||
|
foreach ["language", "script", "region", "variant",
|
||||||
|
"redundant", "grandfathered"]
|
||||||
|
(toHTMLFiles theregistry)
|
||||||
|
|
23
Registry/registry2postgresql.hs
Normal file
23
Registry/registry2postgresql.hs
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import qualified System.IO
|
||||||
|
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Utils.PostgreSQL
|
||||||
|
import Registry.Grammar (registry)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
displayAll reg thistype =
|
||||||
|
(foldr (++) "" (map toPostgreSQL
|
||||||
|
(filter (onlyThisType thistype) reg)))
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||||
|
input <- System.IO.hGetContents f
|
||||||
|
let theregistry = getRegistry input
|
||||||
|
putStrLn "BEGIN;"
|
||||||
|
putStrLn (displayAll theregistry "script")
|
||||||
|
putStrLn (displayAll theregistry "region")
|
||||||
|
putStrLn (displayAll theregistry "language")
|
||||||
|
putStrLn (displayAll theregistry "variant")
|
||||||
|
putStrLn "COMMIT;"
|
24
Registry/registry2sqlite.hs
Normal file
24
Registry/registry2sqlite.hs
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import qualified IO
|
||||||
|
import qualified System
|
||||||
|
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Utils.SQLite
|
||||||
|
import Registry.Grammar (registry)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
displayAll reg thistype =
|
||||||
|
(foldr (++) "" (map toSQLite
|
||||||
|
(filter (onlyThisType thistype) reg)))
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- IO.openFile (infile) IO.ReadMode
|
||||||
|
input <- IO.hGetContents f
|
||||||
|
let theregistry = getRegistry input
|
||||||
|
putStrLn "BEGIN;"
|
||||||
|
putStrLn (displayAll theregistry "script")
|
||||||
|
putStrLn (displayAll theregistry "region")
|
||||||
|
putStrLn (displayAll theregistry "language")
|
||||||
|
putStrLn (displayAll theregistry "variant")
|
||||||
|
putStrLn "COMMIT;"
|
35
Registry/registry2txt.hs
Normal file
35
Registry/registry2txt.hs
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
import qualified System.IO
|
||||||
|
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Grammar (registry)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
displayAll reg thistype =
|
||||||
|
(foldr (++) "" (map toString
|
||||||
|
(filter (onlyThisType thistype) reg)))
|
||||||
|
|
||||||
|
displayOne reg thistype thisvalue =
|
||||||
|
concat (map toString
|
||||||
|
(filter (onlyThisValue thistype thisvalue) reg))
|
||||||
|
|
||||||
|
toTextFile reg thetype = do
|
||||||
|
-- TODO: add the date in the filename
|
||||||
|
let outfile = "lsr-" ++ thetype ++ ".txt"
|
||||||
|
f <- System.IO.openFile (outfile) System.IO.WriteMode
|
||||||
|
System.IO.hPutStr f (displayAll reg thetype)
|
||||||
|
System.IO.hClose f
|
||||||
|
|
||||||
|
foreach [] func = return ()
|
||||||
|
foreach a func = do
|
||||||
|
func (head a)
|
||||||
|
foreach (tail a) func
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||||
|
input <- System.IO.hGetContents f
|
||||||
|
let theregistry = getRegistry input
|
||||||
|
foreach ["language", "script", "region", "variant", "redundant", "grandfathered"]
|
||||||
|
(toTextFile theregistry)
|
||||||
|
|
32
Registry/registry2xml.hs
Normal file
32
Registry/registry2xml.hs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import qualified System.IO
|
||||||
|
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||||
|
|
||||||
|
import Registry.Utils
|
||||||
|
import Registry.Grammar (registry)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
displayAll reg = "<registry>" ++
|
||||||
|
(foldr (++) "" (map toXML reg)) ++
|
||||||
|
"</registry>"
|
||||||
|
|
||||||
|
displayOne reg thistype thisvalue =
|
||||||
|
concat (map toXML
|
||||||
|
(filter (onlyThisValue thistype thisvalue) reg))
|
||||||
|
|
||||||
|
foreach [] func = return ()
|
||||||
|
foreach a func = do
|
||||||
|
func (head a)
|
||||||
|
foreach (tail a) func
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||||
|
input <- System.IO.hGetContents f
|
||||||
|
let theregistry = getRegistry input
|
||||||
|
let outfile = "language-subtag-registry.xml"
|
||||||
|
f <- System.IO.openFile (outfile) System.IO.WriteMode
|
||||||
|
System.IO.hPutStr f (displayAll theregistry)
|
||||||
|
System.IO.hClose f
|
||||||
|
|
||||||
|
|
21
Types.hs
Normal file
21
Types.hs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
-- Haskell types to support the language tags
|
||||||
|
|
||||||
|
module Types where
|
||||||
|
|
||||||
|
type Language = String
|
||||||
|
type Extlang = Maybe String -- Only one extlang authorized
|
||||||
|
type Script = Maybe String
|
||||||
|
type Region = Maybe String
|
||||||
|
type Singleton = Char
|
||||||
|
type Variants = [Variant]
|
||||||
|
type Variant = String
|
||||||
|
type Extension = (Singleton, [String])
|
||||||
|
type Extensions = [Extension]
|
||||||
|
|
||||||
|
type Grandfathered = String
|
||||||
|
type Privateuse = [String]
|
||||||
|
|
||||||
|
data Tag = Tag Language Extlang Script Region Variants Extensions | GF Grandfathered | Priv Privateuse deriving (Show, Eq)
|
||||||
|
-- TODO: warning, Eq must
|
||||||
|
-- be redefined because it needs to take into account case-insensitivity and
|
||||||
|
-- (maybe) canonicalization
|
11
broken-registries/missing-field
Normal file
11
broken-registries/missing-field
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
File-Date: 2006-09-20
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
12
broken-registries/no-colon
Normal file
12
broken-registries/no-colon
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
File-Date: 2006-09-20
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
12
broken-registries/wrong-type
Normal file
12
broken-registries/wrong-type
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
File-Date: 2006-09-20
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: languge
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
30
broken-tags.txt
Normal file
30
broken-tags.txt
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
f
|
||||||
|
f-Latn # Main tag too short
|
||||||
|
fr-fra # Extended tag are no longer well-formed
|
||||||
|
fr-Lat # Extended, obsolete
|
||||||
|
xr-lxs-qut # extlangS
|
||||||
|
xr-lqt-qu # extlang + region
|
||||||
|
fr-Latn-F
|
||||||
|
a-value
|
||||||
|
tlh-a-b-foo
|
||||||
|
i-notexist # grandfathered but not registered: invalid, even if we only test well-formedness
|
||||||
|
abcdefghi-012345678
|
||||||
|
ab-abc-abc-abc-abc
|
||||||
|
ab-abcd-abc
|
||||||
|
ab-ab-abc
|
||||||
|
ab-123-abc
|
||||||
|
a-Hant-ZH
|
||||||
|
a1-Hant-ZH
|
||||||
|
ab-abcde-abc
|
||||||
|
ab-1abc-abc
|
||||||
|
ab-ab-abcd
|
||||||
|
ab-123-abcd
|
||||||
|
ab-abcde-abcd
|
||||||
|
ab-1abc-abcd
|
||||||
|
ab-a-b
|
||||||
|
ab-a-x
|
||||||
|
ab--ab
|
||||||
|
ab-abc-
|
||||||
|
-ab-abc
|
||||||
|
abcd-efg
|
||||||
|
aabbccddE
|
21
check-valid.hs
Normal file
21
check-valid.hs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
import Grammar
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
analyze reg input
|
||||||
|
= case (getTag input) of
|
||||||
|
Left err -> err
|
||||||
|
Right tag -> let result = isValid reg tag in
|
||||||
|
if fst result then
|
||||||
|
(input ++ " is valid\n") -- TODO: indicate the date of the registry
|
||||||
|
else
|
||||||
|
(input ++ " is NOT valid: " ++ snd result ++ "\n")
|
||||||
|
|
||||||
|
main = do
|
||||||
|
myargs <- getArgs
|
||||||
|
theregistry <- readRegistry infile
|
||||||
|
putStr (concat (map (analyze theregistry) myargs))
|
||||||
|
|
||||||
|
|
15
check-wf.hs
Normal file
15
check-wf.hs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import Grammar
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
|
||||||
|
analyze language input
|
||||||
|
= case (parse language "" input) of
|
||||||
|
Left err -> (input ++ " is NOT well-formed: " ++ (show err) ++
|
||||||
|
"\n")
|
||||||
|
Right x -> (input ++ " is well-formed\n")
|
||||||
|
|
||||||
|
main = do
|
||||||
|
myargs <- getArgs
|
||||||
|
putStr (concat (map (analyze tag) myargs))
|
||||||
|
-- TODO: set return code? What code to use if some are well-formed and some are not?
|
||||||
|
-- TODO: provide a version with -q and -v ?
|
||||||
|
|
18
correct-registries/inexisting-prefix
Normal file
18
correct-registries/inexisting-prefix
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
||||||
|
%%
|
||||||
|
Type: variant
|
||||||
|
Subtag: 1901
|
||||||
|
Description: Traditional German orthography
|
||||||
|
Added: 2005-10-16
|
||||||
|
Prefix: fx
|
12
correct-registries/inexisting-script
Normal file
12
correct-registries/inexisting-script
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrk
|
77
display-tag.hs
Normal file
77
display-tag.hs
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
import Grammar
|
||||||
|
import Types
|
||||||
|
|
||||||
|
import Registry.Registry (readRegistry, isValid)
|
||||||
|
import Registry.Types
|
||||||
|
import Registry.Utils (onlyThisValue, onlyThisType, scriptFrom, languageFrom,
|
||||||
|
regionFrom, variantFrom)
|
||||||
|
|
||||||
|
import Data.List (intersperse)
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
langToString registry l =
|
||||||
|
"language \"" ++ concat (intersperse " / "
|
||||||
|
(lang'descr (languageFrom (filter (onlyThisValue
|
||||||
|
"language" l)
|
||||||
|
(filter (onlyThisType "language")
|
||||||
|
registry) !! 0)))) ++ "\""
|
||||||
|
|
||||||
|
scriptToString registry Nothing = ""
|
||||||
|
scriptToString registry (Just s) =
|
||||||
|
let theScript = (filter (onlyThisValue "script" s)
|
||||||
|
(filter (onlyThisType "script")
|
||||||
|
registry)) !! 0 in
|
||||||
|
", " ++ s ++ ": script " ++
|
||||||
|
" \"" ++ concat (intersperse " / " (script'descr
|
||||||
|
(scriptFrom theScript))) ++ "\""
|
||||||
|
|
||||||
|
regionToString registry Nothing = ""
|
||||||
|
regionToString registry (Just r) =
|
||||||
|
let theRegion = (filter (onlyThisValue "region" r)
|
||||||
|
(filter (onlyThisType "region")
|
||||||
|
registry)) !! 0 in
|
||||||
|
", " ++ r ++ ": region " ++
|
||||||
|
" \"" ++ concat (intersperse " / " (region'descr
|
||||||
|
(regionFrom theRegion))) ++ "\""
|
||||||
|
|
||||||
|
variantsToString registry [] = ""
|
||||||
|
variantsToString registry v = ", " ++ concat (intersperse ", "
|
||||||
|
(map (variantToString registry) v))
|
||||||
|
|
||||||
|
variantToString registry v =
|
||||||
|
let theVariant = (filter (onlyThisValue "variant" v)
|
||||||
|
(filter (onlyThisType "variant")
|
||||||
|
registry)) !! 0 in
|
||||||
|
v ++ ": variant " ++
|
||||||
|
" \"" ++ concat (intersperse " / " (variant'descr
|
||||||
|
(variantFrom theVariant))) ++ "\""
|
||||||
|
|
||||||
|
toString :: Registry -> Tag -> String
|
||||||
|
toString registry (Tag l el s r v e) =
|
||||||
|
-- TODO: display extlangs
|
||||||
|
l ++ ": " ++ langToString registry l ++
|
||||||
|
scriptToString registry s ++ regionToString registry r ++
|
||||||
|
variantsToString registry v
|
||||||
|
|
||||||
|
toString registry (Types.GF tag) = tag
|
||||||
|
toString registry (Priv p) =
|
||||||
|
(concat p) ++ " (private tag so no info available)"
|
||||||
|
|
||||||
|
analyze :: Registry -> String -> String
|
||||||
|
analyze reg input
|
||||||
|
= case (getTag input) of
|
||||||
|
Left err -> err
|
||||||
|
Right tag -> let result = isValid reg tag in
|
||||||
|
if fst result then
|
||||||
|
input ++ ": (" ++ (toString reg tag) ++ ")"
|
||||||
|
else
|
||||||
|
(input ++ " is NOT valid: " ++ snd result ++ "\n")
|
||||||
|
|
||||||
|
main = do
|
||||||
|
myargs <- getArgs
|
||||||
|
theregistry <- readRegistry infile
|
||||||
|
putStrLn (concat (intersperse "\n" (map (analyze theregistry) myargs)))
|
||||||
|
|
||||||
|
|
33
invalid-registries/duplicata
Normal file
33
invalid-registries/duplicata
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian again
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrl
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Arab
|
||||||
|
Description: Arabic
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Armn
|
||||||
|
Description: Armenian
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Cyrl
|
||||||
|
Description: Cyrillic
|
||||||
|
Added: 2005-10-16
|
18
invalid-registries/inexisting-prefix
Normal file
18
invalid-registries/inexisting-prefix
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrk
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Cyrl
|
||||||
|
Description: Cyrillic
|
||||||
|
Added: 2005-10-16
|
||||||
|
|
12
invalid-registries/inexisting-script
Normal file
12
invalid-registries/inexisting-script
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrk
|
13
invalid-registries/missing-macrolang
Normal file
13
invalid-registries/missing-macrolang
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
File-Date: 2008-02-20
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: foo
|
||||||
|
Description: Test only
|
||||||
|
Macrolanguage: xx
|
||||||
|
Added: 2008-02-20
|
||||||
|
|
28
invalid-registries/missing-pref
Normal file
28
invalid-registries/missing-pref
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
File-Date: 2006-09-25
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
Preferred-value: xx
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
Suppress-Script: Cyrx
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Arab
|
||||||
|
Description: Arabic
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Armn
|
||||||
|
Description: Armenian
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: script
|
||||||
|
Subtag: Cyrl
|
||||||
|
Description: Cyrillic
|
||||||
|
Added: 2005-10-16
|
10
invalid-registries/no-date
Normal file
10
invalid-registries/no-date
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
Type: language
|
||||||
|
Subtag: aa
|
||||||
|
Description: Afar
|
||||||
|
Added: 2005-10-16
|
||||||
|
%%
|
||||||
|
Type: language
|
||||||
|
Subtag: ab
|
||||||
|
Description: Abkhazian
|
||||||
|
Added: 2005-10-16
|
||||||
|
|
11
invalid-tags.txt
Normal file
11
invalid-tags.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
ax-TZ # Not in the registry, but well-formed
|
||||||
|
fra-Latn # ISO 639 can be 3-letters
|
||||||
|
fra
|
||||||
|
fra-FX
|
||||||
|
abcd-Latn # Language of 4 chars reserved for future use
|
||||||
|
AaBbCcDd-x-y-any-x # Language of 5-8 chars, registered
|
||||||
|
zh-Latm-CN # Typo
|
||||||
|
de-DE-1902 # Wrong variant
|
||||||
|
fr-shadok # Variant
|
||||||
|
ab-c-abc-r-toto-c-abc # 'c' appears twice
|
||||||
|
en-a-bbb-a-ccc # 'a' appears twice
|
26
irregular.hs
Normal file
26
irregular.hs
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import qualified IO
|
||||||
|
import Text.ParserCombinators.Parsec as Parsec
|
||||||
|
import qualified Data.List as List
|
||||||
|
|
||||||
|
import Grammar
|
||||||
|
import Registry.Grammar
|
||||||
|
import Registry.Types
|
||||||
|
|
||||||
|
infile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
onlyIrregularGF (GF r) = isLeft (Grammar.getTag (gf'tag r))
|
||||||
|
onlyIrregularGF _ = False
|
||||||
|
|
||||||
|
displayTag (GF r) = "\"" ++ (gf'tag r) ++ "\" / "
|
||||||
|
|
||||||
|
run parser input
|
||||||
|
= case (Parsec.parse parser "" input) of
|
||||||
|
Left err -> error ("Registry \"" ++ infile ++ "\" is not legal: " ++ (show err) ++ "\n")
|
||||||
|
Right x -> IO.putStr ("-- AUTOMATICALLY GENERATED! Do *not* edit!!! \n" ++
|
||||||
|
(foldr (++) "" (List.intersperse "\n" (map displayTag (filter onlyIrregularGF (map fromRight x))))) ++
|
||||||
|
"\n")
|
||||||
|
|
||||||
|
main = do
|
||||||
|
f <- IO.openFile (infile) IO.ReadMode
|
||||||
|
input <- IO.hGetContents f
|
||||||
|
run registry input
|
76
test-registries.hs
Normal file
76
test-registries.hs
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import qualified Test.HUnit as HUnit
|
||||||
|
import qualified System.IO as IO
|
||||||
|
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||||
|
import qualified Data.List as List
|
||||||
|
import qualified Data.Maybe as Maybe
|
||||||
|
import qualified Text.Regex as RE
|
||||||
|
import qualified System.Directory as Dir
|
||||||
|
|
||||||
|
import Registry.Grammar
|
||||||
|
import Registry.Types
|
||||||
|
import Registry.Registry
|
||||||
|
|
||||||
|
brokenDir = "./broken-registries"
|
||||||
|
correctDir = "./correct-registries"
|
||||||
|
invalidDir = "./invalid-registries"
|
||||||
|
validDir = "./valid-registries"
|
||||||
|
|
||||||
|
data RegistryText = Registry {filename::FilePath, filecontents::String}
|
||||||
|
|
||||||
|
buildTestCaseSyn :: Bool -> RegistryText -> HUnit.Test
|
||||||
|
buildTestCaseSyn True reg =
|
||||||
|
HUnit.TestCase (HUnit.assertBool ("Should be syntactically OK (" ++ (filename reg) ++ ")")
|
||||||
|
(checkOK (parse (filecontents reg))))
|
||||||
|
buildTestCaseSyn False reg =
|
||||||
|
HUnit.TestCase (HUnit.assertBool ("Should be syntactically wrong (" ++ (filename reg) ++ ")")
|
||||||
|
(not (checkOK (parse (filecontents reg)))))
|
||||||
|
|
||||||
|
buildTestCaseSem :: Bool -> RegistryText -> HUnit.Test
|
||||||
|
buildTestCaseSem True reg =
|
||||||
|
let registry = parse (filecontents reg) in
|
||||||
|
HUnit.TestCase (HUnit.assertBool ("Should be valid (" ++ (filename reg) ++ ")")
|
||||||
|
(checkOK (registry) &&
|
||||||
|
checkOK (semanticCheck (registryOf registry))))
|
||||||
|
buildTestCaseSem False reg =
|
||||||
|
let registry = parse (filecontents reg) in
|
||||||
|
HUnit.TestCase (HUnit.assertBool ("Should be invalid (" ++ (filename reg) ++ ")")
|
||||||
|
(checkOK (registry) &&
|
||||||
|
not (checkOK (semanticCheck (registryOf registry)))))
|
||||||
|
|
||||||
|
endsWith :: String -> String -> Bool
|
||||||
|
endsWith s1 s2 = Maybe.isJust (RE.matchRegex (RE.mkRegex (s2 ++ "$")) s1)
|
||||||
|
|
||||||
|
-- TODO: use mapM instead
|
||||||
|
foreach :: (Monad m) => [a1] -> (a1 -> m a) -> m [a]
|
||||||
|
foreach [] func = do
|
||||||
|
return []
|
||||||
|
foreach x func = do
|
||||||
|
newHead <- func (head x)
|
||||||
|
newTail <- foreach (tail x) func
|
||||||
|
return (newHead : newTail)
|
||||||
|
|
||||||
|
contentOf :: FilePath -> IO RegistryText
|
||||||
|
contentOf f = do
|
||||||
|
content <- IO.readFile f
|
||||||
|
return (Registry f content)
|
||||||
|
|
||||||
|
addDir d f = d ++ "/" ++ f
|
||||||
|
|
||||||
|
filesOf :: FilePath -> IO [RegistryText]
|
||||||
|
filesOf d = do
|
||||||
|
files <- Dir.getDirectoryContents d
|
||||||
|
foreach (map (addDir d) (filter (\n -> n /= "." && n /= ".." && (not (endsWith n "~")))
|
||||||
|
files))
|
||||||
|
contentOf
|
||||||
|
|
||||||
|
main = do
|
||||||
|
brokenFiles <- filesOf brokenDir
|
||||||
|
let testsBroken = map (buildTestCaseSyn False) brokenFiles
|
||||||
|
invalidFiles <- filesOf invalidDir
|
||||||
|
let testsInvalid = map (buildTestCaseSem False) invalidFiles
|
||||||
|
correctFiles <- filesOf correctDir
|
||||||
|
let testsCorrect = map (buildTestCaseSyn True) correctFiles
|
||||||
|
validFiles <- filesOf validDir
|
||||||
|
let testsValid = map (buildTestCaseSem True) validFiles
|
||||||
|
HUnit.runTestTT (HUnit.TestList (testsBroken ++ testsInvalid ++
|
||||||
|
testsCorrect ++ testsValid))
|
54
tests-from-files.hs
Normal file
54
tests-from-files.hs
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import qualified Grammar
|
||||||
|
import qualified Registry.Registry as Registry
|
||||||
|
|
||||||
|
import qualified Test.HUnit as HUnit
|
||||||
|
import qualified System.IO
|
||||||
|
import qualified Text.Regex as Regex
|
||||||
|
|
||||||
|
wfTagsFile = "./well-formed-tags.txt"
|
||||||
|
brokenTagsFile = "./broken-tags.txt"
|
||||||
|
validTagsFile = "./valid-tags.txt"
|
||||||
|
invalidTagsFile = "./invalid-tags.txt"
|
||||||
|
|
||||||
|
registryfile = "./language-subtag-registry"
|
||||||
|
|
||||||
|
languageTag = Grammar.tag
|
||||||
|
|
||||||
|
getTag line =
|
||||||
|
let fields = Regex.splitRegex (Regex.mkRegex " +") line in
|
||||||
|
fields !! 0
|
||||||
|
|
||||||
|
shouldBeWellFormed tag =
|
||||||
|
HUnit.TestCase (HUnit.assertBool (tag ++ " should be well-formed")
|
||||||
|
(Grammar.testTag tag == True))
|
||||||
|
|
||||||
|
shouldBeBroken tag =
|
||||||
|
HUnit.TestCase (HUnit.assertBool (tag ++ " should *not* be well-formed")
|
||||||
|
(Grammar.testTag tag == False))
|
||||||
|
|
||||||
|
shouldBeValid reg tag =
|
||||||
|
HUnit.TestCase (HUnit.assertBool (tag ++ " should be valid")
|
||||||
|
(Registry.testTag reg tag == True))
|
||||||
|
|
||||||
|
shouldBeInvalid reg tag =
|
||||||
|
HUnit.TestCase (HUnit.assertBool (tag ++ " should *not* be valid")
|
||||||
|
(Registry.testTag reg tag == False))
|
||||||
|
|
||||||
|
tagsFromFile filename = do
|
||||||
|
myContents <- System.IO.readFile filename
|
||||||
|
let myLines = lines myContents
|
||||||
|
return (map getTag myLines)
|
||||||
|
|
||||||
|
main = do
|
||||||
|
brokenTags <- tagsFromFile brokenTagsFile
|
||||||
|
wfTags <- tagsFromFile wfTagsFile
|
||||||
|
theregistry <- Registry.readRegistry registryfile
|
||||||
|
invalidTags <- tagsFromFile invalidTagsFile
|
||||||
|
validTags <- tagsFromFile validTagsFile
|
||||||
|
let tests = HUnit.TestList (map shouldBeBroken (brokenTags) ++
|
||||||
|
map shouldBeWellFormed (wfTags) ++
|
||||||
|
map (shouldBeValid theregistry) (validTags) ++
|
||||||
|
map (shouldBeInvalid theregistry) (invalidTags))
|
||||||
|
HUnit.runTestTT tests
|
||||||
|
|
||||||
|
|
44596
valid-registries/language-subtag-registry
Normal file
44596
valid-registries/language-subtag-registry
Normal file
File diff suppressed because it is too large
Load Diff
48
valid-tags.txt
Normal file
48
valid-tags.txt
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
fr
|
||||||
|
fr-Latn
|
||||||
|
fr-Latn-FR
|
||||||
|
fr-Latn-419
|
||||||
|
fr-FR
|
||||||
|
fr-y-myext-myext2
|
||||||
|
apa-Latn # ISO 639 can be 3-letters
|
||||||
|
apa
|
||||||
|
apa-CA
|
||||||
|
i-klingon # grandfathered with singleton
|
||||||
|
no-bok # grandfathered without singleton
|
||||||
|
mn-Cyrl-MN
|
||||||
|
mN-cYrL-Mn
|
||||||
|
fr-Latn-CA
|
||||||
|
en-US
|
||||||
|
fr-Latn-CA
|
||||||
|
i-enochian # Grand fathered
|
||||||
|
x-fr-CH
|
||||||
|
sr-Latn-CS
|
||||||
|
es-419
|
||||||
|
sl-nedis
|
||||||
|
de-CH-1996
|
||||||
|
de-Latg-1996
|
||||||
|
sl-IT-nedis
|
||||||
|
en-a-bbb-x-a-ccc
|
||||||
|
de-a-value
|
||||||
|
en-x-US
|
||||||
|
az-Arab-x-AZE-derbend
|
||||||
|
es-Latn-CO-x-private
|
||||||
|
ab-x-abc-x-abc # anything goes after x
|
||||||
|
ab-x-abc-a-a # ditto
|
||||||
|
i-default # grandfathered
|
||||||
|
i-klingon # grandfathered
|
||||||
|
en
|
||||||
|
de-AT
|
||||||
|
es-419
|
||||||
|
de-CH-1901
|
||||||
|
sr-Cyrl
|
||||||
|
sr-Cyrl-CS
|
||||||
|
sl-Latn-IT-rozaj
|
||||||
|
en-US-x-twain
|
||||||
|
zh-cmn
|
||||||
|
zh-cmn-Hant
|
||||||
|
zh-cmn-Hant-HK
|
||||||
|
zh-gan
|
||||||
|
zh-yue-Hant-HK
|
||||||
|
en-Latn-GB-boont-r-extended-sequence-x-private
|
||||||
|
en-US-boont
|
54
well-formed-tags.txt
Normal file
54
well-formed-tags.txt
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
fr
|
||||||
|
fr-Latn
|
||||||
|
fr-Latn-FR
|
||||||
|
fr-Latn-419
|
||||||
|
fr-FR
|
||||||
|
ax-TZ # Not in the registry, but well-formed
|
||||||
|
fr-shadok # Variant
|
||||||
|
fr-y-myext-myext2
|
||||||
|
fra-Latn # ISO 639 can be 3-letters
|
||||||
|
fra
|
||||||
|
fra-FX
|
||||||
|
i-klingon # grandfathered with singleton
|
||||||
|
I-kLINgon # tags are case-insensitive...
|
||||||
|
no-bok # grandfathered without singleton
|
||||||
|
mn-Cyrl-MN
|
||||||
|
mN-cYrL-Mn
|
||||||
|
fr-Latn-CA
|
||||||
|
en-US
|
||||||
|
fr-Latn-CA
|
||||||
|
i-enochian # Grand fathered
|
||||||
|
x-fr-CH
|
||||||
|
sr-Latn-CS
|
||||||
|
es-419
|
||||||
|
sl-nedis
|
||||||
|
de-CH-1996
|
||||||
|
de-Latg-1996
|
||||||
|
sl-IT-nedis
|
||||||
|
en-a-bbb-x-a-ccc
|
||||||
|
de-a-value
|
||||||
|
en-Latn-GB-boont-r-extended-sequence-x-private
|
||||||
|
en-x-US
|
||||||
|
az-Arab-x-AZE-derbend
|
||||||
|
es-Latn-CO-x-private
|
||||||
|
en-US-boont
|
||||||
|
ab-x-abc-x-abc # anything goes after x
|
||||||
|
ab-x-abc-a-a # ditto
|
||||||
|
i-default # grandfathered
|
||||||
|
i-klingon # grandfathered
|
||||||
|
abcd-Latn # Language of 4 chars reserved for future use
|
||||||
|
AaBbCcDd-x-y-any-x # Language of 5-8 chars, registered
|
||||||
|
en
|
||||||
|
de-AT
|
||||||
|
es-419
|
||||||
|
de-CH-1901
|
||||||
|
sr-Cyrl
|
||||||
|
sr-Cyrl-CS
|
||||||
|
sl-Latn-IT-rozaj
|
||||||
|
en-US-x-twain
|
||||||
|
zh-cmn
|
||||||
|
zh-cmn-Hant
|
||||||
|
zh-cmn-Hant-HK
|
||||||
|
zh-gan
|
||||||
|
zh-yue-Hant-HK
|
||||||
|
xr-p-lze # Extension
|
Loading…
Reference in New Issue
Block a user