Initial import
This commit is contained in:
parent
c9247066e2
commit
2fc541c0a7
129
Grammar.hs
Normal file
129
Grammar.hs
Normal file
@ -0,0 +1,129 @@
|
||||
{-# LANGUAGE NoMonomorphismRestriction,FlexibleContexts #-}
|
||||
|
||||
module Grammar (module Text.ParserCombinators.Parsec, tag, testTag,
|
||||
getTag) where
|
||||
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Types
|
||||
import Parsers
|
||||
|
||||
-- "Tags for Identifying Languages" RFC 5646
|
||||
-- We use the names of the productions of the grammar in the RFC
|
||||
|
||||
dash = char '-' <?> "dash"
|
||||
|
||||
charI = char 'i' <|> char 'I'
|
||||
charX = char 'x' <|> char 'X'
|
||||
|
||||
alphaNumNotX c =
|
||||
c >= 'A' && c <= 'W' ||
|
||||
c >= 'Y' && c <= 'Z' ||
|
||||
c >= 'a' && c <= 'w' ||
|
||||
c >= 'y' && c <= 'z' ||
|
||||
c >= '0' && c <= '9'
|
||||
|
||||
languageTag :: GenParser Char () Tag
|
||||
languageTag =do
|
||||
value <- try (langtag) <|> try (privateuse) <|> irregular
|
||||
return value
|
||||
|
||||
langtag = do
|
||||
theLanguage <- language
|
||||
theExtlang <- facultative (try (do {dash ; value <- extlang ; notFollowedBy alphaNum;
|
||||
return value}));
|
||||
theScript <- facultative (try (do {dash ; value <- script ; notFollowedBy alphaNum;
|
||||
return value}));
|
||||
theRegion <- facultative (try (do {dash ; value <- region ; notFollowedBy alphaNum;
|
||||
return value}));
|
||||
theVariants <- many (try (do {dash ; value <- variant ; notFollowedBy alphaNum;
|
||||
return value}));
|
||||
theExtensions <- many (try (do {dash ; value <- extension; notFollowedBy alphaNum;
|
||||
return value}))
|
||||
-- TODO: use a context-sensitive grammar (see Parsec
|
||||
-- documentation) to enforce "2.2.6. Extension Subtags" which
|
||||
-- says that an extension cannot be repeated.
|
||||
optional (do {dash; privateuse})
|
||||
eof
|
||||
return (Tag theLanguage theExtlang theScript theRegion theVariants theExtensions)
|
||||
|
||||
language = do
|
||||
value <- do
|
||||
try (do {lang <- countBetween 2 3 letter; notFollowedBy alphaNum; return lang})
|
||||
-- Shortest ISO 639 code
|
||||
-- TODO: returns the extended, too!
|
||||
<|>
|
||||
try (do {lang <- count 4 letter; notFollowedBy alphaNum; return lang}) -- reserved for future use.
|
||||
<|> (countBetween 5 8 letter) -- registered language subtag. TODO: return the value!
|
||||
return value
|
||||
|
||||
-- RFC, section 2.2.2: "Although the ABNF production 'extlang' permits
|
||||
-- up to three extended language tags"
|
||||
extlang = do
|
||||
value <- count 3 letter
|
||||
return value
|
||||
|
||||
script = do
|
||||
value <- count 4 letter -- ISO 15924 code
|
||||
return value
|
||||
|
||||
region = do
|
||||
count 2 letter -- ISO 3166 code
|
||||
<|> count 3 digit -- United Nations M.49 code
|
||||
|
||||
variant = do
|
||||
do {first <- digit ; rest <- countBetween 3 7 alphaNum; return (first:rest)}
|
||||
<|> do {first <- letter ; rest <- countBetween 4 7 alphaNum; return (first:rest)}
|
||||
|
||||
extension = do
|
||||
theSingleton <- singleton
|
||||
theValues <- many1 (try (do {dash ; extensionName}))
|
||||
return (theSingleton, theValues)
|
||||
|
||||
extensionName = do
|
||||
countBetween 2 8 alphaNum
|
||||
|
||||
singleton = satisfy (alphaNumNotX)
|
||||
|
||||
privateuse = do
|
||||
charX
|
||||
values <- many1 (do {dash; countBetween 1 8 alphaNum})
|
||||
eof
|
||||
return (Priv values)
|
||||
|
||||
-- Irregular tags only
|
||||
registeredIrregular = try (istring "en-GB-oed") <|> try (istring "i-ami") <|> try (istring "i-bnn") <|> try (istring "i-default")
|
||||
<|> try (istring "i-enochian") <|> try (istring "i-hak") <|> try (istring "i-klingon") <|> try (istring "i-lux")
|
||||
<|> try (istring "i-mingo") <|> try (istring "i-navajo") <|> try (istring "i-pwn") <|> try (istring "i-tao")
|
||||
<|> try (istring "i-tay") <|> try (istring "i-tsu") <|> try (istring "sgn-BE-fr") <|>try ( istring "sgn-BE-nl")
|
||||
<|> try (istring "sgn-CH-de") <|> try (istring "no-bok")
|
||||
<|> try (istring"no-nyn")
|
||||
<|> try (istring"zh-cmn")
|
||||
<|> try (istring"zh-cmn-Hans")
|
||||
<|> try (istring"zh-cmn-Hant")
|
||||
<|> try (istring"zh-gan")
|
||||
<|> try (istring"zh-min")
|
||||
<|> try (istring"zh-min-nan")
|
||||
<|> try (istring"zh-wuu")
|
||||
<|> try (istring"zh-yue")
|
||||
|
||||
irregular = do
|
||||
value <- registeredIrregular
|
||||
return (GF value)
|
||||
|
||||
tag = do
|
||||
value <- languageTag
|
||||
return value
|
||||
|
||||
-- Utilities
|
||||
testTag :: String -> Bool
|
||||
testTag input
|
||||
= case (parse languageTag "" input) of
|
||||
Left err -> False
|
||||
Right x -> True
|
||||
|
||||
getTag :: String -> Either String Tag
|
||||
getTag input =
|
||||
case (parse tag "" input) of
|
||||
Left err -> Left (input ++ " is NOT well-formed: " ++ (show err) ++
|
||||
"\n")
|
||||
Right theTag -> Right theTag
|
4
INSTALL
Normal file
4
INSTALL
Normal file
@ -0,0 +1,4 @@
|
||||
Pre-requisites on Debian:
|
||||
|
||||
apt install ghc libghc-hunit-dev libghc-regex-compat-tdfa-dev
|
||||
|
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
||||
Copyright (c) <year> <owner>.
|
||||
Copyright (c) 2006. Stéphane Bortzmeyer
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
|
38
Makefile
Normal file
38
Makefile
Normal file
@ -0,0 +1,38 @@
|
||||
GHC=ghc
|
||||
MODULES=Grammar.hs Types.hs
|
||||
REGISTRY_MODULES=Registry/Grammar.hs Registry/Types.hs Registry/Registry.hs Registry/Utils.hs
|
||||
TESTS=broken-tags.txt well-formed-tags.txt
|
||||
ALL_MODULES=${MODULES}
|
||||
REGISTRY=http://www.iana.org/assignments/language-subtag-registry
|
||||
REGISTRYFILE=language-subtag-registry
|
||||
ALL_PROGRAMS=check-wf check-valid tests-from-files display-tag registry2xml registry2txt registry2postgresql check-registry
|
||||
TARBALL=/tmp/gabuzomeu.tar
|
||||
|
||||
default: check-wf check-valid check-registry
|
||||
|
||||
all: ${ALL_PROGRAMS}
|
||||
|
||||
%: %.hs ${ALL_MODULES}
|
||||
${GHC} -o $@ --make $<
|
||||
|
||||
%: Registry/%.hs ${MODULES}
|
||||
${GHC} -o $@ --make $<
|
||||
|
||||
test: test-tags test-regs
|
||||
|
||||
test-tags: tests-from-files ${ALL_MODULES} ${TESTS}
|
||||
./$<
|
||||
|
||||
test-regs: test-registries ${ALL_MODULES} ${REGISTRY_MODULES} ${TESTS}
|
||||
./$<
|
||||
|
||||
${REGISTRYFILE}: check-registry
|
||||
wget -O $@ ${REGISTRY}
|
||||
./check-registry ${REGISTRYFILE}
|
||||
touch ${REGISTRYFILE}
|
||||
|
||||
dist: clean
|
||||
(cd ..; tar cvf ${TARBALL} --exclude hnop --exclude ltr*.txt GaBuZoMeu && gzip --best --force --verbose ${TARBALL})
|
||||
|
||||
clean:
|
||||
rm -f list-grandfathered-tags ${REGISTRYFILE} ${ALL_PROGRAMS} \#*\# *~ *.o *.hi *~ Registry/*.o Registry/*.hi Registry/*~
|
32
Parsers.hs
Normal file
32
Parsers.hs
Normal file
@ -0,0 +1,32 @@
|
||||
module Parsers where
|
||||
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Data.Maybe (catMaybes)
|
||||
import Data.Char (toUpper)
|
||||
import Control.Monad (liftM)
|
||||
|
||||
-- Parsing functions not in Text.ParserCombinators.Parsec
|
||||
|
||||
-- There is also a good ine in hsemail, manyNtoM
|
||||
countBetween m n p
|
||||
| n < m = error "First bound must be lower or equal than second bound"
|
||||
| otherwise = do
|
||||
xs <- try (count m p)
|
||||
ys <- try (count (n - m) ((option Nothing) (do
|
||||
y <- p
|
||||
return (Just y))))
|
||||
return (xs ++ catMaybes ys)
|
||||
|
||||
facultative p = liftM Just p <|> return Nothing
|
||||
|
||||
-- Case-insensitive parsers stloen from hsemail
|
||||
-- Case-insensitive variant of Parsec's 'char' function.
|
||||
ichar :: Char -> CharParser st Char
|
||||
ichar c = satisfy (\x -> toUpper x == toUpper c)
|
||||
-- Case-insensitive variant of Parsec's 'string' function.
|
||||
istring :: String -> CharParser st String
|
||||
istring cs = mapM ichar cs <?> cs
|
||||
|
||||
--
|
||||
-- End of parsing functions
|
||||
|
@ -1,3 +1,8 @@
|
||||
# GaBuZoMeu
|
||||
|
||||
GaBuZoMeu is a set of programs to parse and check language tags and language tag registry (see RFC 5646).
|
||||
GaBuZoMeu is a set of programs to parse and check language tags and
|
||||
language tag registry (see RFC 5646).
|
||||
|
||||
## Installation
|
||||
|
||||
See INSTALL
|
||||
|
164
Registry/Grammar.hs
Normal file
164
Registry/Grammar.hs
Normal file
@ -0,0 +1,164 @@
|
||||
module Registry.Grammar where
|
||||
|
||||
import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline)
|
||||
import Parsers
|
||||
import Registry.Types
|
||||
import Data.Maybe
|
||||
import Data.Char(ord, chr)
|
||||
import Numeric
|
||||
import Prelude hiding (lookup)
|
||||
|
||||
-- Too low-level to be in Registry.Types
|
||||
|
||||
data Field = Field {field'name::String, field'value::String} deriving Show
|
||||
|
||||
colon = char ':' <?> "colon"
|
||||
dash = char '-' <?> "dash"
|
||||
space = char ' ' <?> "space"
|
||||
newline = many1 (try (string "\n\r") <|> string "\n" <|> string "\r") <?> "new line"
|
||||
spaces = many space
|
||||
|
||||
doublepercent = do {string "%%"; newline} <?> "double percent line"
|
||||
|
||||
extract :: Field -> (String, String)
|
||||
extract f = (field'name f, field'value f)
|
||||
|
||||
-- TODO: make it case-insensitive
|
||||
member :: String -> [(String, String)] -> Bool
|
||||
member key [] = False
|
||||
member key dict = let head = dict !! 0 in
|
||||
if key == fst head then
|
||||
True
|
||||
else
|
||||
member key (tail dict)
|
||||
|
||||
-- TODO: make it case-insensitive
|
||||
lookup :: String -> [(String, String)] -> Maybe String
|
||||
lookup key [] = Nothing
|
||||
lookup key dict = let head = dict !! 0 in
|
||||
if key == fst head then
|
||||
Just (snd head)
|
||||
else
|
||||
lookup key (tail dict)
|
||||
|
||||
lookups :: String -> [(String, String)] -> [String]
|
||||
lookups key [] = []
|
||||
lookups key dict = let head = dict !! 0 in
|
||||
if key == fst head then
|
||||
(snd head) : lookups key (tail dict)
|
||||
else
|
||||
lookups key (tail dict)
|
||||
|
||||
suppressScript r = lookup "Suppress-Script" r
|
||||
|
||||
recordFactory :: [(String, String)] -> Either String Record
|
||||
recordFactory r =
|
||||
if member "Type" r then
|
||||
let thetype = fromJust (lookup "Type" r) in
|
||||
-- TODO: test that the field exist. fromJust will raise an exception but we can
|
||||
-- provide a better error message if we test
|
||||
case thetype of
|
||||
"language" -> Right (Lang (Language
|
||||
(fromJust (lookup "Subtag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))
|
||||
(suppressScript r)
|
||||
(lookup "Preferred-Value" r)
|
||||
(lookup "Macrolanguage" r)
|
||||
(lookup "Deprecated" r)
|
||||
(lookup "Scope" r)
|
||||
))
|
||||
"extlang" -> Right (Extl (Extlang
|
||||
(fromJust (lookup "Subtag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))
|
||||
(suppressScript r)
|
||||
(fromJust (lookup "Prefix" r))
|
||||
(lookup "Preferred-Value" r)
|
||||
(lookup "Macrolanguage" r)
|
||||
(lookup "Scope" r)
|
||||
))
|
||||
"script" -> Right (Scr (Script
|
||||
(fromJust (lookup "Subtag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))))
|
||||
"region" -> Right (Reg (Region
|
||||
(fromJust (lookup "Subtag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))))
|
||||
"variant" -> Right (Var (Variant
|
||||
(fromJust (lookup "Subtag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))
|
||||
(lookups "Prefix" r)))
|
||||
"redundant" -> Right (Red (Redundant
|
||||
(fromJust (lookup "Tag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))))
|
||||
"grandfathered" -> Right (GF (Grandfathered
|
||||
(fromJust (lookup "Tag" r))
|
||||
(lookups "Description" r)
|
||||
(fromJust (lookup "Added" r))))
|
||||
_ -> Left ("Unknown type \"" ++ thetype ++ "\"")
|
||||
else
|
||||
if member "File-Date" r then
|
||||
Right (Date (fromJust (lookup "File-Date" r)))
|
||||
else
|
||||
Left "Invalid record: no Type or File-Date"
|
||||
|
||||
record = do
|
||||
fields <- many1 field <?> "record"
|
||||
let therecord = map extract fields
|
||||
return (recordFactory therecord)
|
||||
|
||||
fieldname = do
|
||||
start <- alphaNum
|
||||
rest <- many (alphaNum <|> dash)
|
||||
return ([start] ++ rest)
|
||||
|
||||
fieldvalue = many (unicodechar <|> space <|> try (do {newline; space})) <?> "field value"
|
||||
|
||||
unicodechar = satisfy (\thechar ->
|
||||
let c = (ord thechar) in
|
||||
(c >= 0x21 && c <= 0x10ffff))
|
||||
<?> "Character"
|
||||
|
||||
field = do
|
||||
name <- fieldname
|
||||
spaces
|
||||
colon
|
||||
spaces
|
||||
value <- fieldvalue
|
||||
newline
|
||||
return (Field name value)
|
||||
<?> "field"
|
||||
|
||||
registry = do
|
||||
allrecords <- record `sepBy` doublepercent
|
||||
eof
|
||||
return allrecords
|
||||
|
||||
isRight (Right a) = True
|
||||
isRight (Left a) = False
|
||||
|
||||
fromRight (Right a) = a
|
||||
fromRight (Left a) = error "Not a right value"
|
||||
|
||||
isLeft (Left a) = True
|
||||
isLeft (Right a) = False
|
||||
|
||||
fromLeft (Left a) = a
|
||||
fromLeft (Right a) = error "Not a left value"
|
||||
|
||||
parse :: String -> CheckResult
|
||||
parse input =
|
||||
case (Parsec.parse Registry.Grammar.registry "" input) of
|
||||
Left err -> SyntaxError ("Registry is not legal: " ++ (show err))
|
||||
Right reg -> let ok = and (map isRight reg) in
|
||||
if ok then
|
||||
Success (map fromRight reg)
|
||||
else
|
||||
let firstErr = (filter isLeft reg) !! 0 in
|
||||
SyntaxError ("Registry is not legal: " ++
|
||||
fromLeft firstErr)
|
||||
-- TODO: the line number
|
229
Registry/Registry.hs
Normal file
229
Registry/Registry.hs
Normal file
@ -0,0 +1,229 @@
|
||||
module Registry.Registry where
|
||||
|
||||
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||
import qualified System.IO
|
||||
import Data.Maybe
|
||||
import Registry.Types
|
||||
import Registry.Utils
|
||||
import Registry.Grammar (registry, member, parse)
|
||||
import Types
|
||||
import Grammar
|
||||
|
||||
defaultInfile = "./language-subtag-registry"
|
||||
|
||||
type Result = (Bool, String)
|
||||
|
||||
-- Combines a former result with a new result, yielding a result.
|
||||
combine :: Result -> Result -> Result
|
||||
combine (True, _) (False, s) = (False, s)
|
||||
combine (False, s1) (False, s2) = (False, s1 ++ "; " ++ s2)
|
||||
combine (False, s) (True, _) = (False, s)
|
||||
combine (True, _) (True, _) = (True, "")
|
||||
|
||||
readRegistry filename = do
|
||||
input <- System.IO.readFile filename
|
||||
return (getRegistry input)
|
||||
|
||||
getRegistry input =
|
||||
let reg = Registry.Grammar.parse input in
|
||||
if not (checkOK reg) then
|
||||
error "Illegal registry"
|
||||
else
|
||||
registryOf reg
|
||||
|
||||
isValidLanguage l dict =
|
||||
if length (filter (onlyThisValue "language" l) dict) == 1 then
|
||||
(True, "")
|
||||
else
|
||||
(False, "Unknown language " ++ l)
|
||||
|
||||
isValidScript Nothing _ = (True, "")
|
||||
isValidScript s dict =
|
||||
if length (filter (onlyThisValue "script" (fromJust s)) dict) == 1 then
|
||||
(True, "")
|
||||
else
|
||||
(False, "Unknown script " ++ (fromJust s))
|
||||
|
||||
isValidRegion Nothing _ = (True, "")
|
||||
isValidRegion r dict =
|
||||
if length (filter (onlyThisValue "region" (fromJust r)) dict) == 1 then
|
||||
(True, "")
|
||||
else
|
||||
(False, "Unknown region " ++ (fromJust r))
|
||||
|
||||
isValidVariants [] _ = (True, "")
|
||||
isValidVariants v dict = foldl combine (True, "") (map (isValidVariant dict) v)
|
||||
|
||||
isValidVariant dict v =
|
||||
if length (filter (onlyThisValue "variant" v) dict) == 1 then
|
||||
(True, "")
|
||||
else
|
||||
(False, "Unknown variant " ++ v)
|
||||
|
||||
isValidExtension _ _ = (True, "")
|
||||
-- TODO: a better test
|
||||
|
||||
-- Semantic testing
|
||||
|
||||
-- Only one extension by the same singleton
|
||||
correctNumber :: [Singleton] -> Singleton -> Bool
|
||||
correctNumber l s = length (filter (\c -> (c == s)) l) == 1
|
||||
|
||||
getSingleton :: Types.Extension -> Singleton
|
||||
getSingleton (singleton, names) = singleton
|
||||
|
||||
norepeat :: Extensions -> Bool
|
||||
norepeat extensions =
|
||||
let singletons = map getSingleton extensions in
|
||||
and (map (correctNumber singletons) singletons)
|
||||
|
||||
semanticTests (Tag lang extlang script region variants extensions) =
|
||||
if norepeat extensions then
|
||||
(True, "")
|
||||
else
|
||||
(False, "Repeated extension singleton")
|
||||
semanticTests _ = (True, "") -- No tests for privateuse and grandfathered
|
||||
|
||||
isValid :: Registry -> Tag -> Result
|
||||
isValid reg (Tag l el s r v e) =
|
||||
let languages = filter (onlyThisType "language") reg in
|
||||
let scripts = filter (onlyThisType "script") reg in
|
||||
let regions = filter (onlyThisType "region") reg in
|
||||
let variants = filter (onlyThisType "variant") reg in
|
||||
let extensions = filter (onlyThisType "extension") reg in
|
||||
semanticTests (Tag l el s r v e) `combine`
|
||||
(isValidLanguage l languages) `combine` (isValidScript s scripts) `combine`
|
||||
(isValidRegion r regions) `combine` (isValidVariants v variants) `combine`
|
||||
(isValidExtension e extensions)
|
||||
|
||||
isValid reg (Types.GF tag) =
|
||||
(True, "")
|
||||
-- It is passed well-formedness (syntactic) tests, it is good
|
||||
|
||||
isValid reg (Priv p) =
|
||||
(True, "") -- By definition, private tags are always valid
|
||||
|
||||
testTag :: Registry -> String -> Bool
|
||||
testTag reg tag =
|
||||
case (Parsec.parse Grammar.tag "" tag) of
|
||||
Left err -> False
|
||||
Right t -> if fst (semanticTests t) then
|
||||
fst (isValid reg t)
|
||||
else
|
||||
False
|
||||
|
||||
checkOK (Success p) = True
|
||||
checkOK _ = False
|
||||
|
||||
messageOf (Success p) = error "Check was a success, no message available"
|
||||
messageOf (SyntaxError s) = s
|
||||
messageOf (SemanticError s) = s
|
||||
|
||||
registryOf (Success p) = p
|
||||
registryOf _ = error "Check was a failure, no registry available"
|
||||
|
||||
semanticCheck :: Registry -> CheckResult
|
||||
semanticCheck r =
|
||||
-- TODO: test that the extlangs exist
|
||||
oneDateTest r `combineC` prefixesExist r `combineC`
|
||||
scriptsExist r `combineC` noDuplicates r `combineC` preferredValuesExist r
|
||||
`combineC` macrolanguagesExist r
|
||||
|
||||
oneDateTest reg =
|
||||
if (length (filter onlyDate reg) == 1) then
|
||||
Success reg
|
||||
else
|
||||
SemanticError "One and only one Field-Date record authorized"
|
||||
|
||||
macrolanguageExist reg language =
|
||||
let macrolang = lang'macroLanguage (languageFrom language) in
|
||||
if isJust macrolang then
|
||||
length (filter (onlyThisValue "language" (fromJust macrolang))
|
||||
(filter onlyLang reg)) > 0
|
||||
else
|
||||
True
|
||||
|
||||
macrolanguagesExist reg =
|
||||
let languages = filter onlyLang reg in
|
||||
let missing = filter (not.macrolanguageExist reg) languages in
|
||||
if length missing == 0 then
|
||||
Success reg
|
||||
else
|
||||
SemanticError ("Some languages have a Macrolanguage which does not appear in the registry: " ++
|
||||
(show missing))
|
||||
|
||||
-- Extracts the langage subtag, the first one
|
||||
languageOf tag =
|
||||
takeWhile (/= '-') tag
|
||||
|
||||
prefixExist reg variant =
|
||||
-- TODO: We test only that the language part of the prefix is OK, we should test
|
||||
-- the rest
|
||||
and (map (\r -> length (filter (onlyThisValue "language" (languageOf r))
|
||||
(filter onlyLang reg)) > 0)
|
||||
(variant'prefix (variantFrom variant)))
|
||||
|
||||
prefixesExist reg =
|
||||
let variants = filter onlyVariant reg in
|
||||
let missing = filter (not.prefixExist reg) variants in
|
||||
if length missing == 0 then
|
||||
Success reg
|
||||
else
|
||||
SemanticError ("Some variants have a Prefix which does not appear in the registry: " ++
|
||||
(show missing))
|
||||
|
||||
scriptExist reg language =
|
||||
let script = lang'script (languageFrom language) in
|
||||
if isJust script then
|
||||
length (filter (onlyThisValue "script" (fromJust script))
|
||||
(filter onlyScript reg)) > 0
|
||||
else
|
||||
True
|
||||
|
||||
scriptsExist reg =
|
||||
let languages = filter onlyLang reg in
|
||||
let missing = filter (not.scriptExist reg) languages in
|
||||
if length missing == 0 then
|
||||
Success reg
|
||||
else
|
||||
SemanticError ("Some languages have a Suppress-script which does not appear in the registry: " ++
|
||||
(show missing))
|
||||
|
||||
preferredLangValueExist reg language =
|
||||
let preferredValue = lang'preferredValue (languageFrom language) in
|
||||
if isJust preferredValue then
|
||||
length (filter (onlyThisValue "language" (fromJust preferredValue))
|
||||
(filter onlyLang reg)) > 0
|
||||
else
|
||||
True
|
||||
|
||||
preferredValuesExist reg =
|
||||
-- TODO: test also regions and scripts
|
||||
let languages = filter onlyLang reg in
|
||||
let missing = filter (not.preferredLangValueExist reg) languages in
|
||||
if length missing == 0 then
|
||||
Success reg
|
||||
else
|
||||
SemanticError ("Some records have a Preferred-value which does not appear in the registry: " ++
|
||||
(show missing))
|
||||
|
||||
otherRecord reg category val =
|
||||
tail (filter (onlyThisValue category val) (filter (onlyThisType category) reg))
|
||||
|
||||
duplicate reg category =
|
||||
case category of
|
||||
"language" ->
|
||||
filter (\t -> length t > 0) (map (otherRecord reg category)
|
||||
(map (lang'subtag.languageFrom)
|
||||
(filter (onlyThisType category) reg)))
|
||||
_ -> [] -- TODO: implements them, too
|
||||
|
||||
noDuplicates reg =
|
||||
let duplicates = concat (map (duplicate reg)
|
||||
["language", "script", "variant", "region", "grandfathered"]) in
|
||||
if length duplicates == 0 then
|
||||
Success reg
|
||||
else
|
||||
SemanticError ("Duplicate values: " ++ (show duplicates))
|
||||
|
||||
-- TODO: tests Preferred-value
|
59
Registry/Types.hs
Normal file
59
Registry/Types.hs
Normal file
@ -0,0 +1,59 @@
|
||||
module Registry.Types where
|
||||
|
||||
-- TODO: import Time and use CalendarTime
|
||||
type DateTime = String
|
||||
|
||||
-- TODO for all: Comments
|
||||
|
||||
data Language = Language {lang'subtag::String, lang'descr::[String],
|
||||
lang'added::DateTime, lang'script::Maybe String,
|
||||
lang'preferredValue::Maybe String,
|
||||
lang'macroLanguage::Maybe String,
|
||||
lang'deprecated::Maybe DateTime,
|
||||
lang'scope::Maybe String}
|
||||
deriving Show
|
||||
|
||||
data Extlang = Extlang {extlang'subtag::String, extlang'descr::[String],
|
||||
extlang'added::DateTime, extlang'script::Maybe String,
|
||||
extlang'prefix::String, -- An extlang can have only one prefix
|
||||
extlang'preferredValue::Maybe String,
|
||||
extlang'macroLanguage::Maybe String,
|
||||
extlang'scope::Maybe String}
|
||||
deriving Show
|
||||
|
||||
data Script = Script {script'subtag::String, script'descr::[String],
|
||||
script'added::DateTime} deriving Show
|
||||
|
||||
data Region = Region {region'subtag::String, region'descr::[String],
|
||||
region'added::DateTime} deriving Show
|
||||
-- TODO: Deprecated and preferredValue
|
||||
|
||||
data Variant = Variant {variant'subtag::String, variant'descr::[String],
|
||||
variant'added::DateTime, variant'prefix::[String]} deriving Show
|
||||
|
||||
data Redundant = Redundant {redundant'tag::String, redundant'descr::[String],
|
||||
redundant'added::DateTime} deriving Show
|
||||
|
||||
data Grandfathered = Grandfathered {gf'tag::String, gf'descr::[String],
|
||||
gf'added::DateTime} deriving Show
|
||||
|
||||
-- TODO: implements them
|
||||
data Extension = Extension {ext'added::DateTime} deriving Show
|
||||
|
||||
data Record = Reg Region | Lang Language | Extl Extlang | Scr Script | Var Variant |
|
||||
Red Redundant | GF Grandfathered | Ext Extension | Date DateTime deriving Show
|
||||
|
||||
type Registry = [Record]
|
||||
|
||||
data CheckResult = Success Registry | SyntaxError String | SemanticError String
|
||||
deriving Show
|
||||
|
||||
combineC (Success r1) (Success r2) = Success r1
|
||||
combineC (Success r) (SyntaxError e) = SyntaxError e
|
||||
combineC (SyntaxError e) (Success r) = SyntaxError e
|
||||
combineC (Success r) (SemanticError e) = SemanticError e
|
||||
combineC (SemanticError e) (Success r) = SemanticError e
|
||||
combineC (SyntaxError e1) (SemanticError e2) = SyntaxError e1
|
||||
combineC (SemanticError e2) (SyntaxError e1) = SyntaxError e1
|
||||
combineC (SyntaxError e1) (SyntaxError e2) = SyntaxError (e1 ++ " ; " ++ e2)
|
||||
combineC (SemanticError e1) (SemanticError e2) = SemanticError (e1 ++ " ; " ++ e2)
|
216
Registry/Utils.hs
Normal file
216
Registry/Utils.hs
Normal file
@ -0,0 +1,216 @@
|
||||
module Registry.Utils where
|
||||
|
||||
import Registry.Grammar
|
||||
import Registry.Types
|
||||
|
||||
import qualified Data.List as List
|
||||
import Data.Char (toUpper)
|
||||
|
||||
toUpperS :: String -> String
|
||||
toUpperS = map toUpper
|
||||
|
||||
onlyDate (Date d) = True
|
||||
onlyDate _ = False
|
||||
|
||||
onlyRecord (Date d) = False
|
||||
onlyRecord _ = True
|
||||
|
||||
onlyGF (GF r) = True
|
||||
onlyGF _ = False
|
||||
|
||||
onlyLang (Lang r) = True
|
||||
onlyLang _ = False
|
||||
|
||||
onlyExtlang (Extl r) = True
|
||||
onlyExtlang _ = False
|
||||
|
||||
onlyScript (Scr r) = True
|
||||
onlyScript _ = False
|
||||
|
||||
onlyRegion (Reg r) = True
|
||||
onlyRegion _ = False
|
||||
|
||||
onlyVariant (Var r) = True
|
||||
onlyVariant _ = False
|
||||
|
||||
onlyRedundant (Red r) = True
|
||||
onlyRedundant _ = False
|
||||
|
||||
onlyExtension (Red r) = True
|
||||
onlyExtension _ = False
|
||||
|
||||
languageFrom (Lang r) = r
|
||||
languageFrom x = error "Not a language"
|
||||
|
||||
extlangFrom (Extl r) = r
|
||||
extlangFrom x = error "Not an extlang (extended language)"
|
||||
|
||||
scriptFrom (Scr r) = r
|
||||
scriptFrom _ = error "Not a script"
|
||||
|
||||
regionFrom (Reg r) = r
|
||||
regionFrom _ = error "Not a region"
|
||||
|
||||
variantFrom (Var v) = v
|
||||
variantFrom _ = error "Not a variant"
|
||||
|
||||
redundantFrom (Red r) = r
|
||||
redundantFrom _ = error "Not a redundant"
|
||||
|
||||
gfFrom (GF r) = r
|
||||
gfFrom _ = error "Not a grandfathered"
|
||||
|
||||
onlyThisType :: String -> Record -> Bool
|
||||
onlyThisType "language" = onlyLang
|
||||
onlyThisType "extlang" = onlyExtlang
|
||||
onlyThisType "grandfathered" = onlyGF
|
||||
onlyThisType "script" = onlyScript
|
||||
onlyThisType "region" = onlyRegion
|
||||
onlyThisType "variant" = onlyVariant
|
||||
onlyThisType "redundant" = onlyRedundant
|
||||
onlyThisType "extension" = onlyExtension
|
||||
onlyThisType t = error ("Unknown type in the registry: " ++ t)
|
||||
|
||||
onlyThisValue :: String -> String -> Record -> Bool
|
||||
-- Remember that language tags are case-insensitive
|
||||
onlyThisValue "language" v = (\r -> toUpperS (lang'subtag r) == toUpperS (v)).languageFrom
|
||||
onlyThisValue "extlang" v = (\r -> toUpperS (extlang'subtag r) == toUpperS (v)).extlangFrom
|
||||
onlyThisValue "grandfathered" v = (\r -> toUpperS (gf'tag r) == toUpperS (v)).gfFrom
|
||||
onlyThisValue "script" v = (\r -> toUpperS (script'subtag r) == toUpperS (v)).scriptFrom
|
||||
onlyThisValue "region" v = (\r -> toUpperS (region'subtag r) == toUpperS (v)).regionFrom
|
||||
onlyThisValue "variant" v = (\r -> toUpperS (variant'subtag r) == toUpperS (v)).variantFrom
|
||||
onlyThisValue "redundant" v = (\r -> toUpperS (redundant'tag r) == toUpperS (v)).redundantFrom
|
||||
-- TODO: extensions
|
||||
onlyThisValue t _ = error ("Unknown type of this value in the registry: " ++ t)
|
||||
|
||||
toSubtag (Lang l) = (lang'subtag l)
|
||||
toSubtag (Scr s) = (script'subtag s)
|
||||
toSubtag (GF t) = (gf'tag t)
|
||||
toSubtag (Reg r) = (region'subtag r)
|
||||
toSubtag (Var v) = (variant'subtag v)
|
||||
toSubtag (Red r) = (redundant'tag r)
|
||||
|
||||
maybeToString Nothing = ""
|
||||
maybeToString (Just s) = s
|
||||
dateToString d = d
|
||||
descrToString a = concat (List.intersperse " / " a)
|
||||
prefixesToString a = concat (List.intersperse " / " a)
|
||||
toString (Lang l) = (lang'subtag l) ++ "\t" ++ dateToString (lang'added l) ++ "\t" ++
|
||||
descrToString (lang'descr l) ++ "\t" ++
|
||||
(maybeToString (lang'script l)) ++ "\n"
|
||||
toString (GF t) = (gf'tag t) ++ "\n"
|
||||
toString (Scr s) = (script'subtag s) ++ "\t" ++ dateToString (script'added s) ++ "\t" ++
|
||||
descrToString (script'descr s) ++ "\n"
|
||||
toString (Reg r) = (region'subtag r) ++ "\t" ++ dateToString (region'added r) ++ "\t" ++
|
||||
descrToString (region'descr r) ++ "\n"
|
||||
toString (Var v) = (variant'subtag v) ++ "\t" ++ dateToString (variant'added v) ++ "\t" ++
|
||||
descrToString (variant'descr v) ++ "\t" ++
|
||||
prefixesToString (variant'prefix v) ++ "\n"
|
||||
toString (Red r) = (redundant'tag r) ++ "\t" ++ dateToString (redundant'added r) ++ "\t" ++
|
||||
descrToString (redundant'descr r) ++ "\n"
|
||||
|
||||
-- TODO: escape non-XML chars
|
||||
dateToXML d = "<date>" ++ d ++ "</date>"
|
||||
addedToXML d = "<added>" ++ d ++ "</added>"
|
||||
deprecatedToXML Nothing = ""
|
||||
deprecatedToXML (Just d) = "<deprecated>" ++ d ++ "</deprecated>"
|
||||
sscriptToXML Nothing = ""
|
||||
sscriptToXML (Just s) = "<suppress-script>" ++ s ++ "</suppress-script>"
|
||||
prefToXML Nothing = ""
|
||||
prefToXML (Just s) = "<preferred-value>" ++ s ++ "</preferred-value>"
|
||||
macrolToXML Nothing = ""
|
||||
macrolToXML (Just l) = "<macrolanguage>" ++ l ++ "</macrolanguage>"
|
||||
scopeToXML Nothing = ""
|
||||
scopeToXML (Just s) = "<scope>" ++ s ++ "</scope>"
|
||||
onedescrToXML s = "<description>" ++ s ++ "</description>"
|
||||
descrToXML a = concat (map onedescrToXML a)
|
||||
oneprefixToXML s = "<prefix>" ++ s ++ "</prefix>"
|
||||
prefixesToXML a = concat (map oneprefixToXML a)
|
||||
toXML (Date d) = dateToXML d
|
||||
toXML (Lang l) = "<language><subtag>" ++ (lang'subtag l) ++ "</subtag>" ++
|
||||
addedToXML (lang'added l) ++
|
||||
descrToXML (lang'descr l) ++
|
||||
(sscriptToXML (lang'script l)) ++
|
||||
deprecatedToXML (lang'deprecated l) ++
|
||||
prefToXML (lang'preferredValue l) ++
|
||||
macrolToXML (lang'macroLanguage l) ++
|
||||
scopeToXML (lang'scope l) ++
|
||||
"</language>\n"
|
||||
toXML (Extl l) = "<extlang><subtag>" ++ (extlang'subtag l) ++ "</subtag>" ++
|
||||
addedToXML (extlang'added l) ++
|
||||
descrToXML (extlang'descr l) ++
|
||||
(sscriptToXML (extlang'script l)) ++
|
||||
macrolToXML (extlang'macroLanguage l) ++
|
||||
scopeToXML (extlang'scope l) ++
|
||||
"</extlang>\n"
|
||||
toXML (GF t) = "<grandfathered><tag>" ++ (gf'tag t) ++ "</tag>" ++
|
||||
addedToXML (gf'added t) ++
|
||||
descrToXML (gf'descr t) ++ "</grandfathered>\n"
|
||||
toXML (Scr s) = "<script><subtag>" ++ (script'subtag s) ++ "</subtag>" ++
|
||||
addedToXML (script'added s) ++
|
||||
descrToXML (script'descr s) ++ "</script>\n"
|
||||
toXML (Reg r) = "<region><subtag>" ++ (region'subtag r) ++ "</subtag>" ++
|
||||
addedToXML (region'added r) ++
|
||||
descrToXML (region'descr r) ++ "</region>\n"
|
||||
toXML (Var v) = "<variant><subtag>" ++ (variant'subtag v) ++ "</subtag>" ++
|
||||
addedToXML (variant'added v) ++
|
||||
descrToXML (variant'descr v) ++
|
||||
prefixesToXML (variant'prefix v) ++ "</variant>\n"
|
||||
toXML (Red r) = "<redundant><tag>" ++ (redundant'tag r) ++ "</tag>" ++
|
||||
addedToXML (redundant'added r) ++
|
||||
descrToXML (redundant'descr r) ++ "</redundant>\n"
|
||||
toXML (Ext e) = error "No support for extensions"
|
||||
|
||||
-- TODO: escape non-HTML chars
|
||||
html_head s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head><link rel=\"stylesheet\" type=\"text/css\" href=\"/ltru.css\" /><title>" ++ s ++ "</title></head><body><h1>" ++
|
||||
s ++ "</h1>"
|
||||
html_tail = "<hr/><p><a href=\"/\">langtag.net Home</a></p></body></html>"
|
||||
linkLanguage langcode =
|
||||
if (length langcode) == 3 then
|
||||
"<a href=\"http://www.sil.org/iso639-3/documentation.asp?id=" ++
|
||||
langcode ++ "\">See the SIL entry</a> (if present)."
|
||||
else if (length langcode) == 2 then
|
||||
"<a href=\"http://www.loc.gov/standards/iso639-2/php/langcodes_name.php?iso_639_1=" ++
|
||||
langcode ++ "\">See the LOC entry</a> (if present)."
|
||||
else
|
||||
""
|
||||
dateToHTML d = "Date: " ++ d
|
||||
addedToHTML d = "Added on " ++ d
|
||||
sscriptToHTML Nothing = ""
|
||||
sscriptToHTML (Just s) = " Suppress the script: <code><a href=\"../script/" ++ s ++
|
||||
".html\">" ++ s ++ "</a></code>"
|
||||
onedescrToHTML s = s
|
||||
descrToHTML a = concat (List.intersperse " / " (map onedescrToHTML a))
|
||||
-- TODO: a prefix is not always a simple language, it can be composed of several subtags
|
||||
oneprefixToHTML s = " Possible prefix: <code><a href=\"../language/" ++ s ++ ".html\">" ++ s ++ "</a></code>"
|
||||
prefixesToHTML a = concat (map oneprefixToHTML a)
|
||||
toHTML (Date d) = "<p>Date: " ++ dateToHTML d ++ "</p>"
|
||||
toHTML (Lang l) = (html_head ((lang'descr l) !! 0)) ++ "<p><code>" ++
|
||||
(lang'subtag l) ++ "</code>. " ++
|
||||
addedToHTML (lang'added l) ++ ". Description: " ++
|
||||
descrToHTML (lang'descr l) ++ ". " ++
|
||||
linkLanguage (lang'subtag l) ++
|
||||
(sscriptToHTML (lang'script l)) ++ "</p>" ++ html_tail ++ "\n"
|
||||
toHTML (GF t) = "<p>" ++ (gf'tag t) ++
|
||||
addedToHTML (gf'added t) ++
|
||||
descrToHTML (gf'descr t) ++ "</p>\n"
|
||||
toHTML (Scr s) = (html_head ((script'descr s) !! 0)) ++ "<p><code>" ++
|
||||
(script'subtag s) ++ "</code>." ++
|
||||
addedToHTML (script'added s) ++ ". Description: " ++
|
||||
descrToHTML (script'descr s) ++ "</p>" ++ html_tail ++ "\n"
|
||||
toHTML (Reg r) = (html_head ((region'descr r) !! 0)) ++ "<p>" ++
|
||||
(region'subtag r) ++ " " ++
|
||||
addedToHTML (region'added r) ++
|
||||
". Description: " ++
|
||||
descrToHTML (region'descr r) ++ "</p>" ++
|
||||
html_tail ++ "\n"
|
||||
toHTML (Var v) = (html_head ((variant'descr v) !! 0)) ++ "<p><code>" ++
|
||||
(variant'subtag v) ++ "</code>. " ++
|
||||
addedToHTML (variant'added v) ++ ". Description: " ++
|
||||
descrToHTML (variant'descr v) ++ ". " ++
|
||||
prefixesToHTML (variant'prefix v) ++ "</p>" ++ html_tail ++ "\n"
|
||||
toHTML (Red r) = (html_head ((redundant'descr r) !! 0)) ++
|
||||
"<p>" ++ (redundant'tag r) ++
|
||||
addedToHTML (redundant'added r) ++
|
||||
descrToHTML (redundant'descr r) ++ "</p>" ++
|
||||
html_tail ++ "\n"
|
88
Registry/check-registry.hs
Normal file
88
Registry/check-registry.hs
Normal file
@ -0,0 +1,88 @@
|
||||
|
||||
import qualified System.IO
|
||||
import qualified System.Environment
|
||||
import qualified Data.List as List
|
||||
import qualified System.Exit as Exit
|
||||
import qualified System.Console.GetOpt as GetOpt
|
||||
|
||||
import Registry.Grammar
|
||||
import Registry.Types
|
||||
import Registry.Registry
|
||||
|
||||
nothing = putStr ""
|
||||
|
||||
-- Options handling
|
||||
data Flag
|
||||
= Verbose | Help deriving Show
|
||||
options :: [GetOpt.OptDescr Flag]
|
||||
options =
|
||||
[ GetOpt.Option ['v'] ["verbose"] (GetOpt.NoArg Verbose) "Detailed output",
|
||||
GetOpt.Option ['h'] ["help"] (GetOpt.NoArg Help) "Help message"
|
||||
]
|
||||
data OptionStore = ProgramOptions {verbose::Bool, help::Bool}
|
||||
header = do
|
||||
myself <- System.Environment.getProgName
|
||||
return ("Usage: " ++ myself ++ " [-v] registry-file")
|
||||
fatal detailed msg = do
|
||||
myheader <- header
|
||||
if detailed then
|
||||
System.IO.hPutStrLn System.IO.stderr (GetOpt.usageInfo myheader options)
|
||||
else
|
||||
System.IO.hPutStrLn System.IO.stderr (myheader)
|
||||
if msg /= "" then
|
||||
System.IO.hPutStrLn System.IO.stderr msg
|
||||
else
|
||||
nothing
|
||||
Exit.exitWith (Exit.ExitFailure 1)
|
||||
-- Defaults
|
||||
emptyOptionStore :: OptionStore
|
||||
emptyOptionStore =
|
||||
ProgramOptions {verbose = False, help = False}
|
||||
processFlags :: [Flag] -> OptionStore
|
||||
processFlags [] = emptyOptionStore
|
||||
processFlags (first : rest) =
|
||||
case first of
|
||||
Verbose -> restOpts { verbose = True }
|
||||
Help -> restOpts { help = True }
|
||||
where restOpts = processFlags rest
|
||||
programOpts :: [String] -> IO ([Flag], [String])
|
||||
programOpts argv =
|
||||
case GetOpt.getOpt GetOpt.Permute options argv of
|
||||
(o,n,[] ) -> return(o,n)
|
||||
(_,_,errs) -> fatal True (concat errs)
|
||||
getOptionStore :: ([Flag], [String]) -> OptionStore
|
||||
getOptionStore (f, n) = processFlags f
|
||||
--
|
||||
|
||||
main = do
|
||||
myargs <- System.Environment.getArgs
|
||||
results <- programOpts myargs
|
||||
let opts = getOptionStore results
|
||||
let args = snd results
|
||||
if help opts then
|
||||
fatal True ""
|
||||
else
|
||||
nothing
|
||||
if length (args) /= 1 then
|
||||
fatal False "One and only one argument accepted"
|
||||
else
|
||||
nothing
|
||||
let registryfilename = args !! 0
|
||||
input <- System.IO.readFile registryfilename
|
||||
let syntaxTree = parse input
|
||||
if (checkOK syntaxTree) then do
|
||||
let result = semanticCheck (registryOf syntaxTree)
|
||||
if (checkOK result) then do
|
||||
if verbose opts then
|
||||
putStrLn (registryfilename ++ " is OK") -- TODO: display the date
|
||||
else
|
||||
nothing
|
||||
Exit.exitWith Exit.ExitSuccess
|
||||
else do
|
||||
putStrLn ("Semantic error in " ++ registryfilename ++ ": " ++
|
||||
(messageOf result))
|
||||
Exit.exitWith (Exit.ExitFailure 3)
|
||||
else do
|
||||
putStrLn ("Syntax error in " ++ registryfilename ++ ": " ++
|
||||
(messageOf syntaxTree))
|
||||
Exit.exitWith (Exit.ExitFailure 2)
|
37
Registry/registry2mulhtml.hs
Normal file
37
Registry/registry2mulhtml.hs
Normal file
@ -0,0 +1,37 @@
|
||||
import qualified IO
|
||||
import qualified System
|
||||
|
||||
import Registry.Utils
|
||||
import Registry.Grammar (registry)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
outdir = "./registry-html"
|
||||
|
||||
display reg thistype thisvalue =
|
||||
-- We should always retrieve only one value, so we can take the head
|
||||
toHTML ((filter (onlyThisValue thistype thisvalue)
|
||||
(filter (onlyThisType thistype) reg)) !! 0)
|
||||
|
||||
toHTMLFiles reg thetype =
|
||||
mapM (toHTMLfile reg thetype) (filter (onlyThisType thetype) reg)
|
||||
|
||||
toHTMLfile reg thistype thisvalue = do
|
||||
let outfile = outdir ++ "/" ++ thistype ++ "/" ++ (toSubtag thisvalue) ++ ".html"
|
||||
f <- IO.openFile (outfile) IO.WriteMode
|
||||
IO.hPutStr f (display reg thistype (toSubtag thisvalue))
|
||||
IO.hClose f
|
||||
|
||||
foreach [] func = return ()
|
||||
foreach a func = do
|
||||
func (head a)
|
||||
foreach (tail a) func
|
||||
|
||||
main = do
|
||||
f <- IO.openFile (infile) IO.ReadMode
|
||||
input <- IO.hGetContents f
|
||||
let theregistry = getRegistry input
|
||||
foreach ["language", "script", "region", "variant",
|
||||
"redundant", "grandfathered"]
|
||||
(toHTMLFiles theregistry)
|
||||
|
23
Registry/registry2postgresql.hs
Normal file
23
Registry/registry2postgresql.hs
Normal file
@ -0,0 +1,23 @@
|
||||
import qualified System.IO
|
||||
|
||||
import Registry.Utils
|
||||
import Registry.Utils.PostgreSQL
|
||||
import Registry.Grammar (registry)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
displayAll reg thistype =
|
||||
(foldr (++) "" (map toPostgreSQL
|
||||
(filter (onlyThisType thistype) reg)))
|
||||
|
||||
main = do
|
||||
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||
input <- System.IO.hGetContents f
|
||||
let theregistry = getRegistry input
|
||||
putStrLn "BEGIN;"
|
||||
putStrLn (displayAll theregistry "script")
|
||||
putStrLn (displayAll theregistry "region")
|
||||
putStrLn (displayAll theregistry "language")
|
||||
putStrLn (displayAll theregistry "variant")
|
||||
putStrLn "COMMIT;"
|
24
Registry/registry2sqlite.hs
Normal file
24
Registry/registry2sqlite.hs
Normal file
@ -0,0 +1,24 @@
|
||||
import qualified IO
|
||||
import qualified System
|
||||
|
||||
import Registry.Utils
|
||||
import Registry.Utils.SQLite
|
||||
import Registry.Grammar (registry)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
displayAll reg thistype =
|
||||
(foldr (++) "" (map toSQLite
|
||||
(filter (onlyThisType thistype) reg)))
|
||||
|
||||
main = do
|
||||
f <- IO.openFile (infile) IO.ReadMode
|
||||
input <- IO.hGetContents f
|
||||
let theregistry = getRegistry input
|
||||
putStrLn "BEGIN;"
|
||||
putStrLn (displayAll theregistry "script")
|
||||
putStrLn (displayAll theregistry "region")
|
||||
putStrLn (displayAll theregistry "language")
|
||||
putStrLn (displayAll theregistry "variant")
|
||||
putStrLn "COMMIT;"
|
35
Registry/registry2txt.hs
Normal file
35
Registry/registry2txt.hs
Normal file
@ -0,0 +1,35 @@
|
||||
import qualified System.IO
|
||||
|
||||
import Registry.Utils
|
||||
import Registry.Grammar (registry)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
displayAll reg thistype =
|
||||
(foldr (++) "" (map toString
|
||||
(filter (onlyThisType thistype) reg)))
|
||||
|
||||
displayOne reg thistype thisvalue =
|
||||
concat (map toString
|
||||
(filter (onlyThisValue thistype thisvalue) reg))
|
||||
|
||||
toTextFile reg thetype = do
|
||||
-- TODO: add the date in the filename
|
||||
let outfile = "lsr-" ++ thetype ++ ".txt"
|
||||
f <- System.IO.openFile (outfile) System.IO.WriteMode
|
||||
System.IO.hPutStr f (displayAll reg thetype)
|
||||
System.IO.hClose f
|
||||
|
||||
foreach [] func = return ()
|
||||
foreach a func = do
|
||||
func (head a)
|
||||
foreach (tail a) func
|
||||
|
||||
main = do
|
||||
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||
input <- System.IO.hGetContents f
|
||||
let theregistry = getRegistry input
|
||||
foreach ["language", "script", "region", "variant", "redundant", "grandfathered"]
|
||||
(toTextFile theregistry)
|
||||
|
32
Registry/registry2xml.hs
Normal file
32
Registry/registry2xml.hs
Normal file
@ -0,0 +1,32 @@
|
||||
import qualified System.IO
|
||||
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||
|
||||
import Registry.Utils
|
||||
import Registry.Grammar (registry)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
displayAll reg = "<registry>" ++
|
||||
(foldr (++) "" (map toXML reg)) ++
|
||||
"</registry>"
|
||||
|
||||
displayOne reg thistype thisvalue =
|
||||
concat (map toXML
|
||||
(filter (onlyThisValue thistype thisvalue) reg))
|
||||
|
||||
foreach [] func = return ()
|
||||
foreach a func = do
|
||||
func (head a)
|
||||
foreach (tail a) func
|
||||
|
||||
main = do
|
||||
f <- System.IO.openFile (infile) System.IO.ReadMode
|
||||
input <- System.IO.hGetContents f
|
||||
let theregistry = getRegistry input
|
||||
let outfile = "language-subtag-registry.xml"
|
||||
f <- System.IO.openFile (outfile) System.IO.WriteMode
|
||||
System.IO.hPutStr f (displayAll theregistry)
|
||||
System.IO.hClose f
|
||||
|
||||
|
21
Types.hs
Normal file
21
Types.hs
Normal file
@ -0,0 +1,21 @@
|
||||
-- Haskell types to support the language tags
|
||||
|
||||
module Types where
|
||||
|
||||
type Language = String
|
||||
type Extlang = Maybe String -- Only one extlang authorized
|
||||
type Script = Maybe String
|
||||
type Region = Maybe String
|
||||
type Singleton = Char
|
||||
type Variants = [Variant]
|
||||
type Variant = String
|
||||
type Extension = (Singleton, [String])
|
||||
type Extensions = [Extension]
|
||||
|
||||
type Grandfathered = String
|
||||
type Privateuse = [String]
|
||||
|
||||
data Tag = Tag Language Extlang Script Region Variants Extensions | GF Grandfathered | Priv Privateuse deriving (Show, Eq)
|
||||
-- TODO: warning, Eq must
|
||||
-- be redefined because it needs to take into account case-insensitivity and
|
||||
-- (maybe) canonicalization
|
11
broken-registries/missing-field
Normal file
11
broken-registries/missing-field
Normal file
@ -0,0 +1,11 @@
|
||||
File-Date: 2006-09-20
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
12
broken-registries/no-colon
Normal file
12
broken-registries/no-colon
Normal file
@ -0,0 +1,12 @@
|
||||
File-Date: 2006-09-20
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
12
broken-registries/wrong-type
Normal file
12
broken-registries/wrong-type
Normal file
@ -0,0 +1,12 @@
|
||||
File-Date: 2006-09-20
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: languge
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
30
broken-tags.txt
Normal file
30
broken-tags.txt
Normal file
@ -0,0 +1,30 @@
|
||||
f
|
||||
f-Latn # Main tag too short
|
||||
fr-fra # Extended tag are no longer well-formed
|
||||
fr-Lat # Extended, obsolete
|
||||
xr-lxs-qut # extlangS
|
||||
xr-lqt-qu # extlang + region
|
||||
fr-Latn-F
|
||||
a-value
|
||||
tlh-a-b-foo
|
||||
i-notexist # grandfathered but not registered: invalid, even if we only test well-formedness
|
||||
abcdefghi-012345678
|
||||
ab-abc-abc-abc-abc
|
||||
ab-abcd-abc
|
||||
ab-ab-abc
|
||||
ab-123-abc
|
||||
a-Hant-ZH
|
||||
a1-Hant-ZH
|
||||
ab-abcde-abc
|
||||
ab-1abc-abc
|
||||
ab-ab-abcd
|
||||
ab-123-abcd
|
||||
ab-abcde-abcd
|
||||
ab-1abc-abcd
|
||||
ab-a-b
|
||||
ab-a-x
|
||||
ab--ab
|
||||
ab-abc-
|
||||
-ab-abc
|
||||
abcd-efg
|
||||
aabbccddE
|
21
check-valid.hs
Normal file
21
check-valid.hs
Normal file
@ -0,0 +1,21 @@
|
||||
import Grammar
|
||||
import System.Environment (getArgs)
|
||||
import Registry.Registry
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
analyze reg input
|
||||
= case (getTag input) of
|
||||
Left err -> err
|
||||
Right tag -> let result = isValid reg tag in
|
||||
if fst result then
|
||||
(input ++ " is valid\n") -- TODO: indicate the date of the registry
|
||||
else
|
||||
(input ++ " is NOT valid: " ++ snd result ++ "\n")
|
||||
|
||||
main = do
|
||||
myargs <- getArgs
|
||||
theregistry <- readRegistry infile
|
||||
putStr (concat (map (analyze theregistry) myargs))
|
||||
|
||||
|
15
check-wf.hs
Normal file
15
check-wf.hs
Normal file
@ -0,0 +1,15 @@
|
||||
import Grammar
|
||||
import System.Environment (getArgs)
|
||||
|
||||
analyze language input
|
||||
= case (parse language "" input) of
|
||||
Left err -> (input ++ " is NOT well-formed: " ++ (show err) ++
|
||||
"\n")
|
||||
Right x -> (input ++ " is well-formed\n")
|
||||
|
||||
main = do
|
||||
myargs <- getArgs
|
||||
putStr (concat (map (analyze tag) myargs))
|
||||
-- TODO: set return code? What code to use if some are well-formed and some are not?
|
||||
-- TODO: provide a version with -q and -v ?
|
||||
|
18
correct-registries/inexisting-prefix
Normal file
18
correct-registries/inexisting-prefix
Normal file
@ -0,0 +1,18 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
||||
%%
|
||||
Type: variant
|
||||
Subtag: 1901
|
||||
Description: Traditional German orthography
|
||||
Added: 2005-10-16
|
||||
Prefix: fx
|
12
correct-registries/inexisting-script
Normal file
12
correct-registries/inexisting-script
Normal file
@ -0,0 +1,12 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrk
|
77
display-tag.hs
Normal file
77
display-tag.hs
Normal file
@ -0,0 +1,77 @@
|
||||
import Grammar
|
||||
import Types
|
||||
|
||||
import Registry.Registry (readRegistry, isValid)
|
||||
import Registry.Types
|
||||
import Registry.Utils (onlyThisValue, onlyThisType, scriptFrom, languageFrom,
|
||||
regionFrom, variantFrom)
|
||||
|
||||
import Data.List (intersperse)
|
||||
import System.Environment (getArgs)
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
langToString registry l =
|
||||
"language \"" ++ concat (intersperse " / "
|
||||
(lang'descr (languageFrom (filter (onlyThisValue
|
||||
"language" l)
|
||||
(filter (onlyThisType "language")
|
||||
registry) !! 0)))) ++ "\""
|
||||
|
||||
scriptToString registry Nothing = ""
|
||||
scriptToString registry (Just s) =
|
||||
let theScript = (filter (onlyThisValue "script" s)
|
||||
(filter (onlyThisType "script")
|
||||
registry)) !! 0 in
|
||||
", " ++ s ++ ": script " ++
|
||||
" \"" ++ concat (intersperse " / " (script'descr
|
||||
(scriptFrom theScript))) ++ "\""
|
||||
|
||||
regionToString registry Nothing = ""
|
||||
regionToString registry (Just r) =
|
||||
let theRegion = (filter (onlyThisValue "region" r)
|
||||
(filter (onlyThisType "region")
|
||||
registry)) !! 0 in
|
||||
", " ++ r ++ ": region " ++
|
||||
" \"" ++ concat (intersperse " / " (region'descr
|
||||
(regionFrom theRegion))) ++ "\""
|
||||
|
||||
variantsToString registry [] = ""
|
||||
variantsToString registry v = ", " ++ concat (intersperse ", "
|
||||
(map (variantToString registry) v))
|
||||
|
||||
variantToString registry v =
|
||||
let theVariant = (filter (onlyThisValue "variant" v)
|
||||
(filter (onlyThisType "variant")
|
||||
registry)) !! 0 in
|
||||
v ++ ": variant " ++
|
||||
" \"" ++ concat (intersperse " / " (variant'descr
|
||||
(variantFrom theVariant))) ++ "\""
|
||||
|
||||
toString :: Registry -> Tag -> String
|
||||
toString registry (Tag l el s r v e) =
|
||||
-- TODO: display extlangs
|
||||
l ++ ": " ++ langToString registry l ++
|
||||
scriptToString registry s ++ regionToString registry r ++
|
||||
variantsToString registry v
|
||||
|
||||
toString registry (Types.GF tag) = tag
|
||||
toString registry (Priv p) =
|
||||
(concat p) ++ " (private tag so no info available)"
|
||||
|
||||
analyze :: Registry -> String -> String
|
||||
analyze reg input
|
||||
= case (getTag input) of
|
||||
Left err -> err
|
||||
Right tag -> let result = isValid reg tag in
|
||||
if fst result then
|
||||
input ++ ": (" ++ (toString reg tag) ++ ")"
|
||||
else
|
||||
(input ++ " is NOT valid: " ++ snd result ++ "\n")
|
||||
|
||||
main = do
|
||||
myargs <- getArgs
|
||||
theregistry <- readRegistry infile
|
||||
putStrLn (concat (intersperse "\n" (map (analyze theregistry) myargs)))
|
||||
|
||||
|
33
invalid-registries/duplicata
Normal file
33
invalid-registries/duplicata
Normal file
@ -0,0 +1,33 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian again
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrl
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Arab
|
||||
Description: Arabic
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Armn
|
||||
Description: Armenian
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Cyrl
|
||||
Description: Cyrillic
|
||||
Added: 2005-10-16
|
18
invalid-registries/inexisting-prefix
Normal file
18
invalid-registries/inexisting-prefix
Normal file
@ -0,0 +1,18 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrk
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Cyrl
|
||||
Description: Cyrillic
|
||||
Added: 2005-10-16
|
||||
|
12
invalid-registries/inexisting-script
Normal file
12
invalid-registries/inexisting-script
Normal file
@ -0,0 +1,12 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrk
|
13
invalid-registries/missing-macrolang
Normal file
13
invalid-registries/missing-macrolang
Normal file
@ -0,0 +1,13 @@
|
||||
File-Date: 2008-02-20
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: foo
|
||||
Description: Test only
|
||||
Macrolanguage: xx
|
||||
Added: 2008-02-20
|
||||
|
28
invalid-registries/missing-pref
Normal file
28
invalid-registries/missing-pref
Normal file
@ -0,0 +1,28 @@
|
||||
File-Date: 2006-09-25
|
||||
%%
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
Preferred-value: xx
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
Suppress-Script: Cyrx
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Arab
|
||||
Description: Arabic
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Armn
|
||||
Description: Armenian
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: script
|
||||
Subtag: Cyrl
|
||||
Description: Cyrillic
|
||||
Added: 2005-10-16
|
10
invalid-registries/no-date
Normal file
10
invalid-registries/no-date
Normal file
@ -0,0 +1,10 @@
|
||||
Type: language
|
||||
Subtag: aa
|
||||
Description: Afar
|
||||
Added: 2005-10-16
|
||||
%%
|
||||
Type: language
|
||||
Subtag: ab
|
||||
Description: Abkhazian
|
||||
Added: 2005-10-16
|
||||
|
11
invalid-tags.txt
Normal file
11
invalid-tags.txt
Normal file
@ -0,0 +1,11 @@
|
||||
ax-TZ # Not in the registry, but well-formed
|
||||
fra-Latn # ISO 639 can be 3-letters
|
||||
fra
|
||||
fra-FX
|
||||
abcd-Latn # Language of 4 chars reserved for future use
|
||||
AaBbCcDd-x-y-any-x # Language of 5-8 chars, registered
|
||||
zh-Latm-CN # Typo
|
||||
de-DE-1902 # Wrong variant
|
||||
fr-shadok # Variant
|
||||
ab-c-abc-r-toto-c-abc # 'c' appears twice
|
||||
en-a-bbb-a-ccc # 'a' appears twice
|
26
irregular.hs
Normal file
26
irregular.hs
Normal file
@ -0,0 +1,26 @@
|
||||
import qualified IO
|
||||
import Text.ParserCombinators.Parsec as Parsec
|
||||
import qualified Data.List as List
|
||||
|
||||
import Grammar
|
||||
import Registry.Grammar
|
||||
import Registry.Types
|
||||
|
||||
infile = "./language-subtag-registry"
|
||||
|
||||
onlyIrregularGF (GF r) = isLeft (Grammar.getTag (gf'tag r))
|
||||
onlyIrregularGF _ = False
|
||||
|
||||
displayTag (GF r) = "\"" ++ (gf'tag r) ++ "\" / "
|
||||
|
||||
run parser input
|
||||
= case (Parsec.parse parser "" input) of
|
||||
Left err -> error ("Registry \"" ++ infile ++ "\" is not legal: " ++ (show err) ++ "\n")
|
||||
Right x -> IO.putStr ("-- AUTOMATICALLY GENERATED! Do *not* edit!!! \n" ++
|
||||
(foldr (++) "" (List.intersperse "\n" (map displayTag (filter onlyIrregularGF (map fromRight x))))) ++
|
||||
"\n")
|
||||
|
||||
main = do
|
||||
f <- IO.openFile (infile) IO.ReadMode
|
||||
input <- IO.hGetContents f
|
||||
run registry input
|
76
test-registries.hs
Normal file
76
test-registries.hs
Normal file
@ -0,0 +1,76 @@
|
||||
import qualified Test.HUnit as HUnit
|
||||
import qualified System.IO as IO
|
||||
import qualified Text.ParserCombinators.Parsec as Parsec
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Maybe as Maybe
|
||||
import qualified Text.Regex as RE
|
||||
import qualified System.Directory as Dir
|
||||
|
||||
import Registry.Grammar
|
||||
import Registry.Types
|
||||
import Registry.Registry
|
||||
|
||||
brokenDir = "./broken-registries"
|
||||
correctDir = "./correct-registries"
|
||||
invalidDir = "./invalid-registries"
|
||||
validDir = "./valid-registries"
|
||||
|
||||
data RegistryText = Registry {filename::FilePath, filecontents::String}
|
||||
|
||||
buildTestCaseSyn :: Bool -> RegistryText -> HUnit.Test
|
||||
buildTestCaseSyn True reg =
|
||||
HUnit.TestCase (HUnit.assertBool ("Should be syntactically OK (" ++ (filename reg) ++ ")")
|
||||
(checkOK (parse (filecontents reg))))
|
||||
buildTestCaseSyn False reg =
|
||||
HUnit.TestCase (HUnit.assertBool ("Should be syntactically wrong (" ++ (filename reg) ++ ")")
|
||||
(not (checkOK (parse (filecontents reg)))))
|
||||
|
||||
buildTestCaseSem :: Bool -> RegistryText -> HUnit.Test
|
||||
buildTestCaseSem True reg =
|
||||
let registry = parse (filecontents reg) in
|
||||
HUnit.TestCase (HUnit.assertBool ("Should be valid (" ++ (filename reg) ++ ")")
|
||||
(checkOK (registry) &&
|
||||
checkOK (semanticCheck (registryOf registry))))
|
||||
buildTestCaseSem False reg =
|
||||
let registry = parse (filecontents reg) in
|
||||
HUnit.TestCase (HUnit.assertBool ("Should be invalid (" ++ (filename reg) ++ ")")
|
||||
(checkOK (registry) &&
|
||||
not (checkOK (semanticCheck (registryOf registry)))))
|
||||
|
||||
endsWith :: String -> String -> Bool
|
||||
endsWith s1 s2 = Maybe.isJust (RE.matchRegex (RE.mkRegex (s2 ++ "$")) s1)
|
||||
|
||||
-- TODO: use mapM instead
|
||||
foreach :: (Monad m) => [a1] -> (a1 -> m a) -> m [a]
|
||||
foreach [] func = do
|
||||
return []
|
||||
foreach x func = do
|
||||
newHead <- func (head x)
|
||||
newTail <- foreach (tail x) func
|
||||
return (newHead : newTail)
|
||||
|
||||
contentOf :: FilePath -> IO RegistryText
|
||||
contentOf f = do
|
||||
content <- IO.readFile f
|
||||
return (Registry f content)
|
||||
|
||||
addDir d f = d ++ "/" ++ f
|
||||
|
||||
filesOf :: FilePath -> IO [RegistryText]
|
||||
filesOf d = do
|
||||
files <- Dir.getDirectoryContents d
|
||||
foreach (map (addDir d) (filter (\n -> n /= "." && n /= ".." && (not (endsWith n "~")))
|
||||
files))
|
||||
contentOf
|
||||
|
||||
main = do
|
||||
brokenFiles <- filesOf brokenDir
|
||||
let testsBroken = map (buildTestCaseSyn False) brokenFiles
|
||||
invalidFiles <- filesOf invalidDir
|
||||
let testsInvalid = map (buildTestCaseSem False) invalidFiles
|
||||
correctFiles <- filesOf correctDir
|
||||
let testsCorrect = map (buildTestCaseSyn True) correctFiles
|
||||
validFiles <- filesOf validDir
|
||||
let testsValid = map (buildTestCaseSem True) validFiles
|
||||
HUnit.runTestTT (HUnit.TestList (testsBroken ++ testsInvalid ++
|
||||
testsCorrect ++ testsValid))
|
54
tests-from-files.hs
Normal file
54
tests-from-files.hs
Normal file
@ -0,0 +1,54 @@
|
||||
import qualified Grammar
|
||||
import qualified Registry.Registry as Registry
|
||||
|
||||
import qualified Test.HUnit as HUnit
|
||||
import qualified System.IO
|
||||
import qualified Text.Regex as Regex
|
||||
|
||||
wfTagsFile = "./well-formed-tags.txt"
|
||||
brokenTagsFile = "./broken-tags.txt"
|
||||
validTagsFile = "./valid-tags.txt"
|
||||
invalidTagsFile = "./invalid-tags.txt"
|
||||
|
||||
registryfile = "./language-subtag-registry"
|
||||
|
||||
languageTag = Grammar.tag
|
||||
|
||||
getTag line =
|
||||
let fields = Regex.splitRegex (Regex.mkRegex " +") line in
|
||||
fields !! 0
|
||||
|
||||
shouldBeWellFormed tag =
|
||||
HUnit.TestCase (HUnit.assertBool (tag ++ " should be well-formed")
|
||||
(Grammar.testTag tag == True))
|
||||
|
||||
shouldBeBroken tag =
|
||||
HUnit.TestCase (HUnit.assertBool (tag ++ " should *not* be well-formed")
|
||||
(Grammar.testTag tag == False))
|
||||
|
||||
shouldBeValid reg tag =
|
||||
HUnit.TestCase (HUnit.assertBool (tag ++ " should be valid")
|
||||
(Registry.testTag reg tag == True))
|
||||
|
||||
shouldBeInvalid reg tag =
|
||||
HUnit.TestCase (HUnit.assertBool (tag ++ " should *not* be valid")
|
||||
(Registry.testTag reg tag == False))
|
||||
|
||||
tagsFromFile filename = do
|
||||
myContents <- System.IO.readFile filename
|
||||
let myLines = lines myContents
|
||||
return (map getTag myLines)
|
||||
|
||||
main = do
|
||||
brokenTags <- tagsFromFile brokenTagsFile
|
||||
wfTags <- tagsFromFile wfTagsFile
|
||||
theregistry <- Registry.readRegistry registryfile
|
||||
invalidTags <- tagsFromFile invalidTagsFile
|
||||
validTags <- tagsFromFile validTagsFile
|
||||
let tests = HUnit.TestList (map shouldBeBroken (brokenTags) ++
|
||||
map shouldBeWellFormed (wfTags) ++
|
||||
map (shouldBeValid theregistry) (validTags) ++
|
||||
map (shouldBeInvalid theregistry) (invalidTags))
|
||||
HUnit.runTestTT tests
|
||||
|
||||
|
44596
valid-registries/language-subtag-registry
Normal file
44596
valid-registries/language-subtag-registry
Normal file
File diff suppressed because it is too large
Load Diff
48
valid-tags.txt
Normal file
48
valid-tags.txt
Normal file
@ -0,0 +1,48 @@
|
||||
fr
|
||||
fr-Latn
|
||||
fr-Latn-FR
|
||||
fr-Latn-419
|
||||
fr-FR
|
||||
fr-y-myext-myext2
|
||||
apa-Latn # ISO 639 can be 3-letters
|
||||
apa
|
||||
apa-CA
|
||||
i-klingon # grandfathered with singleton
|
||||
no-bok # grandfathered without singleton
|
||||
mn-Cyrl-MN
|
||||
mN-cYrL-Mn
|
||||
fr-Latn-CA
|
||||
en-US
|
||||
fr-Latn-CA
|
||||
i-enochian # Grand fathered
|
||||
x-fr-CH
|
||||
sr-Latn-CS
|
||||
es-419
|
||||
sl-nedis
|
||||
de-CH-1996
|
||||
de-Latg-1996
|
||||
sl-IT-nedis
|
||||
en-a-bbb-x-a-ccc
|
||||
de-a-value
|
||||
en-x-US
|
||||
az-Arab-x-AZE-derbend
|
||||
es-Latn-CO-x-private
|
||||
ab-x-abc-x-abc # anything goes after x
|
||||
ab-x-abc-a-a # ditto
|
||||
i-default # grandfathered
|
||||
i-klingon # grandfathered
|
||||
en
|
||||
de-AT
|
||||
es-419
|
||||
de-CH-1901
|
||||
sr-Cyrl
|
||||
sr-Cyrl-CS
|
||||
sl-Latn-IT-rozaj
|
||||
en-US-x-twain
|
||||
zh-cmn
|
||||
zh-cmn-Hant
|
||||
zh-cmn-Hant-HK
|
||||
zh-gan
|
||||
zh-yue-Hant-HK
|
||||
en-Latn-GB-boont-r-extended-sequence-x-private
|
||||
en-US-boont
|
54
well-formed-tags.txt
Normal file
54
well-formed-tags.txt
Normal file
@ -0,0 +1,54 @@
|
||||
fr
|
||||
fr-Latn
|
||||
fr-Latn-FR
|
||||
fr-Latn-419
|
||||
fr-FR
|
||||
ax-TZ # Not in the registry, but well-formed
|
||||
fr-shadok # Variant
|
||||
fr-y-myext-myext2
|
||||
fra-Latn # ISO 639 can be 3-letters
|
||||
fra
|
||||
fra-FX
|
||||
i-klingon # grandfathered with singleton
|
||||
I-kLINgon # tags are case-insensitive...
|
||||
no-bok # grandfathered without singleton
|
||||
mn-Cyrl-MN
|
||||
mN-cYrL-Mn
|
||||
fr-Latn-CA
|
||||
en-US
|
||||
fr-Latn-CA
|
||||
i-enochian # Grand fathered
|
||||
x-fr-CH
|
||||
sr-Latn-CS
|
||||
es-419
|
||||
sl-nedis
|
||||
de-CH-1996
|
||||
de-Latg-1996
|
||||
sl-IT-nedis
|
||||
en-a-bbb-x-a-ccc
|
||||
de-a-value
|
||||
en-Latn-GB-boont-r-extended-sequence-x-private
|
||||
en-x-US
|
||||
az-Arab-x-AZE-derbend
|
||||
es-Latn-CO-x-private
|
||||
en-US-boont
|
||||
ab-x-abc-x-abc # anything goes after x
|
||||
ab-x-abc-a-a # ditto
|
||||
i-default # grandfathered
|
||||
i-klingon # grandfathered
|
||||
abcd-Latn # Language of 4 chars reserved for future use
|
||||
AaBbCcDd-x-y-any-x # Language of 5-8 chars, registered
|
||||
en
|
||||
de-AT
|
||||
es-419
|
||||
de-CH-1901
|
||||
sr-Cyrl
|
||||
sr-Cyrl-CS
|
||||
sl-Latn-IT-rozaj
|
||||
en-US-x-twain
|
||||
zh-cmn
|
||||
zh-cmn-Hant
|
||||
zh-cmn-Hant-HK
|
||||
zh-gan
|
||||
zh-yue-Hant-HK
|
||||
xr-p-lze # Extension
|
Loading…
Reference in New Issue
Block a user