forked from bortzmeyer/GaBuZoMeu
232 lines
7.5 KiB
Haskell
232 lines
7.5 KiB
Haskell
module Registry.Registry where
|
|
|
|
import qualified Text.ParserCombinators.Parsec as Parsec
|
|
import qualified System.IO
|
|
import Data.Maybe
|
|
import Registry.Types
|
|
import Registry.Utils
|
|
import Registry.Grammar (registry, member, parse)
|
|
import Types
|
|
import Grammar
|
|
|
|
defaultInfile = "./language-subtag-registry"
|
|
|
|
type Result = (Bool, String)
|
|
|
|
-- Combines a former result with a new result, yielding a result.
|
|
combine :: Result -> Result -> Result
|
|
combine (True, _) (False, s) = (False, s)
|
|
combine (False, s1) (False, s2) = (False, s1 ++ "; " ++ s2)
|
|
combine (False, s) (True, _) = (False, s)
|
|
combine (True, _) (True, _) = (True, "")
|
|
|
|
readRegistry filename = do
|
|
input <- System.IO.readFile filename
|
|
return (getRegistry input)
|
|
|
|
getRegistry input =
|
|
let reg = Registry.Grammar.parse input in
|
|
if not (checkOK reg) then
|
|
error "Illegal registry"
|
|
else
|
|
registryOf reg
|
|
|
|
dateOf (Date d) = d
|
|
|
|
isValidLanguage l dict =
|
|
if length (filter (onlyThisValue "language" l) dict) == 1 then
|
|
(True, "")
|
|
else
|
|
(False, "Unknown language " ++ l)
|
|
|
|
isValidScript Nothing _ = (True, "")
|
|
isValidScript s dict =
|
|
if length (filter (onlyThisValue "script" (fromJust s)) dict) == 1 then
|
|
(True, "")
|
|
else
|
|
(False, "Unknown script " ++ (fromJust s))
|
|
|
|
isValidRegion Nothing _ = (True, "")
|
|
isValidRegion r dict =
|
|
if length (filter (onlyThisValue "region" (fromJust r)) dict) == 1 then
|
|
(True, "")
|
|
else
|
|
(False, "Unknown region " ++ (fromJust r))
|
|
|
|
isValidVariants [] _ = (True, "")
|
|
isValidVariants v dict = foldl combine (True, "") (map (isValidVariant dict) v)
|
|
|
|
isValidVariant dict v =
|
|
if length (filter (onlyThisValue "variant" v) dict) == 1 then
|
|
(True, "")
|
|
else
|
|
(False, "Unknown variant " ++ v)
|
|
|
|
isValidExtension _ _ = (True, "")
|
|
-- TODO: a better test
|
|
|
|
-- Semantic testing
|
|
|
|
-- Only one extension by the same singleton
|
|
correctNumber :: [Singleton] -> Singleton -> Bool
|
|
correctNumber l s = length (filter (\c -> (c == s)) l) == 1
|
|
|
|
getSingleton :: Types.Extension -> Singleton
|
|
getSingleton (singleton, names) = singleton
|
|
|
|
norepeat :: Extensions -> Bool
|
|
norepeat extensions =
|
|
let singletons = map getSingleton extensions in
|
|
and (map (correctNumber singletons) singletons)
|
|
|
|
semanticTests (Tag lang extlang script region variants extensions) =
|
|
if norepeat extensions then
|
|
(True, "")
|
|
else
|
|
(False, "Repeated extension singleton")
|
|
semanticTests _ = (True, "") -- No tests for privateuse and grandfathered
|
|
|
|
isValid :: Registry -> Tag -> Result
|
|
isValid reg (Tag l el s r v e) =
|
|
let languages = filter (onlyThisType "language") reg in
|
|
let scripts = filter (onlyThisType "script") reg in
|
|
let regions = filter (onlyThisType "region") reg in
|
|
let variants = filter (onlyThisType "variant") reg in
|
|
let extensions = filter (onlyThisType "extension") reg in
|
|
semanticTests (Tag l el s r v e) `combine`
|
|
(isValidLanguage l languages) `combine` (isValidScript s scripts) `combine`
|
|
(isValidRegion r regions) `combine` (isValidVariants v variants) `combine`
|
|
(isValidExtension e extensions)
|
|
|
|
isValid reg (Types.GF tag) =
|
|
(True, "")
|
|
-- It is passed well-formedness (syntactic) tests, it is good
|
|
|
|
isValid reg (Priv p) =
|
|
(True, "") -- By definition, private tags are always valid
|
|
|
|
testTag :: Registry -> String -> Bool
|
|
testTag reg tag =
|
|
case (Parsec.parse Grammar.tag "" tag) of
|
|
Left err -> False
|
|
Right t -> if fst (semanticTests t) then
|
|
fst (isValid reg t)
|
|
else
|
|
False
|
|
|
|
checkOK (Success p) = True
|
|
checkOK _ = False
|
|
|
|
messageOf (Success p) = error "Check was a success, no message available"
|
|
messageOf (SyntaxError s) = s
|
|
messageOf (SemanticError s) = s
|
|
|
|
registryOf (Success p) = p
|
|
registryOf _ = error "Check was a failure, no registry available"
|
|
|
|
semanticCheck :: Registry -> CheckResult
|
|
semanticCheck r =
|
|
-- TODO: test that the extlangs exist
|
|
oneDateTest r `combineC` prefixesExist r `combineC`
|
|
scriptsExist r `combineC` noDuplicates r `combineC` preferredValuesExist r
|
|
`combineC` macrolanguagesExist r
|
|
|
|
oneDateTest reg =
|
|
if (length (filter onlyDate reg) == 1) then
|
|
Success reg
|
|
else
|
|
SemanticError "One and only one Field-Date record authorized"
|
|
|
|
macrolanguageExist reg language =
|
|
let macrolang = lang'macroLanguage (languageFrom language) in
|
|
if isJust macrolang then
|
|
length (filter (onlyThisValue "language" (fromJust macrolang))
|
|
(filter onlyLang reg)) > 0
|
|
else
|
|
True
|
|
|
|
macrolanguagesExist reg =
|
|
let languages = filter onlyLang reg in
|
|
let missing = filter (not.macrolanguageExist reg) languages in
|
|
if length missing == 0 then
|
|
Success reg
|
|
else
|
|
SemanticError ("Some languages have a Macrolanguage which does not appear in the registry: " ++
|
|
(show missing))
|
|
|
|
-- Extracts the langage subtag, the first one
|
|
languageOf tag =
|
|
takeWhile (/= '-') tag
|
|
|
|
prefixExist reg variant =
|
|
-- TODO: We test only that the language part of the prefix is OK, we should test
|
|
-- the rest
|
|
and (map (\r -> length (filter (onlyThisValue "language" (languageOf r))
|
|
(filter onlyLang reg)) > 0)
|
|
(variant'prefix (variantFrom variant)))
|
|
|
|
prefixesExist reg =
|
|
let variants = filter onlyVariant reg in
|
|
let missing = filter (not.prefixExist reg) variants in
|
|
if length missing == 0 then
|
|
Success reg
|
|
else
|
|
SemanticError ("Some variants have a Prefix which does not appear in the registry: " ++
|
|
(show missing))
|
|
|
|
scriptExist reg language =
|
|
let script = lang'script (languageFrom language) in
|
|
if isJust script then
|
|
length (filter (onlyThisValue "script" (fromJust script))
|
|
(filter onlyScript reg)) > 0
|
|
else
|
|
True
|
|
|
|
scriptsExist reg =
|
|
let languages = filter onlyLang reg in
|
|
let missing = filter (not.scriptExist reg) languages in
|
|
if length missing == 0 then
|
|
Success reg
|
|
else
|
|
SemanticError ("Some languages have a Suppress-script which does not appear in the registry: " ++
|
|
(show missing))
|
|
|
|
preferredLangValueExist reg language =
|
|
let preferredValue = lang'preferredValue (languageFrom language) in
|
|
if isJust preferredValue then
|
|
length (filter (onlyThisValue "language" (fromJust preferredValue))
|
|
(filter onlyLang reg)) > 0
|
|
else
|
|
True
|
|
|
|
preferredValuesExist reg =
|
|
-- TODO: test also regions and scripts
|
|
let languages = filter onlyLang reg in
|
|
let missing = filter (not.preferredLangValueExist reg) languages in
|
|
if length missing == 0 then
|
|
Success reg
|
|
else
|
|
SemanticError ("Some records have a Preferred-value which does not appear in the registry: " ++
|
|
(show missing))
|
|
|
|
otherRecord reg category val =
|
|
tail (filter (onlyThisValue category val) (filter (onlyThisType category) reg))
|
|
|
|
duplicate reg category =
|
|
case category of
|
|
"language" ->
|
|
filter (\t -> length t > 0) (map (otherRecord reg category)
|
|
(map (lang'subtag.languageFrom)
|
|
(filter (onlyThisType category) reg)))
|
|
_ -> [] -- TODO: implements them, too
|
|
|
|
noDuplicates reg =
|
|
let duplicates = concat (map (duplicate reg)
|
|
["language", "script", "variant", "region", "grandfathered"]) in
|
|
if length duplicates == 0 then
|
|
Success reg
|
|
else
|
|
SemanticError ("Duplicate values: " ++ (show duplicates))
|
|
|
|
-- TODO: tests Preferred-value
|