GaBuZoMeu/Registry/Registry.hs
2023-10-01 15:46:34 +02:00

232 lines
7.5 KiB
Haskell

module Registry.Registry where
import qualified Text.ParserCombinators.Parsec as Parsec
import qualified System.IO
import Data.Maybe
import Registry.Types
import Registry.Utils
import Registry.Grammar (registry, member, parse)
import Types
import Grammar
defaultInfile = "./language-subtag-registry"
type Result = (Bool, String)
-- Combines a former result with a new result, yielding a result.
combine :: Result -> Result -> Result
combine (True, _) (False, s) = (False, s)
combine (False, s1) (False, s2) = (False, s1 ++ "; " ++ s2)
combine (False, s) (True, _) = (False, s)
combine (True, _) (True, _) = (True, "")
readRegistry filename = do
input <- System.IO.readFile filename
return (getRegistry input)
getRegistry input =
let reg = Registry.Grammar.parse input in
if not (checkOK reg) then
error "Illegal registry"
else
registryOf reg
dateOf (Date d) = d
isValidLanguage l dict =
if length (filter (onlyThisValue "language" l) dict) == 1 then
(True, "")
else
(False, "Unknown language " ++ l)
isValidScript Nothing _ = (True, "")
isValidScript s dict =
if length (filter (onlyThisValue "script" (fromJust s)) dict) == 1 then
(True, "")
else
(False, "Unknown script " ++ (fromJust s))
isValidRegion Nothing _ = (True, "")
isValidRegion r dict =
if length (filter (onlyThisValue "region" (fromJust r)) dict) == 1 then
(True, "")
else
(False, "Unknown region " ++ (fromJust r))
isValidVariants [] _ = (True, "")
isValidVariants v dict = foldl combine (True, "") (map (isValidVariant dict) v)
isValidVariant dict v =
if length (filter (onlyThisValue "variant" v) dict) == 1 then
(True, "")
else
(False, "Unknown variant " ++ v)
isValidExtension _ _ = (True, "")
-- TODO: a better test
-- Semantic testing
-- Only one extension by the same singleton
correctNumber :: [Singleton] -> Singleton -> Bool
correctNumber l s = length (filter (\c -> (c == s)) l) == 1
getSingleton :: Types.Extension -> Singleton
getSingleton (singleton, names) = singleton
norepeat :: Extensions -> Bool
norepeat extensions =
let singletons = map getSingleton extensions in
and (map (correctNumber singletons) singletons)
semanticTests (Tag lang extlang script region variants extensions) =
if norepeat extensions then
(True, "")
else
(False, "Repeated extension singleton")
semanticTests _ = (True, "") -- No tests for privateuse and grandfathered
isValid :: Registry -> Tag -> Result
isValid reg (Tag l el s r v e) =
let languages = filter (onlyThisType "language") reg in
let scripts = filter (onlyThisType "script") reg in
let regions = filter (onlyThisType "region") reg in
let variants = filter (onlyThisType "variant") reg in
let extensions = filter (onlyThisType "extension") reg in
semanticTests (Tag l el s r v e) `combine`
(isValidLanguage l languages) `combine` (isValidScript s scripts) `combine`
(isValidRegion r regions) `combine` (isValidVariants v variants) `combine`
(isValidExtension e extensions)
isValid reg (Types.GF tag) =
(True, "")
-- It is passed well-formedness (syntactic) tests, it is good
isValid reg (Priv p) =
(True, "") -- By definition, private tags are always valid
testTag :: Registry -> String -> Bool
testTag reg tag =
case (Parsec.parse Grammar.tag "" tag) of
Left err -> False
Right t -> if fst (semanticTests t) then
fst (isValid reg t)
else
False
checkOK (Success p) = True
checkOK _ = False
messageOf (Success p) = error "Check was a success, no message available"
messageOf (SyntaxError s) = s
messageOf (SemanticError s) = s
registryOf (Success p) = p
registryOf _ = error "Check was a failure, no registry available"
semanticCheck :: Registry -> CheckResult
semanticCheck r =
-- TODO: test that the extlangs exist
oneDateTest r `combineC` prefixesExist r `combineC`
scriptsExist r `combineC` noDuplicates r `combineC` preferredValuesExist r
`combineC` macrolanguagesExist r
oneDateTest reg =
if (length (filter onlyDate reg) == 1) then
Success reg
else
SemanticError "One and only one Field-Date record authorized"
macrolanguageExist reg language =
let macrolang = lang'macroLanguage (languageFrom language) in
if isJust macrolang then
length (filter (onlyThisValue "language" (fromJust macrolang))
(filter onlyLang reg)) > 0
else
True
macrolanguagesExist reg =
let languages = filter onlyLang reg in
let missing = filter (not.macrolanguageExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some languages have a Macrolanguage which does not appear in the registry: " ++
(show missing))
-- Extracts the langage subtag, the first one
languageOf tag =
takeWhile (/= '-') tag
prefixExist reg variant =
-- TODO: We test only that the language part of the prefix is OK, we should test
-- the rest
and (map (\r -> length (filter (onlyThisValue "language" (languageOf r))
(filter onlyLang reg)) > 0)
(variant'prefix (variantFrom variant)))
prefixesExist reg =
let variants = filter onlyVariant reg in
let missing = filter (not.prefixExist reg) variants in
if length missing == 0 then
Success reg
else
SemanticError ("Some variants have a Prefix which does not appear in the registry: " ++
(show missing))
scriptExist reg language =
let script = lang'script (languageFrom language) in
if isJust script then
length (filter (onlyThisValue "script" (fromJust script))
(filter onlyScript reg)) > 0
else
True
scriptsExist reg =
let languages = filter onlyLang reg in
let missing = filter (not.scriptExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some languages have a Suppress-script which does not appear in the registry: " ++
(show missing))
preferredLangValueExist reg language =
let preferredValue = lang'preferredValue (languageFrom language) in
if isJust preferredValue then
length (filter (onlyThisValue "language" (fromJust preferredValue))
(filter onlyLang reg)) > 0
else
True
preferredValuesExist reg =
-- TODO: test also regions and scripts
let languages = filter onlyLang reg in
let missing = filter (not.preferredLangValueExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some records have a Preferred-value which does not appear in the registry: " ++
(show missing))
otherRecord reg category val =
tail (filter (onlyThisValue category val) (filter (onlyThisType category) reg))
duplicate reg category =
case category of
"language" ->
filter (\t -> length t > 0) (map (otherRecord reg category)
(map (lang'subtag.languageFrom)
(filter (onlyThisType category) reg)))
_ -> [] -- TODO: implements them, too
noDuplicates reg =
let duplicates = concat (map (duplicate reg)
["language", "script", "variant", "region", "grandfathered"]) in
if length duplicates == 0 then
Success reg
else
SemanticError ("Duplicate values: " ++ (show duplicates))
-- TODO: tests Preferred-value