GaBuZoMeu/Registry/Registry.hs

230 lines
7.5 KiB
Haskell
Raw Normal View History

2023-06-08 21:49:35 +02:00
module Registry.Registry where
import qualified Text.ParserCombinators.Parsec as Parsec
import qualified System.IO
import Data.Maybe
import Registry.Types
import Registry.Utils
import Registry.Grammar (registry, member, parse)
import Types
import Grammar
defaultInfile = "./language-subtag-registry"
type Result = (Bool, String)
-- Combines a former result with a new result, yielding a result.
combine :: Result -> Result -> Result
combine (True, _) (False, s) = (False, s)
combine (False, s1) (False, s2) = (False, s1 ++ "; " ++ s2)
combine (False, s) (True, _) = (False, s)
combine (True, _) (True, _) = (True, "")
readRegistry filename = do
input <- System.IO.readFile filename
return (getRegistry input)
getRegistry input =
let reg = Registry.Grammar.parse input in
if not (checkOK reg) then
error "Illegal registry"
else
registryOf reg
isValidLanguage l dict =
if length (filter (onlyThisValue "language" l) dict) == 1 then
(True, "")
else
(False, "Unknown language " ++ l)
isValidScript Nothing _ = (True, "")
isValidScript s dict =
if length (filter (onlyThisValue "script" (fromJust s)) dict) == 1 then
(True, "")
else
(False, "Unknown script " ++ (fromJust s))
isValidRegion Nothing _ = (True, "")
isValidRegion r dict =
if length (filter (onlyThisValue "region" (fromJust r)) dict) == 1 then
(True, "")
else
(False, "Unknown region " ++ (fromJust r))
isValidVariants [] _ = (True, "")
isValidVariants v dict = foldl combine (True, "") (map (isValidVariant dict) v)
isValidVariant dict v =
if length (filter (onlyThisValue "variant" v) dict) == 1 then
(True, "")
else
(False, "Unknown variant " ++ v)
isValidExtension _ _ = (True, "")
-- TODO: a better test
-- Semantic testing
-- Only one extension by the same singleton
correctNumber :: [Singleton] -> Singleton -> Bool
correctNumber l s = length (filter (\c -> (c == s)) l) == 1
getSingleton :: Types.Extension -> Singleton
getSingleton (singleton, names) = singleton
norepeat :: Extensions -> Bool
norepeat extensions =
let singletons = map getSingleton extensions in
and (map (correctNumber singletons) singletons)
semanticTests (Tag lang extlang script region variants extensions) =
if norepeat extensions then
(True, "")
else
(False, "Repeated extension singleton")
semanticTests _ = (True, "") -- No tests for privateuse and grandfathered
isValid :: Registry -> Tag -> Result
isValid reg (Tag l el s r v e) =
let languages = filter (onlyThisType "language") reg in
let scripts = filter (onlyThisType "script") reg in
let regions = filter (onlyThisType "region") reg in
let variants = filter (onlyThisType "variant") reg in
let extensions = filter (onlyThisType "extension") reg in
semanticTests (Tag l el s r v e) `combine`
(isValidLanguage l languages) `combine` (isValidScript s scripts) `combine`
(isValidRegion r regions) `combine` (isValidVariants v variants) `combine`
(isValidExtension e extensions)
isValid reg (Types.GF tag) =
(True, "")
-- It is passed well-formedness (syntactic) tests, it is good
isValid reg (Priv p) =
(True, "") -- By definition, private tags are always valid
testTag :: Registry -> String -> Bool
testTag reg tag =
case (Parsec.parse Grammar.tag "" tag) of
Left err -> False
Right t -> if fst (semanticTests t) then
fst (isValid reg t)
else
False
checkOK (Success p) = True
checkOK _ = False
messageOf (Success p) = error "Check was a success, no message available"
messageOf (SyntaxError s) = s
messageOf (SemanticError s) = s
registryOf (Success p) = p
registryOf _ = error "Check was a failure, no registry available"
semanticCheck :: Registry -> CheckResult
semanticCheck r =
-- TODO: test that the extlangs exist
oneDateTest r `combineC` prefixesExist r `combineC`
scriptsExist r `combineC` noDuplicates r `combineC` preferredValuesExist r
`combineC` macrolanguagesExist r
oneDateTest reg =
if (length (filter onlyDate reg) == 1) then
Success reg
else
SemanticError "One and only one Field-Date record authorized"
macrolanguageExist reg language =
let macrolang = lang'macroLanguage (languageFrom language) in
if isJust macrolang then
length (filter (onlyThisValue "language" (fromJust macrolang))
(filter onlyLang reg)) > 0
else
True
macrolanguagesExist reg =
let languages = filter onlyLang reg in
let missing = filter (not.macrolanguageExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some languages have a Macrolanguage which does not appear in the registry: " ++
(show missing))
-- Extracts the langage subtag, the first one
languageOf tag =
takeWhile (/= '-') tag
prefixExist reg variant =
-- TODO: We test only that the language part of the prefix is OK, we should test
-- the rest
and (map (\r -> length (filter (onlyThisValue "language" (languageOf r))
(filter onlyLang reg)) > 0)
(variant'prefix (variantFrom variant)))
prefixesExist reg =
let variants = filter onlyVariant reg in
let missing = filter (not.prefixExist reg) variants in
if length missing == 0 then
Success reg
else
SemanticError ("Some variants have a Prefix which does not appear in the registry: " ++
(show missing))
scriptExist reg language =
let script = lang'script (languageFrom language) in
if isJust script then
length (filter (onlyThisValue "script" (fromJust script))
(filter onlyScript reg)) > 0
else
True
scriptsExist reg =
let languages = filter onlyLang reg in
let missing = filter (not.scriptExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some languages have a Suppress-script which does not appear in the registry: " ++
(show missing))
preferredLangValueExist reg language =
let preferredValue = lang'preferredValue (languageFrom language) in
if isJust preferredValue then
length (filter (onlyThisValue "language" (fromJust preferredValue))
(filter onlyLang reg)) > 0
else
True
preferredValuesExist reg =
-- TODO: test also regions and scripts
let languages = filter onlyLang reg in
let missing = filter (not.preferredLangValueExist reg) languages in
if length missing == 0 then
Success reg
else
SemanticError ("Some records have a Preferred-value which does not appear in the registry: " ++
(show missing))
otherRecord reg category val =
tail (filter (onlyThisValue category val) (filter (onlyThisType category) reg))
duplicate reg category =
case category of
"language" ->
filter (\t -> length t > 0) (map (otherRecord reg category)
(map (lang'subtag.languageFrom)
(filter (onlyThisType category) reg)))
_ -> [] -- TODO: implements them, too
noDuplicates reg =
let duplicates = concat (map (duplicate reg)
["language", "script", "variant", "region", "grandfathered"]) in
if length duplicates == 0 then
Success reg
else
SemanticError ("Duplicate values: " ++ (show duplicates))
-- TODO: tests Preferred-value