module Registry.Grammar where import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline) import Parsers import Registry.Types import Data.Maybe import Data.Char(ord, chr) import Numeric import Prelude hiding (lookup) -- Too low-level to be in Registry.Types data Field = Field {field'name::String, field'value::String} deriving Show colon = char ':' "colon" dash = char '-' "dash" space = char ' ' "space" newline = many1 (try (string "\n\r") <|> string "\n" <|> string "\r") "new line" spaces = many space doublepercent = do {string "%%"; newline} "double percent line" extract :: Field -> (String, String) extract f = (field'name f, field'value f) -- TODO: make it case-insensitive member :: String -> [(String, String)] -> Bool member key [] = False member key dict = let head = dict !! 0 in if key == fst head then True else member key (tail dict) -- TODO: make it case-insensitive lookup :: String -> [(String, String)] -> Maybe String lookup key [] = Nothing lookup key dict = let head = dict !! 0 in if key == fst head then Just (snd head) else lookup key (tail dict) lookups :: String -> [(String, String)] -> [String] lookups key [] = [] lookups key dict = let head = dict !! 0 in if key == fst head then (snd head) : lookups key (tail dict) else lookups key (tail dict) suppressScript r = lookup "Suppress-Script" r recordFactory :: [(String, String)] -> Either String Record recordFactory r = if member "Type" r then let thetype = fromJust (lookup "Type" r) in -- TODO: test that the field exist. fromJust will raise an exception but we can -- provide a better error message if we test case thetype of "language" -> Right (Lang (Language (fromJust (lookup "Subtag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)) (suppressScript r) (lookup "Preferred-Value" r) (lookup "Macrolanguage" r) (lookup "Deprecated" r) (lookup "Scope" r) )) "extlang" -> Right (Extl (Extlang (fromJust (lookup "Subtag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)) (suppressScript r) (fromJust (lookup "Prefix" r)) (lookup "Preferred-Value" r) (lookup "Macrolanguage" r) (lookup "Scope" r) )) "script" -> Right (Scr (Script (fromJust (lookup "Subtag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)))) "region" -> Right (Reg (Region (fromJust (lookup "Subtag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)))) "variant" -> Right (Var (Variant (fromJust (lookup "Subtag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)) (lookups "Prefix" r))) "redundant" -> Right (Red (Redundant (fromJust (lookup "Tag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)))) "grandfathered" -> Right (GF (Grandfathered (fromJust (lookup "Tag" r)) (lookups "Description" r) (fromJust (lookup "Added" r)))) _ -> Left ("Unknown type \"" ++ thetype ++ "\"") else if member "File-Date" r then Right (Date (fromJust (lookup "File-Date" r))) else Left "Invalid record: no Type or File-Date" record = do fields <- many1 field "record" let therecord = map extract fields return (recordFactory therecord) fieldname = do start <- alphaNum rest <- many (alphaNum <|> dash) return ([start] ++ rest) fieldvalue = many (unicodechar <|> space <|> try (do {newline; space})) "field value" unicodechar = satisfy (\thechar -> let c = (ord thechar) in (c >= 0x21 && c <= 0x10ffff)) "Character" field = do name <- fieldname spaces colon spaces value <- fieldvalue newline return (Field name value) "field" registry = do allrecords <- record `sepBy` doublepercent eof return allrecords isRight (Right a) = True isRight (Left a) = False fromRight (Right a) = a fromRight (Left a) = error "Not a right value" isLeft (Left a) = True isLeft (Right a) = False fromLeft (Left a) = a fromLeft (Right a) = error "Not a left value" parse :: String -> CheckResult parse input = case (Parsec.parse Registry.Grammar.registry "" input) of Left err -> SyntaxError ("Registry is not legal: " ++ (show err)) Right reg -> let ok = and (map isRight reg) in if ok then Success (map fromRight reg) else let firstErr = (filter isLeft reg) !! 0 in SyntaxError ("Registry is not legal: " ++ fromLeft firstErr) -- TODO: the line number