GaBuZoMeu/Registry/Grammar.hs
2024-06-18 10:44:00 +02:00

172 lines
5.6 KiB
Haskell

module Registry.Grammar where
import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline)
import Parsers
import Registry.Types
import Data.Maybe
import Data.Char(ord, chr)
import Numeric
import Prelude hiding (lookup)
-- Too low-level to be in Registry.Types
data Field = Field {field'name::String, field'value::String} deriving Show
colon = char ':' <?> "colon"
dash = char '-' <?> "dash"
space = char ' ' <?> "space"
newline = many1 (try (string "\n\r") <|> string "\n" <|> string "\r") <?> "new line"
spaces = many space
doublepercent = do {string "%%"; newline} <?> "double percent line"
extract :: Field -> (String, String)
extract f = (field'name f, field'value f)
-- TODO: make it case-insensitive
member :: String -> [(String, String)] -> Bool
member key [] = False
member key dict = let head = dict !! 0 in
if key == fst head then
True
else
member key (tail dict)
-- TODO: make it case-insensitive
lookup :: String -> [(String, String)] -> Maybe String
lookup key [] = Nothing
lookup key dict = let head = dict !! 0 in
if key == fst head then
Just (snd head)
else
lookup key (tail dict)
lookups :: String -> [(String, String)] -> [String]
lookups key [] = []
lookups key dict = let head = dict !! 0 in
if key == fst head then
(snd head) : lookups key (tail dict)
else
lookups key (tail dict)
suppressScript r = lookup "Suppress-Script" r
recordFactory :: [(String, String)] -> Either String Record
recordFactory r =
if member "Type" r then
let thetype = fromJust (lookup "Type" r) in
-- TODO: test that the field exist. fromJust will raise an exception but we can
-- provide a better error message if we test
case thetype of
"language" -> Right (Lang (Language
(fromJust (lookup "Subtag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))
(suppressScript r)
(lookup "Preferred-Value" r)
(lookup "Macrolanguage" r)
(lookup "Deprecated" r)
(lookup "Scope" r)
(lookups "Comments" r)
))
"extlang" -> Right (Extl (Extlang
(fromJust (lookup "Subtag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))
(suppressScript r)
(fromJust (lookup "Prefix" r))
(lookup "Preferred-Value" r)
(lookup "Macrolanguage" r)
(lookup "Scope" r)
))
"script" -> Right (Scr (Script
(fromJust (lookup "Subtag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))
(lookup "Deprecated" r)
(lookups "Comments" r)))
"region" -> Right (Reg (Region
(fromJust (lookup "Subtag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))
(lookup "Deprecated" r)
(lookups "Comments" r)))
"variant" -> Right (Var (Variant
(fromJust (lookup "Subtag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))
(lookups "Prefix" r)
(lookup "Deprecated" r)
(lookups "Comments" r)))
"redundant" -> Right (Red (Redundant
(fromJust (lookup "Tag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))))
"grandfathered" -> Right (GF (Grandfathered
(fromJust (lookup "Tag" r))
(lookups "Description" r)
(fromJust (lookup "Added" r))))
_ -> Left ("Unknown type \"" ++ thetype ++ "\"")
else
if member "File-Date" r then
Right (Date (fromJust (lookup "File-Date" r)))
else
Left "Invalid record: no Type or File-Date"
record = do
fields <- many1 field <?> "record"
let therecord = map extract fields
return (recordFactory therecord)
fieldname = do
start <- alphaNum
rest <- many (alphaNum <|> dash)
return ([start] ++ rest)
fieldvalue = many (unicodechar <|> space <|> try (do {newline; space})) <?> "field value"
unicodechar = satisfy (\thechar ->
let c = (ord thechar) in
(c >= 0x21 && c <= 0x10ffff))
<?> "Character"
field = do
name <- fieldname
spaces
colon
spaces
value <- fieldvalue
newline
return (Field name value)
<?> "field"
registry = do
allrecords <- record `sepBy` doublepercent
eof
return allrecords
isRight (Right a) = True
isRight (Left a) = False
fromRight (Right a) = a
fromRight (Left a) = error "Not a right value"
isLeft (Left a) = True
isLeft (Right a) = False
fromLeft (Left a) = a
fromLeft (Right a) = error "Not a left value"
parse :: String -> CheckResult
parse input =
case (Parsec.parse Registry.Grammar.registry "" input) of
Left err -> SyntaxError ("Registry is not legal: " ++ (show err))
Right reg -> let ok = and (map isRight reg) in
if ok then
Success (map fromRight reg)
else
let firstErr = (filter isLeft reg) !! 0 in
SyntaxError ("Registry is not legal: " ++
fromLeft firstErr)
-- TODO: the line number