GaBuZoMeu/Registry/Grammar.hs

184 lines
6.0 KiB
Haskell
Raw Normal View History

2023-06-08 21:49:35 +02:00
module Registry.Grammar where
import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline)
import Parsers
import Registry.Types
import Data.Maybe
import Data.Char(ord, chr)
import Numeric
import Prelude hiding (lookup)
-- | A raw registry field: its name and its value, both kept as plain strings.
-- Too low-level to be in Registry.Types
data Field = Field
  { field'name :: String
  , field'value :: String
  }
  deriving Show
-- Single-character tokens; each one carries a label used in error messages.
colon = char ':' <?> "colon"
dash = char '-' <?> "dash"
-- Exactly one ' ' character (Parsec's own 'space' is hidden in the import).
space = char ' ' <?> "space"
-- One or more consecutive line terminators. "\n\r" is tried first, with
-- backtracking, so that a lone "\n" can still match afterwards.
newline = many1 lineEnd <?> "new line"
  where
    lineEnd = try (string "\n\r") <|> string "\n" <|> string "\r"
-- Zero or more ' ' characters.
spaces = many space
-- The "%%" marker followed by a line end.
doublepercent = (string "%%" >> newline) <?> "double percent line"
-- | Turn a 'Field' into a plain @(name, value)@ pair.
extract :: Field -> (String, String)
extract f = (name, value)
  where
    name = field'name f
    value = field'value f
-- TODO: make it case-insensitive
-- | Tell whether at least one pair of @dict@ has @key@ as its first
-- component. The comparison is exact, so it is case-sensitive.
--
-- Written with 'any' rather than manual indexing: no partial list functions
-- and no local name shadowing the Prelude's @head@.
member :: String -> [(String, String)] -> Bool
member key dict = any ((== key) . fst) dict
-- TODO: make it case-insensitive
-- | Return the value of the first pair of the list whose name equals @key@
-- exactly, or 'Nothing' when no pair matches.
--
-- Uses pattern matching instead of @!! 0@ / @tail@, so the function is
-- visibly total and no longer shadows the Prelude's @head@.
lookup :: String -> [(String, String)] -> Maybe String
lookup _ [] = Nothing
lookup key ((name, value) : rest)
  | key == name = Just value
  | otherwise = lookup key rest
-- | Collect, in order, the values of every pair whose name equals @key@.
-- Returns 'Nothing' when there is no such pair, otherwise 'Just' the
-- non-empty list of values.
--
-- One total filter replaces the former 'isJust' / 'fromJust' / @!! 0@ chain.
lookupsmaybe :: String -> [(String, String)] -> Maybe [String]
lookupsmaybe key dict =
  case [value | (name, value) <- dict, name == key] of
    [] -> Nothing
    matches -> Just matches
2023-06-08 21:49:35 +02:00
-- | Collect, in order, the values of every pair whose name equals @key@.
-- The result is empty when no pair matches.
--
-- A list comprehension replaces the manual @!! 0@ / @tail@ recursion.
lookups :: String -> [(String, String)] -> [String]
lookups key dict = [value | (name, value) <- dict, name == key]
-- | Value of the first "Suppress-Script" field of a record, if there is one.
suppressScript :: [(String, String)] -> Maybe String
suppressScript = lookup "Suppress-Script"
-- | Build a typed 'Record' from the @(name, value)@ pairs of one record.
--
-- A record with a "Type" field is converted to the constructor matching that
-- type. A record without "Type" but with "File-Date" becomes a 'Date'.
--
-- Returns 'Left' with an error message when:
--
-- * the record has neither "Type" nor "File-Date";
-- * the "Type" value is not one of the known types;
-- * a field the constructor needs unconditionally ("Subtag" or "Tag",
--   "Added", and "Prefix" for extlang) is absent. Such a record used to
--   produce a 'Right' that threw once the missing field was inspected.
recordFactory :: [(String, String)] -> Either String Record
recordFactory r =
  case lookup "Type" r of
    Just thetype -> typed thetype
    Nothing ->
      case lookup "File-Date" r of
        Just date -> Right (Date date)
        Nothing -> Left "Invalid record: no Type or File-Date"
  where
    -- Fetch a field that must be present; name the field when it is not.
    required thetype name =
      case lookup name r of
        Just value -> Right value
        Nothing ->
          Left
            ( "Invalid record of type \""
                ++ thetype
                ++ "\": missing mandatory field \""
                ++ name
                ++ "\""
            )
    -- Dispatch on the "Type" value. Mandatory fields are checked first (in
    -- the Either monad), then the constructor is applied with its arguments
    -- in their declared order.
    typed thetype =
      case thetype of
        "language" -> do
          subtag <- required thetype "Subtag"
          added <- required thetype "Added"
          return (Lang (Language
            subtag
            (lookups "Description" r)
            added
            (suppressScript r)
            (lookup "Preferred-Value" r)
            (lookup "Macrolanguage" r)
            (lookup "Deprecated" r)
            (lookup "Scope" r)
            (lookupsmaybe "Comments" r)))
        "extlang" -> do
          subtag <- required thetype "Subtag"
          added <- required thetype "Added"
          prefix <- required thetype "Prefix"
          return (Extl (Extlang
            subtag
            (lookups "Description" r)
            added
            (suppressScript r)
            prefix
            (lookup "Preferred-Value" r)
            (lookup "Macrolanguage" r)
            (lookup "Scope" r)))
        "script" -> do
          subtag <- required thetype "Subtag"
          added <- required thetype "Added"
          return (Scr (Script
            subtag
            (lookups "Description" r)
            added
            (lookup "Deprecated" r)
            (lookupsmaybe "Comments" r)))
        "region" -> do
          subtag <- required thetype "Subtag"
          added <- required thetype "Added"
          return (Reg (Region
            subtag
            (lookups "Description" r)
            added
            (lookup "Deprecated" r)
            (lookupsmaybe "Comments" r)))
        "variant" -> do
          subtag <- required thetype "Subtag"
          added <- required thetype "Added"
          return (Var (Variant
            subtag
            (lookups "Description" r)
            added
            (lookups "Prefix" r)
            (lookup "Deprecated" r)
            (lookupsmaybe "Comments" r)))
        "redundant" -> do
          tag <- required thetype "Tag"
          added <- required thetype "Added"
          return (Red (Redundant
            tag
            (lookups "Description" r)
            added))
        "grandfathered" -> do
          tag <- required thetype "Tag"
          added <- required thetype "Added"
          return (GF (Grandfathered
            tag
            (lookups "Description" r)
            added))
        _ -> Left ("Unknown type \"" ++ thetype ++ "\"")
-- Parse one record: one or more fields, converted to (name, value) pairs and
-- handed to recordFactory. The parser's result is recordFactory's Either, so
-- an invalid record is a value here, not a parse failure.
record = fmap (recordFactory . map extract) (many1 field <?> "record")
-- A field name: one alphanumeric character followed by any number of
-- alphanumeric characters or dashes.
fieldname = (:) <$> alphaNum <*> many (alphaNum <|> dash)
-- A field value: any run (possibly empty) of visible characters and spaces.
-- A line end immediately followed by a space continues the value; only the
-- space is kept in the result.
fieldvalue = many valuechar <?> "field value"
  where
    continuation = try (newline >> space)
    valuechar = unicodechar <|> space <|> continuation
-- Any single character whose code point lies between 0x21 and 0x10ffff,
-- both included (so neither the space nor the characters below it).
unicodechar = satisfy inRange <?> "Character"
  where
    inRange thechar = thechar >= '\x21' && thechar <= '\x10ffff'
-- One "name : value" line: a field name, optional spaces, a colon, optional
-- spaces, the value and the terminating line end, packed into a Field.
field =
  ( Field
      <$> fieldname
      <* spaces
      <* colon
      <* spaces
      <*> fieldvalue
      <* newline
  )
    <?> "field"
-- The whole registry: records separated by "%%" lines, then end of input.
-- Yields the list of per-record results.
registry = (record `sepBy` doublepercent) <* eof
-- | 'True' exactly when the value is a 'Right'.
isRight :: Either a b -> Bool
isRight = either (const False) (const True)
-- | Extract the payload of a 'Right'. Partial: calls 'error' on a 'Left'.
fromRight :: Either a b -> b
fromRight = either (\_ -> error "Not a right value") id
-- | 'True' exactly when the value is a 'Left'.
isLeft :: Either a b -> Bool
isLeft = either (const True) (const False)
-- | Extract the payload of a 'Left'. Partial: calls 'error' on a 'Right'.
fromLeft :: Either a b -> a
fromLeft = either id (\_ -> error "Not a left value")
-- | Parse the text of a registry.
--
-- Returns 'SyntaxError' when the text does not match the grammar (the message
-- embeds the Parsec error) or when at least one record is invalid (the
-- message embeds the error of the first such record, in input order).
-- Otherwise returns 'Success' with every record.
--
-- 'sequence' over the list of per-record results does the check and the
-- extraction in one total pass, with no @!! 0@ indexing.
parse :: String -> CheckResult
parse input =
  case Parsec.parse Registry.Grammar.registry "" input of
    Left err -> SyntaxError ("Registry is not legal: " ++ show err)
    Right reg ->
      case sequence reg of
        Right records -> Success records
        Left firstErr -> SyntaxError ("Registry is not legal: " ++ firstErr)
-- TODO: the line number