164 lines
5.4 KiB
Haskell
164 lines
5.4 KiB
Haskell
|
module Registry.Grammar where
|
||
|
|
||
|
import Text.ParserCombinators.Parsec as Parsec hiding (space, spaces, newline)
|
||
|
import Parsers
|
||
|
import Registry.Types
|
||
|
import Data.Maybe
|
||
|
import Data.Char(ord, chr)
|
||
|
import Numeric
|
||
|
import Prelude hiding (lookup)
|
||
|
|
||
|
-- Too low-level to be in Registry.Types
|
||
|
|
||
|
data Field = Field {field'name::String, field'value::String} deriving Show
|
||
|
|
||
|
colon = char ':' <?> "colon"
|
||
|
dash = char '-' <?> "dash"
|
||
|
space = char ' ' <?> "space"
|
||
|
newline = many1 (try (string "\n\r") <|> string "\n" <|> string "\r") <?> "new line"
|
||
|
spaces = many space
|
||
|
|
||
|
doublepercent = do {string "%%"; newline} <?> "double percent line"
|
||
|
|
||
|
extract :: Field -> (String, String)
|
||
|
extract f = (field'name f, field'value f)
|
||
|
|
||
|
-- TODO: make it case-insensitive
|
||
|
member :: String -> [(String, String)] -> Bool
|
||
|
member key [] = False
|
||
|
member key dict = let head = dict !! 0 in
|
||
|
if key == fst head then
|
||
|
True
|
||
|
else
|
||
|
member key (tail dict)
|
||
|
|
||
|
-- TODO: make it case-insensitive
|
||
|
lookup :: String -> [(String, String)] -> Maybe String
|
||
|
lookup key [] = Nothing
|
||
|
lookup key dict = let head = dict !! 0 in
|
||
|
if key == fst head then
|
||
|
Just (snd head)
|
||
|
else
|
||
|
lookup key (tail dict)
|
||
|
|
||
|
lookups :: String -> [(String, String)] -> [String]
|
||
|
lookups key [] = []
|
||
|
lookups key dict = let head = dict !! 0 in
|
||
|
if key == fst head then
|
||
|
(snd head) : lookups key (tail dict)
|
||
|
else
|
||
|
lookups key (tail dict)
|
||
|
|
||
|
suppressScript r = lookup "Suppress-Script" r
|
||
|
|
||
|
recordFactory :: [(String, String)] -> Either String Record
|
||
|
recordFactory r =
|
||
|
if member "Type" r then
|
||
|
let thetype = fromJust (lookup "Type" r) in
|
||
|
-- TODO: test that the field exist. fromJust will raise an exception but we can
|
||
|
-- provide a better error message if we test
|
||
|
case thetype of
|
||
|
"language" -> Right (Lang (Language
|
||
|
(fromJust (lookup "Subtag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))
|
||
|
(suppressScript r)
|
||
|
(lookup "Preferred-Value" r)
|
||
|
(lookup "Macrolanguage" r)
|
||
|
(lookup "Deprecated" r)
|
||
|
(lookup "Scope" r)
|
||
|
))
|
||
|
"extlang" -> Right (Extl (Extlang
|
||
|
(fromJust (lookup "Subtag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))
|
||
|
(suppressScript r)
|
||
|
(fromJust (lookup "Prefix" r))
|
||
|
(lookup "Preferred-Value" r)
|
||
|
(lookup "Macrolanguage" r)
|
||
|
(lookup "Scope" r)
|
||
|
))
|
||
|
"script" -> Right (Scr (Script
|
||
|
(fromJust (lookup "Subtag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))))
|
||
|
"region" -> Right (Reg (Region
|
||
|
(fromJust (lookup "Subtag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))))
|
||
|
"variant" -> Right (Var (Variant
|
||
|
(fromJust (lookup "Subtag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))
|
||
|
(lookups "Prefix" r)))
|
||
|
"redundant" -> Right (Red (Redundant
|
||
|
(fromJust (lookup "Tag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))))
|
||
|
"grandfathered" -> Right (GF (Grandfathered
|
||
|
(fromJust (lookup "Tag" r))
|
||
|
(lookups "Description" r)
|
||
|
(fromJust (lookup "Added" r))))
|
||
|
_ -> Left ("Unknown type \"" ++ thetype ++ "\"")
|
||
|
else
|
||
|
if member "File-Date" r then
|
||
|
Right (Date (fromJust (lookup "File-Date" r)))
|
||
|
else
|
||
|
Left "Invalid record: no Type or File-Date"
|
||
|
|
||
|
record = do
|
||
|
fields <- many1 field <?> "record"
|
||
|
let therecord = map extract fields
|
||
|
return (recordFactory therecord)
|
||
|
|
||
|
fieldname = do
|
||
|
start <- alphaNum
|
||
|
rest <- many (alphaNum <|> dash)
|
||
|
return ([start] ++ rest)
|
||
|
|
||
|
fieldvalue = many (unicodechar <|> space <|> try (do {newline; space})) <?> "field value"
|
||
|
|
||
|
unicodechar = satisfy (\thechar ->
|
||
|
let c = (ord thechar) in
|
||
|
(c >= 0x21 && c <= 0x10ffff))
|
||
|
<?> "Character"
|
||
|
|
||
|
field = do
|
||
|
name <- fieldname
|
||
|
spaces
|
||
|
colon
|
||
|
spaces
|
||
|
value <- fieldvalue
|
||
|
newline
|
||
|
return (Field name value)
|
||
|
<?> "field"
|
||
|
|
||
|
registry = do
|
||
|
allrecords <- record `sepBy` doublepercent
|
||
|
eof
|
||
|
return allrecords
|
||
|
|
||
|
isRight (Right a) = True
|
||
|
isRight (Left a) = False
|
||
|
|
||
|
fromRight (Right a) = a
|
||
|
fromRight (Left a) = error "Not a right value"
|
||
|
|
||
|
isLeft (Left a) = True
|
||
|
isLeft (Right a) = False
|
||
|
|
||
|
fromLeft (Left a) = a
|
||
|
fromLeft (Right a) = error "Not a left value"
|
||
|
|
||
|
parse :: String -> CheckResult
|
||
|
parse input =
|
||
|
case (Parsec.parse Registry.Grammar.registry "" input) of
|
||
|
Left err -> SyntaxError ("Registry is not legal: " ++ (show err))
|
||
|
Right reg -> let ok = and (map isRight reg) in
|
||
|
if ok then
|
||
|
Success (map fromRight reg)
|
||
|
else
|
||
|
let firstErr = (filter isLeft reg) !! 0 in
|
||
|
SyntaxError ("Registry is not legal: " ++
|
||
|
fromLeft firstErr)
|
||
|
-- TODO: the line number
|