From 5c564df4c14f3a9205a5eb706c5a45baa1afaba5 Mon Sep 17 00:00:00 2001 From: Stephane Bortzmeyer Date: Sun, 1 Oct 2023 12:18:00 +0200 Subject: [PATCH] Escape characters that are special for XML and HTML --- Registry/Utils.hs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Registry/Utils.hs b/Registry/Utils.hs index c1e5c9a..8f0e622 100644 --- a/Registry/Utils.hs +++ b/Registry/Utils.hs @@ -60,6 +60,14 @@ redundantFrom _ = error "Not a redundant" gfFrom (GF r) = r gfFrom _ = error "Not a grandfathered" +escapexml :: String -> String +escapexml = concatMap fixChar + where + fixChar '<' = "<" + fixChar '>' = ">" + fixChar '&' = "&" + fixChar c = [c] + onlyThisType :: String -> Record -> Bool onlyThisType "language" = onlyLang onlyThisType "extlang" = onlyExtlang @@ -80,7 +88,7 @@ onlyThisValue "script" v = (\r -> toUpperS (script'subtag r) == toUpperS (v)).sc onlyThisValue "region" v = (\r -> toUpperS (region'subtag r) == toUpperS (v)).regionFrom onlyThisValue "variant" v = (\r -> toUpperS (variant'subtag r) == toUpperS (v)).variantFrom onlyThisValue "redundant" v = (\r -> toUpperS (redundant'tag r) == toUpperS (v)).redundantFrom --- TODO: extensions +-- Extensions not managed? See issue #7. onlyThisValue t _ = error ("Unknown type of this value in the registry: " ++ t) toSubtag (Lang l) = (lang'subtag l) @@ -113,13 +121,12 @@ toString (Var v) = (variant'subtag v) ++ "\t" ++ dateToString (variant'added v) toString (Red r) = (redundant'tag r) ++ "\t" ++ dateToString (redundant'added r) ++ "\t" ++ descrToString (redundant'descr r) ++ "\n" --- TODO: escape non-XML chars dateToXML d = "" ++ d ++ "" addedToXML d = "" ++ d ++ "" deprecatedToXML Nothing = "" deprecatedToXML (Just d) = "" ++ d ++ "" commentToXML Nothing = "" -commentToXML (Just c) = "" ++ c ++ "" +commentToXML (Just c) = "" ++ (escapexml c) ++ "" sscriptToXML Nothing = "" sscriptToXML (Just s) = "" ++ s ++ "" prefToXML Nothing = "" @@ -128,7 +135,7 @@ macrolToXML Nothing = "" macrolToXML (Just l) = "" ++ l ++ "" scopeToXML Nothing = "" scopeToXML (Just s) = "" ++ s ++ "" -onedescrToXML s = "" ++ s ++ "" +onedescrToXML s = "" ++ (escapexml s) ++ "" descrToXML a = concat (map onedescrToXML a) oneprefixToXML s = "" ++ s ++ "" prefixesToXML a = concat (map oneprefixToXML a) @@ -171,9 +178,8 @@ toXML (Red r) = "" ++ (redundant'tag r) ++ "" ++ descrToXML (redundant'descr r) ++ "\n" toXML (Ext e) = error "No support for extensions" --- TODO: escape non-HTML chars -html_head s = "\n\n\n" ++ s ++ "

" ++ - s ++ "

" +html_head s = "\n\n\n" ++ (escapexml s) ++ "

" ++ + (escapexml s) ++ "

" html_tail = "

langtag.net Home

" linkLanguage langcode = if (length langcode) == 3 then @@ -189,13 +195,13 @@ addedToHTML d = "Added on " ++ d sscriptToHTML Nothing = "" sscriptToHTML (Just s) = " Suppress the script: " ++ s ++ "" -onedescrToHTML s = s +onedescrToHTML s = (escapexml s) descrToHTML a = concat (List.intersperse " / " (map onedescrToHTML a)) --- TODO: a prefix is not always a simple language, it can be composed of several subtags +-- But a prefix is not always a simple language, it can be composed of several subtags. See issue #3 oneprefixToHTML s = " Possible prefix: " ++ s ++ "" prefixesToHTML a = concat (map oneprefixToHTML a) commentToHTML Nothing = "" -commentToHTML (Just s) = " Comment: " ++ s +commentToHTML (Just s) = " Comment: " ++ (escapexml s) deprecatedToHTML Nothing = "" deprecatedToHTML (Just s) = " Deprecated: " ++ s