Escape characters that are special for XML and HTML

This commit is contained in:
Stephane Bortzmeyer 2023-10-01 12:18:00 +02:00
parent 770b7a1e14
commit 5c564df4c1

View File

@ -60,6 +60,14 @@ redundantFrom _ = error "Not a redundant"
gfFrom (GF r) = r gfFrom (GF r) = r
gfFrom _ = error "Not a grandfathered" gfFrom _ = error "Not a grandfathered"
escapexml :: String -> String
escapexml = concatMap fixChar
where
fixChar '<' = "&lt;"
fixChar '>' = "&gt;"
fixChar '&' = "&amp;"
fixChar c = [c]
onlyThisType :: String -> Record -> Bool onlyThisType :: String -> Record -> Bool
onlyThisType "language" = onlyLang onlyThisType "language" = onlyLang
onlyThisType "extlang" = onlyExtlang onlyThisType "extlang" = onlyExtlang
@ -80,7 +88,7 @@ onlyThisValue "script" v = (\r -> toUpperS (script'subtag r) == toUpperS (v)).sc
onlyThisValue "region" v = (\r -> toUpperS (region'subtag r) == toUpperS (v)).regionFrom onlyThisValue "region" v = (\r -> toUpperS (region'subtag r) == toUpperS (v)).regionFrom
onlyThisValue "variant" v = (\r -> toUpperS (variant'subtag r) == toUpperS (v)).variantFrom onlyThisValue "variant" v = (\r -> toUpperS (variant'subtag r) == toUpperS (v)).variantFrom
onlyThisValue "redundant" v = (\r -> toUpperS (redundant'tag r) == toUpperS (v)).redundantFrom onlyThisValue "redundant" v = (\r -> toUpperS (redundant'tag r) == toUpperS (v)).redundantFrom
-- TODO: extensions -- Extensions not managed? See issue #7.
onlyThisValue t _ = error ("Unknown type of this value in the registry: " ++ t) onlyThisValue t _ = error ("Unknown type of this value in the registry: " ++ t)
toSubtag (Lang l) = (lang'subtag l) toSubtag (Lang l) = (lang'subtag l)
@ -113,13 +121,12 @@ toString (Var v) = (variant'subtag v) ++ "\t" ++ dateToString (variant'added v)
toString (Red r) = (redundant'tag r) ++ "\t" ++ dateToString (redundant'added r) ++ "\t" ++ toString (Red r) = (redundant'tag r) ++ "\t" ++ dateToString (redundant'added r) ++ "\t" ++
descrToString (redundant'descr r) ++ "\n" descrToString (redundant'descr r) ++ "\n"
-- TODO: escape non-XML chars
dateToXML d = "<date>" ++ d ++ "</date>" dateToXML d = "<date>" ++ d ++ "</date>"
addedToXML d = "<added>" ++ d ++ "</added>" addedToXML d = "<added>" ++ d ++ "</added>"
deprecatedToXML Nothing = "" deprecatedToXML Nothing = ""
deprecatedToXML (Just d) = "<deprecated>" ++ d ++ "</deprecated>" deprecatedToXML (Just d) = "<deprecated>" ++ d ++ "</deprecated>"
commentToXML Nothing = "" commentToXML Nothing = ""
commentToXML (Just c) = "<comments>" ++ c ++ "</comments>" commentToXML (Just c) = "<comments>" ++ (escapexml c) ++ "</comments>"
sscriptToXML Nothing = "" sscriptToXML Nothing = ""
sscriptToXML (Just s) = "<suppress-script>" ++ s ++ "</suppress-script>" sscriptToXML (Just s) = "<suppress-script>" ++ s ++ "</suppress-script>"
prefToXML Nothing = "" prefToXML Nothing = ""
@ -128,7 +135,7 @@ macrolToXML Nothing = ""
macrolToXML (Just l) = "<macrolanguage>" ++ l ++ "</macrolanguage>" macrolToXML (Just l) = "<macrolanguage>" ++ l ++ "</macrolanguage>"
scopeToXML Nothing = "" scopeToXML Nothing = ""
scopeToXML (Just s) = "<scope>" ++ s ++ "</scope>" scopeToXML (Just s) = "<scope>" ++ s ++ "</scope>"
onedescrToXML s = "<description>" ++ s ++ "</description>" onedescrToXML s = "<description>" ++ (escapexml s) ++ "</description>"
descrToXML a = concat (map onedescrToXML a) descrToXML a = concat (map onedescrToXML a)
oneprefixToXML s = "<prefix>" ++ s ++ "</prefix>" oneprefixToXML s = "<prefix>" ++ s ++ "</prefix>"
prefixesToXML a = concat (map oneprefixToXML a) prefixesToXML a = concat (map oneprefixToXML a)
@ -171,9 +178,8 @@ toXML (Red r) = "<redundant><tag>" ++ (redundant'tag r) ++ "</tag>" ++
descrToXML (redundant'descr r) ++ "</redundant>\n" descrToXML (redundant'descr r) ++ "</redundant>\n"
toXML (Ext e) = error "No support for extensions" toXML (Ext e) = error "No support for extensions"
-- TODO: escape non-HTML chars html_head s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head><link rel=\"stylesheet\" type=\"text/css\" href=\"/ltru.css\" /><title>" ++ (escapexml s) ++ "</title></head><body><h1>" ++
html_head s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head><link rel=\"stylesheet\" type=\"text/css\" href=\"/ltru.css\" /><title>" ++ s ++ "</title></head><body><h1>" ++ (escapexml s) ++ "</h1>"
s ++ "</h1>"
html_tail = "<hr/><p><a href=\"/\">langtag.net Home</a></p></body></html>" html_tail = "<hr/><p><a href=\"/\">langtag.net Home</a></p></body></html>"
linkLanguage langcode = linkLanguage langcode =
if (length langcode) == 3 then if (length langcode) == 3 then
@ -189,13 +195,13 @@ addedToHTML d = "Added on " ++ d
sscriptToHTML Nothing = "" sscriptToHTML Nothing = ""
sscriptToHTML (Just s) = " Suppress the script: <code><a href=\"../script/" ++ s ++ sscriptToHTML (Just s) = " Suppress the script: <code><a href=\"../script/" ++ s ++
".html\">" ++ s ++ "</a></code>" ".html\">" ++ s ++ "</a></code>"
onedescrToHTML s = s onedescrToHTML s = (escapexml s)
descrToHTML a = concat (List.intersperse " / " (map onedescrToHTML a)) descrToHTML a = concat (List.intersperse " / " (map onedescrToHTML a))
-- TODO: a prefix is not always a simple language, it can be composed of several subtags -- But a prefix is not always a simple language, it can be composed of several subtags. See issue #3
oneprefixToHTML s = " Possible prefix: <code><a href=\"../language/" ++ s ++ ".html\">" ++ s ++ "</a></code>" oneprefixToHTML s = " Possible prefix: <code><a href=\"../language/" ++ s ++ ".html\">" ++ s ++ "</a></code>"
prefixesToHTML a = concat (map oneprefixToHTML a) prefixesToHTML a = concat (map oneprefixToHTML a)
commentToHTML Nothing = "" commentToHTML Nothing = ""
commentToHTML (Just s) = " Comment: " ++ s commentToHTML (Just s) = " Comment: " ++ (escapexml s)
deprecatedToHTML Nothing = "" deprecatedToHTML Nothing = ""
deprecatedToHTML (Just s) = " Deprecated: " ++ s deprecatedToHTML (Just s) = " Deprecated: " ++ s