v1.2.0

* Removes "^" to disable text decoration line-wise. * CSS is no longer incorporated in the HTML page. * Perform sanity checks against unauthorized file access. * Properly close tags when the page exists in a non-null mode. * Split HTML generation in two: parsing and translating. * Create classes to handle gemtext parsing and translating. * Create class to generate back gemtext (for future test cases). * Fix: 404 doesn't occur for an empty file. * Page 404 fully generated by HtmGem itself.
2023-08-25 13:53:12 +02:00 · 2021-03-19 09:56:54 +01:00 · 2021-03-19 09:56:54 +01:00 · cef2417f91
commit cef2417f91
parent b2e09c54f5 f29cf3a476
5 changed files with 423 additions and 311 deletions
--- a/CHANGELOG.gmi
+++ b/CHANGELOG.gmi
@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
 => https://keepachangelog.com/en/1.0.0/ The format is based on keep a Changelog.
 => https://semver.org/spec/v2.0.0.html And this project adheres to Semantic Versioning.

+## [1.2.0] - 2021-03-19
+* Removes "^" to disable text decoration line-wise.
+* CSS is no longer incorporated in the HTML page.
+* Perform sanity checks against unauthorized file access.
+* Properly close tags when the page exists in a non-null mode.
+* Split HTML generation in two: parsing and translating.
+* Create classes to handle gemtext parsing and translating.
+* Create class to generate back gemtext (for future test cases).
+* Fix: 404 doesn't occur for an empty file.
+* Page 404 fully generated by HtmGem itself.
+
 ## [1.1.0] - 2021-03-14
 * File download when using "source" as a style.
 * Improves the regex.
--- a/index.gmi
+++ b/index.gmi
@ -118,8 +118,7 @@ La décoration du texte, qui interprête le **gras** par exemple, ne fait pas pa
 ### Désactiver la décoration du texte

 On peut :
-* commencer la ligne par **^**,
-* faire un bloc non décoré avec **^^^**,
+* désactiver et activer la décoration du texte avec une ligne **^^^**,
 * ajouter ce qui suit à la **réécriture** d’URL :
 > &textDecoration=0

--- a/index.php
+++ b/index.php
@ -1,305 +1,72 @@
 <?php

-mb_internal_encoding("UTF-8");
-mb_regex_encoding("UTF-8");
+require_once "lib-htmgem.php";

-define("NARROW_NO_BREAK_SPACE", "&#8239;");
-define("DASHES"
-    ,"‒" # U+2012 Figure Dash
-    ."–" # U+2013 En Dash
-    ."—" # U+2014 Em Dash
-    ."⸺" # U+2E3A Two-Em Dash
-    ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
-);
-
-$style = @$_REQUEST['style'];
-
-# to false only if textDecoration=0 in the URL
-$textDecoration = "0" != @$_REQUEST['textDecoration'];
-
-/* The url argument is always absolute compared to the document root
- * The leading slash is removed. so url=/foo/bar and url=foo/bar ar the same.
- */
+# The url argument is always absolute compared to the document root.
 $url = @$_REQUEST["url"];

-######################################## Installation page
+/* Installation page
+ *
+ * Accessing /htmgem directly displays the self-hosted documentation
+ * contained in "index.gmi". If that file has been removed, display an empty
+ * page with a comment.
+ */
 if (empty($url)) {
    if (!file_exists("index.gmi")) {
        http_response_code(403);
        die("<!-- index.gmi missing -->");
    }
-?>
-<!DOCTYPE html>
-<html lang="fr">
-<head>
-<title>Installation de HtmGem</title>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<style>
-<?php include("css/htmgem.css"); ?>
-</style>
-</head>
-<body>
-<?php
-    echo translateGemToHtml(@file_get_contents("index.gmi"));
-    echo "</body>\n</html>\n";
-    die();
+    $t = new \htmgem\GemTextTranslate_html(@file_get_contents("index.gmi"));
+    echo $t->getFullHtml();
+    exit();
 }
-######################################## /Installation page
+
+$documentRoot = $_SERVER['DOCUMENT_ROOT'];

 # Removes the trailing slash of the document root and the leading slash of the URL, so that exactly one slash joins them.
 $filePath = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/".ltrim($url, "/");

-$fileContents = @file_get_contents($filePath);
+/* Sanity checks against unauthorized file access.
+ *
+ * The comparison is made on canonical paths (realpath() resolves "..", "."
+ * and symbolic links). $filePath is built from the document root, so it
+ * always starts textually with it: testing the unresolved path would let
+ * "/../" sequences through.
+ * Says 404 even if the file exists to not give any information.
+ */
+$realFilePath = realpath($filePath);
+$realDocumentRoot = realpath($documentRoot);
+$go404 = false === $realFilePath       # does not exist
+    || false === $realDocumentRoot
+    || !is_file($realFilePath)         # a directory is not a page
+    || !preg_match("/\.gmi$/", $url)   # not finishing by .gmi
+    || strpos($realFilePath, rtrim($realDocumentRoot, "/")."/") !== 0; # not in web directory

-
-######################################## 404 page
-if (!file_exists($filePath) || !preg_match("/\.gmi$/", $url)) {
+/* 404 page
+ */
+if ($go404) {
    error_log("HtmGem: 404 $url $filePath");
-    http_response_code(404); ?>
-<!DOCTYPE html>
-<html lang="fr">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<style>
-<?php include("css/htmgem.css"); ?>
-</style>
-</head>
-<body>
-<?php
-    $text404 = <<<EOF
+    http_response_code(404);
+    $page404 = <<<EOF
 # ⚠ Page non trouvée

 **$url**

-=> $url Recharger 🔄
-
-=> /
+=> .. 🔄 🔄
 EOF;
-echo translateGemToHtml($text404);
-echo "</body>\n</html>";
-die();
+    $t = new \htmgem\GemTextTranslate_html($page404);
+    echo $t->getFullHtml();
+    exit();
 }
-######################################## /404 page

+# to false only if textDecoration=0 in the URL
+$textDecoration = "0" != @$_REQUEST['textDecoration'];
+
+$fileContents = @file_get_contents($filePath);
 # Removes the Byte Order Mark
 $fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);


-/**
- * Replaces markups things like __underlined__ to <u>underlined</u>.
- * @param $instruction the characters to replace, ex. _
- * @param $markup the markup to replace to, ex. "u" to get <u>…</u>
- * @param &$text where to replace.
+/* CSS and special style management
 */
-function markupPreg($instruction, $markup, &$text) {
-    $output = $text;
-
-    # Replaces couples "__word__" into "<i>word</i>".
-    $output = mb_ereg_replace("${instruction}(.+?)${instruction}", "<{$markup}>\\1</{$markup}>", $output);
-
-    # Replaces a remaining __ into "<i>…</i>" to the end of the line.
-    $output = mb_ereg_replace("${instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
-
-    $text = $output;
-}
-
-
-/**
- * Adds text attributes sucj as underline, bold, … to $line
- * @param $line the line to process
- */
-function addTextAttributes(&$line) {
-    global $textDecoration;
-    if (!$textDecoration) return;
-    markupPreg("__",   "u",   $line);
-    markupPreg("\*\*", "strong",   $line);
-    markupPreg("//",   "em",   $line);
-    markupPreg("~~",   "del", $line);
-}
-
-/**
- * Prepares the raw text to be displayed in HTML environment:
- * * Escapes the HTML entities yet contained in the Gemtext.
- * * Puts thin unbrakable spaces before some characters.
- * @param $text1, $text2 texts to process
- */
-function htmlPrepare(&$text) {
-    $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
-    $text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
-    $text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable
-
-    # Warning: using a monospace font editor may not display dashes as they should be!
-    # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
-    $text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
-
-    # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
-    $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
-}
-
-function translateGemToHtml($fileContents) {
-    $fileLines = preg_split("/\n/", $fileContents);
-    if (empty($fileLines[-1])) array_pop($fileLines); # Don't output a last empty line
-    ob_start();
-    $mode = null;
-    $mode_textAttributes = true;
-    foreach ($fileLines as $line) {
-        $reDoCount = 0;
-        $mode_textAttributes_temp = false;
-        while (true) {
-            if ($reDoCount>2) {
-                error_log("HtmGem: Too many loops, mode == '$mode'");
-                $mode = null;
-                $reDoCount = 0;
-                break;
-            }
-            $reDoCount += 1;
-            $line1 = substr($line, 0, 1); // $line can be modified
-            $line2 = substr($line, 0, 2); // in the meantime.
-            $line3 = substr($line, 0, 3);
-            if (is_null($mode)) {
-                if (empty($line)) {
-                    echo "<p>&nbsp;</p>\n";
-                } elseif ('^^^' == $line3) {
-                    $mode_textAttributes = !$mode_textAttributes;
-                } elseif ('^' == $line1 and !$mode_textAttributes_temp) {
-                    if (preg_match("/^\^\s*(.+)$/", $line, $parts)) {
-                        $line = $parts[1];
-                        $mode_textAttributes_temp = true;
-                    } else {
-                        $mode = "raw";
-                    }
-                    continue;
-                } elseif ("#" == $line1) {
-                    if (preg_match("/^(#{1,3})\s*(.+)/", $line, $sharps)) {
-                        $h_level = strlen($sharps[1]);
-                        $text = $sharps[2];
-                        htmlPrepare($text);
-                        switch ($h_level) {
-                            case 1: echo "<h1>".$text."</h1>\n"; break;
-                            case 2: echo "<h2>".$text."</h2>\n"; break;
-                            case 3: echo "<h3>".$text."</h3>\n"; break;
-                        }
-                    } else {
-                        $mode = "raw";
-                        continue;
-                    }
-                } elseif ("=>" == $line2) {
-                    if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
-                        $url_link = $linkParts[1];
-                        $url_label = @$linkParts[2];
-                        preg_match("/^([^:]+):/", $url_link, $matches);
-                        $url_protocol = @$matches[1];
-                        if (empty($url_protocol)) $url_protocol = "local";
-                        if (empty(trim($url_label))) {
-                            $url_label = $url_link;
-                        } else {
-                            // the label is humain-made, apply formatting
-                            htmlPrepare($url_label);
-                            if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($url_label);
-                        }
-                        echo "<p><a class='$url_protocol' href='$url_link'>$url_label</a></p>\n";
-                    } else {
-                        $mode = "raw";
-                        continue;
-                    }
-                } elseif ("```" == $line3) {
-                    preg_match("/^```\s*(.*)$/", $line, $matches);
-                    $alt_text = trim($matches[1]);
-                    if (empty($alt_text)) {
-                        echo "<pre>\n";
-                    } else {
-                        echo "<pre alt='$alt_text' title='$alt_text'>\n";
-                    }
-                    $mode="pre";
-                } elseif (">" == $line1) {
-                    echo "<blockquote>\n";
-                    $mode = "quote";
-                    continue;
-                } elseif ("*" == $line1) {
-                    echo "<ul>\n";
-                    $mode = "ul";
-                    continue;
-                } else {
-                    $mode = "raw";
-                    continue;
-                }
-            } else {
-                if ("raw"==$mode) {
-                    if (empty($line)) {
-                        $line = "&nbsp;";
-                    } else {
-                        htmlPrepare($line);
-                        if ($mode_textAttributes xor $mode_textAttributes_temp)
-                            addTextAttributes($line);
-                    }
-                    echo "<p>$line</p>\n";
-                    $mode = null;
-                } elseif ("pre"==$mode) {
-                    if ("```" == $line3) {
-                        echo "</pre>\n";
-                        $mode = null;
-                    } else {
-                        htmlPrepare($line);
-                        echo $line."\n";
-                    }
-                } elseif ("quote"==$mode) {
-                    if (">" == $line1) {
-                        preg_match("/^>\s*(.*)$/", $line, $quoteParts);
-                        $quote = $quoteParts[1];
-                        if (empty($quote))
-                            echo "<p>&nbsp;</p>\n";
-                        else {
-                            htmlPrepare($quote);
-                            if ($mode_textAttributes xor $mode_textAttributes_temp)
-                                addTextAttributes($line);
-                            echo "<p>".$quote."</p>\n";
-                        }
-                    } else {
-                        echo "</blockquote>\n";
-                        $mode = null;
-                        continue;
-                    }
-                } elseif ("ul"==$mode) {
-                    if ("*" == $line1) {
-                        preg_match("/^\*\s*(.*)$/", $line, $ulParts);
-                        $li = $ulParts[1];
-                        if (empty($li)) {
-                            echo "<li>&nbsp;\n";
-                        } else {
-                            htmlPrepare($li);
-                            if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
-                            echo "<li>".$li."\n";
-                        }
-                    } else {
-                        echo "</ul>\n";
-                        $mode = null;
-                        continue;
-                    }
-                } else {
-                    die("Unexpected mode: $mode!");
-                }
-            }
-            break; // exits the while(true) as no continue occured
-        } // while(true)
-    }
-    $html = ob_get_contents();
-    ob_clean();
-    return $html;
-}
-
-# Gets the page title: the first occurrence with # at the line start
-mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
-$page_title = @$matches[1];
-
-###################################### CSS Management
-/**
-* if &style=source displays the source directly and stops.
-* if there's a filename.css besides filename.gmi, use the css and stops.
-* if &style=<NOTHING> then embbed the default style, and stops.
-* if &style=<word not beginngin by slash> then use htmgem/word.css
-* if &style=/… then use the … as as stylesheet.
-**/

+$style = @$_REQUEST['style'];
 if ("source" == $style) {
    $basename = basename($filePath);
    header("Cache-Control: public");
@ -307,9 +74,12 @@ if ("source" == $style) {
    header("Content-Type: text/plain");
    header("Content-Transfer-Encoding: binary");
    header('Content-Length: ' . filesize($filePath));
-    readfile($filePath);
+    echo $fileContents;
    exit();
 } elseif ("pre" == $style) {
+    # Gets the page title: the first occurrence with # at the line start
+    mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
+    $page_title = @$matches[1];
    $fileContents = htmlspecialchars($fileContents, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
    echo <<<EOL
 <!DOCTYPE html>
@ -318,52 +88,32 @@ if ("source" == $style) {
 <title>$page_title</title>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 </head>
-<pre>$fileContents</pre>
+<pre>
+$fileContents</pre>
 </body>
 </html>
 EOL;
-} else {
+    exit();
+}
+
+$t = new \htmgem\GemTextTranslate_html($fileContents, $textDecoration);
+if ("none" == $style) {
+    $t->addCss("");
+} elseif ("/" == @$style[0]) {
+    $t->addCss($style);
+} elseif (empty($style)) {
    $parts = pathinfo($filePath);
    $localCss = $parts["filename"].".css";
    $localCssFilePath = $parts["dirname"]."/".$localCss;
    if (file_exists($localCssFilePath)) {
         # Warning: using htmgem.php?url=… will make $localCss not found,
         # as the path is relative to htmgem.php and not to / !
-        $cssContent = "<link type='text/css' rel='StyleSheet' href='$localCss'>";
-    } else {
-        if (empty($style)) {
-            $cssContent =
-                 "<style>\n"
-                .@file_get_contents("css/htmgem.css")
-                ."</style>\n";
-        } else {
-            if ("none" == $style) {
-                $cssContent = "";
-            } else {
-                if ("/" == $style[0])
-                    $href = $style;
-                else
-                    $href = "/htmgem/css/$style.css";
-                $cssContent = "<link type='text/css' rel='StyleSheet' href='$href'>";
-            }
-        }
+        $t->addCss($localCss);
    }
-    echo <<<EOL
-<!DOCTYPE html>
-<html lang="fr">
-<head>
-<title>$page_title</title>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-$cssContent
-</head>
-<body>
-EOL;
-
-    echo "\n".translateGemToHtml($fileContents);
-    echo "</body>\n</html>\n";
+} else { #TODO: regex check for $style
+    $t->addCss("/htmgem/css/$style.css");
 }

-
-ob_end_flush();
+echo $t->getFullHtml();

 ?>
--- a/lib-htmgem.php
+++ b/lib-htmgem.php
@ -0,0 +1,350 @@
+<?php
+
+namespace htmgem;
+
+mb_internal_encoding("UTF-8");
+mb_regex_encoding("UTF-8");
+
+/**
+ * Parses the gemtext and lazily yields the internal format version: one
+ * node (an array) per line or per block.
+ *
+ * Yielded nodes, all having a "mode" key:
+ * - "^^^": text decoration toggle line
+ * - "#", "##", "###": heading, with "title"
+ * - "=>": link, with "link" and "text" ("text" is "" without a label)
+ * - "```": preformatted block, with "alt" and "texts" (untrimmed lines)
+ * - ">" and "*": quote and list blocks, with "texts" (trimmed lines)
+ * - "": plain text line, with "text"
+ *
+ * A heading without a title or a link without a target is yielded as a
+ * plain text line.
+ *
+ * @param str $fileContents the gemtext to parse
+ */
+function gemtextParser($fileContents) {
+    $fileLines = explode("\n", $fileContents);
+    $mode = null;
+    $current = array();
+    foreach ($fileLines as $line) {
+        $reDoCount = 0;
+        while (true) {
+            /* The continue instruction is used to make another turn when there is a transition
+             * between two modes. */
+            if ($reDoCount>1) {
+                die("HtmGem: Too many loops, mode == '$mode'");
+            }
+            $reDoCount += 1;
+            $line1 = substr($line, 0, 1);
+            $line2 = substr($line, 0, 2);
+            $line3 = substr($line, 0, 3);
+            if (is_null($mode)) {
+                if ('^^^' == $line3) {
+                    yield array("mode" => "^^^");
+                } elseif ("#" == $line1 && preg_match("/^(#{1,3})\s*(.+)/", $line, $matches)) {
+                    yield array("mode" => $matches[1], "title" => trim($matches[2]));
+                } elseif ("=>" == $line2 && preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $matches)) {
+                    // The label group is absent from $matches when the link has no label.
+                    $label = isset($matches[2]) ? trim($matches[2]) : "";
+                    yield array("mode" => "=>", "link" => trim($matches[1]), "text" => $label);
+                } elseif ("```" == $line3) {
+                    preg_match("/^```\s*(.*)$/", $line, $matches);
+                    $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
+                    $mode="```";
+                } elseif (">" == $line1) {
+                    preg_match("/^>\s*(.*)$/", $line, $matches);
+                    $current = array("mode" => ">", "texts" => array(trim($matches[1])));
+                    $mode = ">";
+                } elseif ("*" == $line1) {
+                    preg_match("/^\*\s*(.*)$/", $line, $matches);
+                    $current = array("mode" => "*", "texts" => array(trim($matches[1])));
+                    $mode = "*";
+                } else {
+                    // text_line, also reached by a lone "#" or "=>" whose regex did not match
+                    yield array("mode"=>"", "text" => trim($line));
+                }
+            } else {
+                if ("```"==$mode) {
+                    if ("```" == $line3) {
+                        yield $current;
+                        $current = array();
+                        $mode = null;
+                    } else {
+                        $current["texts"] []= $line; // No trim() as it’s a preformated text!
+                    }
+                } elseif (">"==$mode) {
+                    if (">" == $line1) {
+                        preg_match("/^>\s*(.*)$/", $line, $matches);
+                        $current["texts"] []= trim($matches[1]);
+                    } else {
+                        // End of the quote: the current line is parsed again in the null mode.
+                        yield $current;
+                        $current = array();
+                        $mode = null;
+                        continue;
+                    }
+                } elseif ("*"==$mode) {
+                    if ("*" == $line1) {
+                        preg_match("/^\*\s*(.*)$/", $line, $matches);
+                        $current["texts"] []= trim($matches[1]);
+                    } else {
+                        // End of the list: the current line is parsed again in the null mode.
+                        yield $current;
+                        $current = array();
+                        $mode = null;
+                        continue;
+                    }
+                } else {
+                    die("Unexpected mode: $mode!");
+                }
+            }
+            break; // exits the while(true) as no continue occured
+        } // while(true)
+    }// foreach
+    if ($current) yield $current; # File ends before the block.
+} // gemtextParser
+
+
+/**
+ * Translates the internal format back into gemtext.
+ * Use cases:
+ *
+ * - test suites
+ * - serialisation easier with a text content
+ * - normalization (trimming spaces for instance)
+ */
+class GemtextTranslate_gemtext {
+
+    /** The nodes to translate, as yielded by gemtextParser(). */
+    protected $parsedGemtext;
+    /** The generated gemtext, returned by __toString(). */
+    protected $translatedGemtext = "";
+
+    /**
+     * Translates the nodes immediately; the result is read with __toString().
+     * @param $parsedGemtext the nodes to translate (any value foreach accepts)
+     */
+    function __construct($parsedGemtext) {
+        $this->parsedGemtext = $parsedGemtext;
+        $this->translate();
+    }
+
+    /**
+     * Generates the gemtext from the nodes, one output line per text.
+     * Stops the script on a node having an unknown mode.
+     */
+    protected function translate() {
+        ob_start();
+        foreach ($this->parsedGemtext as $node) {
+            $mode = $node["mode"];
+            switch($mode) {
+                case "":
+                    echo $node["text"]."\n";
+                    break;
+                case "*":
+                    foreach ($node["texts"] as $text) {
+                        echo "* $text\n";
+                    }
+                    break;
+                case "```":
+                    // The alternative text belongs to the opening line.
+                    $alt = isset($node["alt"]) ? (string)$node["alt"] : "";
+                    print("" === $alt ? "```\n" : "``` $alt\n");
+                    foreach ($node["texts"] as $text) {
+                        echo "$text\n";
+                    }
+                    print("```\n");
+                    break;
+                case ">":
+                    foreach ($node["texts"] as $text) {
+                        echo "> $text\n";
+                    }
+                    break;
+                case "=>":
+                    $linkText = (string)$node["text"];
+                    // Strict test: a label such as "0" must be kept.
+                    if ("" !== $linkText) $linkText = " $linkText";
+                    print("=> ".$node["link"].$linkText."\n");
+                    break;
+                case "#":
+                case "##":
+                case "###":
+                    print("$mode ".$node["title"]."\n");
+                    break;
+                case "^^^":
+                    print("^^^\n");
+                    break;
+                default:
+                    die("Unknown mode: '{$node["mode"]}'\n");
+            }
+        }
+
+        $this->translatedGemtext = ob_get_clean();
+    }
+
+    public function __toString() {
+        return $this->translatedGemtext;
+    }
+} // GemtextTranslate_gemtext
+
+
+/**
+ * Translates gemtext, or its already parsed internal format, to HTML.
+ */
+class GemtextTranslate_html {
+
+    protected $cssList = array();
+    protected $pageTitle = "";
+    /** The nodes to translate, as yielded by gemtextParser(). */
+    protected $parsedGemtext;
+    public $translatedGemtext;
+
+    /**
+     * Translates immediately; the result is read with __toString() or
+     * output with getFullHtml().
+     * @param $parsedGemtext either the nodes to translate (array or
+     *        Traversable), or the gemtext itself: any other value is cast
+     *        to a string (null and false give an empty text) then parsed
+     * @param $textDecorationEnabled false to never decorate the text
+     */
+    function __construct($parsedGemtext, $textDecorationEnabled=true) {
+        if (!is_array($parsedGemtext) && !($parsedGemtext instanceof \Traversable)) {
+            // to delete the last empty line, <p>&nbsp;</p> in HTML
+            $parsedGemtext = rtrim((string)$parsedGemtext);
+            // The text must be parsed
+            $parsedGemtext = gemtextParser($parsedGemtext);
+        }
+        $this->parsedGemtext = $parsedGemtext;
+        $this->translate($textDecorationEnabled);
+    }
+
+    /**
+     * Adds a stylesheet URL to link from the page generated by getFullHtml().
+     * An empty string means "no stylesheet" and disables the default one.
+     */
+    function addCss($css) {
+        $this->cssList []= $css;
+    }
+
+    const NARROW_NO_BREAK_SPACE = "&#8239;";
+    const DASHES
+        ="‒" # U+2012 Figure Dash
+        ."–" # U+2013 En Dash
+        ."—" # U+2014 Em Dash
+        ."⸺" # U+2E3A Two-Em Dash
+        ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
+    ;
+
+    /**
+     * Escapes a value to be put between single quotes in an HTML attribute.
+     * Entities already present in the value are not encoded a second time.
+     * @param $value the raw attribute value
+     * @return the escaped value
+     */
+    protected static function escapeAttribute($value) {
+        return htmlspecialchars((string)$value, ENT_HTML5|ENT_QUOTES, "UTF-8", false);
+    }
+
+    /**
+     * Replaces markups things like __underlined__ to <u>underlined</u>.
+     * @param $instruction the regex matching the markup, ex. "__"
+     * @param $markup the markup to replace to, ex. "u" to get <u>…</u>
+     * @param &$text where to replace.
+     */
+    protected static function markupPreg($instruction, $markup, &$text) {
+        $output = $text;
+
+        # Replaces couples "__word__" into "<u>word</u>".
+        $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1</{$markup}>", $output);
+
+        # Replaces a remaining __ into "<u>…</u>" to the end of the line.
+        $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
+
+        $text = $output;
+    }
+
+    /**
+     * Adds text attributes such as underline, bold, … to $line
+     * @param &$line the line to process
+     */
+    protected static function addTextDecoration(&$line) {
+        self::markupPreg("__",   "u",      $line);
+        self::markupPreg("\*\*", "strong", $line);
+        self::markupPreg("//",   "em",     $line);
+        self::markupPreg("~~",   "del",    $line);
+    }
+
+    /**
+     * Prepares the raw text to be displayed in HTML environment:
+     * * Replaces an empty text by a no-break space.
+     * * Escapes the HTML special characters contained in the Gemtext.
+     * * Puts narrow no-break spaces next to some characters.
+     * @param &$text the text to process
+     */
+    protected static function htmlPrepare(&$text) {
+        // Strict test: empty() would also blank a text such as "0".
+        if (is_null($text) || "" === $text) {
+            $text = "&nbsp;";
+        } else {
+            $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
+            $text = mb_ereg_replace("\ ([?!:;»€$])", self::NARROW_NO_BREAK_SPACE."\\1", $text);
+            $text = mb_ereg_replace("([«])\ ", "\\1".self::NARROW_NO_BREAK_SPACE, $text); # Narrow no-break space
+
+            # Warning: using a monospace font editor may not display dashes as they should be!
+            # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
+            $text = mb_ereg_replace("([".self::DASHES."]) ([^".self::DASHES.".]+) ([".self::DASHES."])", "\\1".self::NARROW_NO_BREAK_SPACE."\\2".self::NARROW_NO_BREAK_SPACE."\\3", $text);
+
+            # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
+            $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".self::NARROW_NO_BREAK_SPACE."\\2.", $text);
+        }
+    }
+
+    /**
+     * Generates the HTML of the nodes into $this->translatedGemtext and
+     * keeps the first level 1 heading as the page title.
+     * Stops the script on a node having an unknown mode.
+     * @param $textDecoration false to never decorate the text, whatever the
+     *        "^^^" lines say
+     */
+    public function translate($textDecoration=true) {
+        // Toggled by each "^^^" line. The text is decorated only when both
+        // $textDecoration and this flag are true.
+        $decorationOn = true;
+        ob_start();
+        foreach ($this->parsedGemtext as $node) {
+            $mode = $node["mode"];
+            $decorate = $textDecoration && $decorationOn;
+            switch($mode) {
+                case "":
+                    $text = $node["text"];
+                    self::htmlPrepare($text);
+                    if ($decorate) self::addTextDecoration($text);
+                    echo "<p>$text</p>\n";
+                    break;
+                case "*":
+                    echo "<ul>\n";
+                    foreach ($node["texts"] as $text) {
+                        self::htmlPrepare($text);
+                        if ($decorate) self::addTextDecoration($text);
+                        print("<li>$text\n");
+                    }
+                    echo "</ul>\n";
+                    break;
+                case "```":
+                    $text = implode("\n", $node["texts"]);
+                    self::htmlPrepare($text);
+                    $alt = isset($node["alt"]) ? self::escapeAttribute($node["alt"]) : "";
+                    if ("" === $alt) {
+                        echo "<pre>\n$text\n</pre>\n";
+                    } else {
+                        echo "<pre alt='$alt' title='$alt'>\n$text\n</pre>\n";
+                    }
+                    break;
+                case ">":
+                    $text = implode("\n", $node["texts"]);
+                    self::htmlPrepare($text);
+                    if ($decorate) self::addTextDecoration($text);
+                    echo "<blockquote>\n$text\n</blockquote>\n";
+                    break;
+                case "=>":
+                    $link = $node["link"];
+                    $linkText = $node["text"];
+                    if (is_null($linkText) || "" === $linkText) {
+                        // No label: the link itself is displayed, undecorated.
+                        $linkText = $link;
+                        self::htmlPrepare($linkText);
+                    } else {
+                        self::htmlPrepare($linkText);
+                        if ($decorate) self::addTextDecoration($linkText);
+                    }
+                    preg_match("/^([^:]+):/", $link, $matches);
+                    $protocol = isset($matches[1]) ? $matches[1] : "local";
+                    // Both values go between single quotes: quotes must be escaped.
+                    $class = self::escapeAttribute($protocol);
+                    $href = self::escapeAttribute($link);
+                    echo "<p><a class='$class' href='$href'>$linkText</a></p>\n";
+                    break;
+                case "#":
+                case "##":
+                case "###":
+                    $title = $node["title"];
+                    self::htmlPrepare($title);
+                    if ("#" === $mode && "" === $this->pageTitle) $this->pageTitle = $title;
+                    $level = strlen($mode);
+                    echo "<h$level>$title</h$level>\n";
+                    break;
+                case "^^^":
+                    $decorationOn = !$decorationOn;
+                    break;
+                default:
+                    die("Unknown mode: '{$node["mode"]}'\n");
+            }
+        }
+
+        $this->translatedGemtext = ob_get_clean();
+    }
+
+    /**
+     * Outputs (echo) the full HTML page: head with the title and the
+     * stylesheets, then the translated gemtext as body. Returns nothing.
+     * The default stylesheet is linked only when addCss() was never called.
+     */
+    function getFullHtml() {
+        if (!$this->cssList)
+            $css = array("/htmgem/css/htmgem.css");
+        else
+            $css = $this->cssList;
+        echo "<!DOCTYPE html>\n"
+            ."<html>\n"
+            ."<head>\n"
+            ."<title>{$this->pageTitle}</title>\n"
+            ."<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n";
+        foreach ($css as $c) {
+            // addCss("") asks for no stylesheet: an empty href would designate the page itself.
+            if ("" === (string)$c) continue;
+            // The URL may come from the request: it must not be able to close the attribute.
+            $href = self::escapeAttribute($c);
+            echo "<link type='text/css' rel='StyleSheet' href='$href'>\n";
+        }
+        echo "</head>\n<body>\n";
+        echo $this->translatedGemtext;
+        echo "</body>\n</html>\n";
+    }
+
+    public function __toString() {
+        return $this->translatedGemtext;
+    }
+} // GemTextTranslate_html
+
+?>
--- a/tutogemtext.gmi
+++ b/tutogemtext.gmi
@ -53,9 +53,11 @@ Il ne peut exister qu’un lien par ligne. Et la ligne est dédiée à ça ! Voi

 La décoration du texte ne fait pas partie des spécifications de Gemini.

-^ Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
+Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
 Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.

+On peut désactiver et activer la décoration du texte avec **^^^** sur une ligne.
+
 Et voilà ! Vous savez tout ce qui est à savoir 🥳 Les spécifications complètes sont sur le site de Gemini :
 => https://gemini.circumlunar.space/docs/specification.gmi