"dir1" --> "/dir1"
 *     --> "dir2" --> "/dir1/dir2"
 *     --> "page.gmi" --> "/dir1/dir2/page.gmi"
 *
 * Empty segments (doubled or trailing slashes) are ignored. The result is keyed
 * by segment name, so a segment name occurring twice keeps only its last link.
 * An empty array is returned when the path contains no segment.
 */
function split_path_links($path, $prefix="") {
    $parts = array_filter(explode("/", $path), 'strlen');
    if (empty($parts)) return array();

    // An absolute path keeps its leading slash in every generated link.
    $stack = ("/" == $path[0]) ? "/" : "";

    $output = array();
    $slash = "";    // no separator in front of the very first segment
    foreach ($parts as $part) {
        $stack .= $slash.$part;
        $output[$part] = $prefix.$stack;
        $slash = "/";
    }
    return $output;
}

/**
 * Parses the gemtext and generates the internal format version.
 *
 * This function is a generator. It yields one array ("node") per element:
 *
 *   array("mode" => "^^^")
 *   array("mode" => "#" | "##" | "###", "title" => string)
 *   array("mode" => "=>", "link" => string, "text" => string)
 *   array("mode" => "```", "alt" => string, "texts" => array of strings)
 *   array("mode" => ">", "texts" => array of strings)
 *   array("mode" => "*", "texts" => array of strings)
 *   array("mode" => "", "text" => string)      // plain text line
 *
 * Consecutive "> " lines (resp. "* " lines) are gathered into a single node,
 * as are all the lines found between two "```" lines.
 *
 * @param string $fileContents the gemtext to parse
 * @return Generator the nodes, in document order
 */
function gemtextParser($fileContents) {
    // empty() alone would also discard the valid one-character document "0".
    // As this function is a generator, returning just ends it without any node.
    if (empty($fileContents) && "0" !== $fileContents) return array();

    $fileContents = rtrim($fileContents); // drops the trailing whitespace, hence the last empty lines
    $fileLines = mb_split("\n|\r\n?", $fileContents); // Unix, Mac, Windows line feeds
    $mode = null;           // null: outside any block, else the kind of the block being gathered
    $current = array();     // the multi-line node being gathered
    foreach ($fileLines as $line) {
        // The line is never altered below, so its prefixes are computed once.
        $line1 = substr($line, 0, 1);
        $line2 = substr($line, 0, 2);
        $line3 = substr($line, 0, 3);
        $reDoCount = 0;
        while (true) {
            /* The continue instruction is used to make another turn when there is a transition
             * between two modes: the line that closes a block is then parsed again with no mode.
             */
            if ($reDoCount>1) {
                die("HtmGem: Too many loops, mode == '$mode'");
            }
            $reDoCount += 1;
            if (is_null($mode)) {
                if ('^^^' == $line3) {
                    yield array("mode" => "^^^");
                } elseif ("#" == $line1) {
                    preg_match("/^(#{1,3})\s*(.+)?/", $line, $matches);
                    yield array("mode" => $matches[1], "title" => trim($matches[2]??""));
                } elseif ("=>" == $line2) {
                    // $matches stays empty when no link follows the "=>", hence the ?? below.
                    preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $matches);
                    yield array("mode" => "=>", "link" => trim($matches[1]??""), "text" => trim($matches[2]??""));
                } elseif ("```" == $line3) {
                    preg_match("/^```\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
                    $mode="```";
                } elseif (">" == $line1) {
                    preg_match("/^>\s*(.*)$/", $line, $matches);
                    $current = array("mode" => ">", "texts" => array(trim($matches[1])));
                    $mode = ">";
                } elseif ("*" == $line1) {
                    preg_match("/^\*\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "*", "texts" => array(trim($matches[1])));
                    $mode = "*";
                } else {
                    // text_line
                    yield array("mode"=>"", "text" => rtrim($line));
                }
            } else {
                if ("```"==$mode) {
                    if ("```" == $line3) {
                        // Closing fence: the block is complete, the line itself is consumed.
                        yield $current;
                        $current = array();
                        $mode = null;
                    } else {
                        $current["texts"] []= rtrim($line); // No ltrim() as it's a preformatted text!
                    }
                } elseif (">"==$mode) {
                    if (">" == $line1) {
                        preg_match("/^>\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        // End of the quote: emit it and parse this line again with no mode.
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue;
                    }
                } elseif ("*"==$mode) {
                    if ("*" == $line1) {
                        preg_match("/^\*\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        // End of the list: emit it and parse this line again with no mode.
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue;
                    }
                } else {
                    die("Unexpected mode: $mode!");
                }
            }
            break; // exits the while(true) as no continue occurred
        } // while(true)
    } // foreach
    if ($current) yield $current; # File ends before the block.
} // gemtextParser

/**
 * Translates the internal format into a gemtext.
* Use cases:
 *
 * - test suites
 * - serialisation easier with a text content
 * - normalization (trimming spaces for instance)
 */
class GemtextTranslate_gemtext {

    /** @var iterable the nodes yielded by gemtextParser() */
    public $parsedGemtext;

    /** @var string the gemtext rebuilt by translate() */
    public $translatedGemtext = "";

    /**
     * Parses the given gemtext and immediately rebuilds it.
     * @param string $parsedGemtext the raw gemtext; it is parsed here
     */
    function __construct($parsedGemtext) {
        // empty() alone would also discard the valid document "0".
        if (self::isBlank($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The text must be parsed
        $this->parsedGemtext = gemtextParser($parsedGemtext);
        $this->translate();
    }

    /**
     * Tells whether a value is empty in the sense of empty(), the string "0" excepted
     * as it is a legitimate text.
     * @param mixed $value the value to test
     * @return bool
     */
    protected static function isBlank($value) {
        return empty($value) && "0" !== $value;
    }

    /**
     * Rebuilds a gemtext from the parsed nodes and stores it in $translatedGemtext.
     */
    protected function translate() {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $output .= $node["text"]."\n";
                    break;
                case "*":
                    foreach ($node["texts"] as $text) {
                        $output .= "* $text\n";
                    }
                    break;
                case "```":
                    $alt = $node["alt"];
                    if (self::isBlank($alt))
                        $output .= "```\n";
                    else
                        $output .= "``` $alt\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "$text\n";
                    }
                    $output .= "```\n";
                    break;
                case ">":
                    foreach ($node["texts"] as $text) {
                        // No trailing space on an empty quote line.
                        if (self::isBlank($text))
                            $output .= ">\n";
                        else
                            $output .= "> $text\n";
                    }
                    break;
                case "=>":
                    $linkText = $node["text"];
                    $link = $node["link"];
                    if (!self::isBlank($linkText)) $linkText = " $linkText";
                    if (!self::isBlank($link)) $link = " $link";
                    $output .= "=>".$link.$linkText."\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $output .= "$mode ".$node["title"]."\n";
                    break;
                case "^^^":
                    $output .= "^^^\n";
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }

    /**
     * @return string the rebuilt gemtext
     */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_gemtext

/**
 * Translates the internal format to HTML
 */
class GemtextTranslate_html {

    protected $cssList = array();
    protected $pageTitle = "";
    public $translatedGemtext;

    /** @var string|null the prefix to prepend to local links, null when unused */
    public $urlPrefix;

    /** @var string|null the directory of the current page, joined to local links */
    public $currentPageDir;

    /** @var iterable the parsed nodes, kept as an array so translate() can run again */
    public $parsedGemtext;

    /**
     * Parses the given gemtext and immediately translates it.
     * @param $parsedGemtext the raw gemtext; it is parsed here
     * @param $textDecoration bool to interpret or not the text decoration
     * @param $urlPrefix the prefix to prepend if the URL rewriting is not on
     * @param $currentPageDir the current directory, to be used without URL rewriting
     */
    function __construct($parsedGemtext, $textDecoration=true, $urlPrefix=null, $currentPageDir=null) {
        $this->urlPrefix = $urlPrefix;
        $this->currentPageDir = $currentPageDir;

        // empty() alone would also discard the valid document "0".
        if (self::isBlank($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The text must be parsed. The generator is drained into an array because a
        // generator can be iterated only once, whereas translate() is public.
        $this->parsedGemtext = iterator_to_array(gemtextParser($parsedGemtext), false);
        $this->translate($textDecoration);
    }

    /** Registers one more CSS entry. */
    function addCss($css) {
        $this->cssList []= $css;
    }

    /** @return array the CSS entries registered with addCss() */
    function getCss() {
        return $this->cssList;
    }

    /** @return string the first level-1 title met by translate(), "" if none */
    function getTitle() {
        return $this->pageTitle;
    }

    const NARROW_NO_BREAK_SPACE = " ";
    const DASHES ="‒" # U+2012 Figure Dash
                 ."–" # U+2013 En Dash
                 ."—" # U+2014 Em Dash
                 ."⸺" # U+2E3A Two-Em Dash
                 ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
                 ;

    /**
     * Tells whether a value is empty in the sense of empty(), the string "0" excepted
     * as it is a legitimate text.
     * @param mixed $value the value to test
     * @return bool
     */
    protected static function isBlank($value) {
        return empty($value) && "0" !== $value;
    }

    /**
     * Replaces text enclosed in a pair of markers, like __underlined__, using the given markup.
     * A marker left alone applies up to the end of the line.
     * @param $instruction the marker to look for, as a regular expression, ex. "__"
     * @param $markup the name of the markup to use, ex. "u"
     * @param &$text where to replace.
     */
    protected static function markupPreg($instruction, $markup, &$text) {
        // NOTE(review): the replacement strings below emit no closing markup — confirm.
        $output = $text;

        # Replaces the couples of markers, as in "__word__".
        $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1", $output);

        # Replaces a remaining marker, up to the end of the line.
        $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1", $output);

        $text = $output;
    }

    /**
     * Adds text attributes such as underline, bold, … to $line
     * @param &$line the line to process, modified in place
     */
    protected static function addTextDecoration(&$line) {
        self::markupPreg("__", "u", $line);
        self::markupPreg("\*\*", "strong", $line);
        self::markupPreg("//", "em", $line);
        self::markupPreg("~~", "del", $line);
    }

    /**
     * Prepares the raw text to be displayed in HTML environment:
     * * Escapes the HTML special characters contained in the Gemtext.
     * * Puts narrow no-break spaces next to some characters.
     * @param &$text the text to process, modified in place
     */
    protected static function htmlPrepare(&$text) {
        // empty() alone would also blank out the valid text "0".
        if (self::isBlank($text)) {
            $text = " ";
        } else {
            // The last argument keeps the entities already present from being encoded twice.
            $text = htmlspecialchars($text, ENT_HTML5|ENT_QUOTES, "UTF-8", true);
            # Narrow no-break space before some punctuation marks and after an opening guillemet
            $text = mb_ereg_replace("\ ([?!:;»€$])", self::NARROW_NO_BREAK_SPACE."\\1", $text);
            $text = mb_ereg_replace("([«])\ ", "\\1".self::NARROW_NO_BREAK_SPACE, $text);

            # Warning: using a monospace font editor may not display dashes as they should be!

            # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
            $text = mb_ereg_replace("([".self::DASHES."]) ([^".self::DASHES.".]+) ([".self::DASHES."])", "\\1".self::NARROW_NO_BREAK_SPACE."\\2".self::NARROW_NO_BREAK_SPACE."\\3", $text);

            # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
            $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".self::NARROW_NO_BREAK_SPACE."\\2.", $text);
        }
    }

    /**
     * Replaces each run of spaces (0x20) by a single one.
     * @param &$text the text to process, modified in place
     */
    protected static function spacesCompress(&$text) {
        $text = preg_replace("/ +/", " ", $text);
    }

    /**
     * Translates the parsed nodes and stores the result in $translatedGemtext.
     * A "^^^" node toggles the text decoration for the nodes that follow it.
     *
     * The multi-line string literals below are kept verbatim: their line breaks belong
     * to the output, which is why their content is not indented.
     *
     * @param $textDecoration bool to interpret or not the text decoration
     */
    public function translate($textDecoration=true) {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $text = $node["text"];
                    self::spacesCompress($text);
                    self::htmlPrepare($text);
                    if ($textDecoration) self::addTextDecoration($text);
                    $output .= "

$text

\n";
                    break;
                case "*":
                    // NOTE(review): the items of $node["texts"] are not emitted here — confirm.
                    $output .= "\n";
                    break;
                case "```":
                    $text = implode("\n", $node["texts"]);
                    self::htmlPrepare($text);
                    // NOTE(review): $alt is read but not used by the output below — confirm.
                    $alt = $node["alt"];
                    $output .= "
\n$text\n
\n";
                    break;
                case ">":
                    $output .= "
\n";
                    foreach ($node["texts"] as $text) {
                        self::spacesCompress($text);
                        self::htmlPrepare($text);
                        if ($textDecoration) self::addTextDecoration($text);
                        $output .= "

$text

\n";
                    }
                    $output .= "
\n";
                    break;
                case "=>":
                    $link = $node["link"];
                    $linkText = $node["text"];
                    if (self::isBlank($linkText)) {
                        // No link text: the link itself is displayed.
                        // NOTE(review): $link stays unescaped in this branch, unlike the
                        // branch below — confirm before it is written into markup.
                        $linkText = $link;
                        self::htmlPrepare($linkText);
                    } else {
                        self::spacesCompress($linkText);
                        // Don't double encode: the entities already present are kept,
                        // only the remaining special characters are escaped.
                        $link = htmlspecialchars($link, ENT_HTML5|ENT_QUOTES, "UTF-8", false);
                        self::htmlPrepare($linkText);
                        if ($textDecoration) self::addTextDecoration($linkText);
                    }
                    // A link without any "scheme:" prefix is a local one.
                    preg_match("/^([^:]+):/", $link, $matches);
                    $protocol = $matches[1]??"local";
                    if ("local"==$protocol) {
                        if (!is_null($this->urlPrefix)) { // No URL rewriting
                            $link = $this->currentPageDir."/".$link;
                            $link = resolve_path($link);
                            $link = $this->urlPrefix.$link;
                        }
                        $newWindow = "";
                    } else {
                        $newWindow = "target='_blank' ";
                    }
                    // NOTE(review): neither $link nor $newWindow appears in the output below — confirm.
                    $output .= "

$linkText

\n";
                    break;
                case "#":
                    $title = $node["title"];
                    self::spacesCompress($title);
                    self::htmlPrepare($title);
                    // The first level-1 title becomes the page title.
                    if (empty($this->pageTitle)) $this->pageTitle = $title;
                    $output .= "

$title

\n";
                    break;
                case "##":
                    $title = $node["title"];
                    self::spacesCompress($title);
                    self::htmlPrepare($title);
                    $output .= "

$title

\n";
                    break;
                case "###":
                    $title = $node["title"];
                    self::spacesCompress($title);
                    self::htmlPrepare($title);
                    $output .= "

$title

\n";
                    break;
                case "^^^":
                    $textDecoration = !$textDecoration;
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }
} // GemTextTranslate_html
?>