2021-03-16 13:49:11 +01:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace htmgem;
|
|
|
|
|
|
|
|
|
|
// Make the mb_* string functions and the mb_ereg* regular expressions work on
// UTF-8: the HTML translation further down matches multi-byte punctuation
// (guillemets, dashes, ...) with mb_ereg_replace().
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
|
|
|
|
|
|
|
|
|
|
/**
 * Parses the gemtext and generates the internal format version.
 *
 * This is a generator: it yields one associative array ("node") per element,
 * in document order. Every node has a "mode" key; the other keys depend on it:
 *
 *  - ""                text line:     "text" (trimmed)
 *  - "#", "##", "###"  heading:       "title" (trimmed, may be empty)
 *  - "=>"              link:          "link" and "text" (both may be empty)
 *  - "```"             preformatted:  "alt" (trimmed) and "texts" (raw lines)
 *  - ">"               quote block:   "texts" (trimmed lines)
 *  - "*"               list:          "texts" (trimmed items)
 *  - "^^^"             a line starting with "^^^"; no other key
 *
 * Consecutive ">" lines (resp. "*" lines) are grouped into a single node.
 * A block still open when the input ends is yielded as is.
 *
 * @param string $fileContents the gemtext to parse
 * @return \Generator the nodes described above
 */
function gemtextParser($fileContents) {
    // Multi-line block being collected: null (none), "```", ">" or "*".
    $mode = null;
    // Node under construction for the block named by $mode.
    $current = array();

    foreach (explode("\n", $fileContents) as $line) {
        $line1 = substr($line, 0, 1);
        $line2 = substr($line, 0, 2);
        $line3 = substr($line, 0, 3);

        if ("```" === $mode) {
            if ("```" === $line3) {
                // Closing fence: the block is complete.
                yield $current;
                $current = array();
                $mode = null;
            } else {
                // No trim() as it is preformatted text; only the carriage
                // return left behind by a CRLF line ending is dropped.
                $current["texts"] []= rtrim($line, "\r");
            }
            continue;
        }

        if (">" === $mode || "*" === $mode) {
            if ($mode === $line1) {
                // Same kind of line: the block goes on.
                $pattern = (">" === $mode) ? "/^>\s*(.*)$/" : "/^\*\s*(.*)$/";
                preg_match($pattern, $line, $matches);
                $current["texts"] []= trim($matches[1]);
                continue;
            }
            // The block ends here; the current line is then handled below
            // exactly like a line met outside of any block.
            yield $current;
            $current = array();
            $mode = null;
        }

        if ("^^^" === $line3) {
            yield array("mode" => "^^^");
        } elseif ("#" === $line1) {
            // (.*) and not (.+): a heading marker without any title is still
            // a heading, with an empty title.
            preg_match("/^(#{1,3})\s*(.*)/", $line, $matches);
            yield array("mode" => $matches[1], "title" => trim($matches[2]));
        } elseif ("=>" === $line2) {
            // The pattern does not match a bare "=>" (no target), and the
            // label group is absent when there is no label: missing parts
            // become empty strings.
            preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $matches);
            yield array(
                "mode" => "=>",
                "link" => isset($matches[1]) ? trim($matches[1]) : "",
                "text" => isset($matches[2]) ? trim($matches[2]) : "",
            );
        } elseif ("```" === $line3) {
            preg_match("/^```\s*(.*)$/", $line, $matches);
            $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
            $mode = "```";
        } elseif (">" === $line1) {
            preg_match("/^>\s*(.*)$/", $line, $matches);
            $current = array("mode" => ">", "texts" => array(trim($matches[1])));
            $mode = ">";
        } elseif ("*" === $line1) {
            // NOTE(review): any line starting with "*" opens a list, including
            // a text line starting with "**bold**" -- confirm this is wanted.
            preg_match("/^\*\s*(.*)$/", $line, $matches);
            $current = array("mode" => "*", "texts" => array(trim($matches[1])));
            $mode = "*";
        } else {
            // text_line
            yield array("mode" => "", "text" => trim($line));
        }
    }

    // File ends before the block.
    if ($current) yield $current;
} // gemtextParser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Translates the internal format into a gemtext.
 * Use cases:
 *
 * - test suites
 * - serialisation easier with a text content
 * - normalization (trimming spaces for instance)
 *
 * The translation is done once, by the constructor; the result is available
 * by casting the object to a string.
 */
class GemtextTranslate_gemtext {

    /** @var array|\Traversable the nodes of the internal format to translate */
    public $parsedGemtext;

    /** @var string the gemtext built by translate() */
    public $translatedGemtext = "";

    /**
     * @param array|\Traversable $parsedGemtext nodes of the internal format:
     *        arrays with a "mode" key plus the keys read by translate()
     */
    public function __construct($parsedGemtext) {
        $this->parsedGemtext = $parsedGemtext;
        $this->translate();
    }

    /**
     * Builds the gemtext, one or several lines per node, and stores it in
     * $this->translatedGemtext. Stops the script on an unknown mode.
     */
    protected function translate() {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch ($mode) {
                case "":
                    $output .= $node["text"]."\n";
                    break;
                case "*":
                    foreach ($node["texts"] as $text) {
                        $output .= "* $text\n";
                    }
                    break;
                case "```":
                    // The alt text follows the opening fence so that it is not
                    // lost when the result is parsed again.
                    $alt = isset($node["alt"]) ? (string) $node["alt"] : "";
                    if ("" !== $alt) $alt = " $alt";
                    $output .= "```".$alt."\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "$text\n";
                    }
                    $output .= "```\n";
                    break;
                case ">":
                    foreach ($node["texts"] as $text) {
                        $output .= "> $text\n";
                    }
                    break;
                case "=>":
                    // Compared with "" and not tested with empty(): "0" is a
                    // valid link text.
                    $linkText = isset($node["text"]) ? (string) $node["text"] : "";
                    if ("" !== $linkText) $linkText = " $linkText";
                    $output .= "=> ".$node["link"].$linkText."\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $output .= "$mode ".$node["title"]."\n";
                    break;
                case "^^^":
                    $output .= "^^^\n";
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }

    /**
     * @return string the translated gemtext
     */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_gemtext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Translates the internal format to HTML
 *
 * The translation is done once, by the constructor. The HTML fragment is
 * available by casting the object to a string; getFullHtml() prints it inside
 * a complete HTML page.
 */
class GemtextTranslate_html {

    /** @var array style sheet URLs added with addCss() */
    protected $cssList = array();

    /** @var string the first level-1 heading met, already prepared for HTML */
    protected $pageTitle = "";

    /** @var string the HTML fragment built by translate() */
    public $translatedGemtext = "";

    /** @var array|\Traversable the nodes of the internal format to translate */
    public $parsedGemtext;

    /** @var bool whether text decoration is currently applied; a "^^^" node toggles it */
    public $textDecorationEnabled = true;

    /**
     * @param string|array|\Traversable $parsedGemtext either the gemtext itself
     *        (it is then parsed with gemtextParser()) or nodes of the internal format
     * @param bool $textDecorationEnabled initial state of the text decoration
     */
    public function __construct($parsedGemtext, $textDecorationEnabled=true) {
        // Empty non-string values (null, array(), ...) are handled like an
        // empty text. Strings are not tested with empty(): "0" is a valid text.
        if (!is_string($parsedGemtext) && empty($parsedGemtext)) {
            $parsedGemtext = "";
        }
        // Only a text must be parsed; nodes are used as they are.
        if (is_string($parsedGemtext)) {
            // rtrim() deletes the last empty lines, which would otherwise
            // become empty paragraphs in HTML.
            $parsedGemtext = gemtextParser(rtrim($parsedGemtext));
        }
        $this->parsedGemtext = $parsedGemtext;
        $this->translate($textDecorationEnabled);
    }

    /**
     * Adds a style sheet to the page printed by getFullHtml().
     * @param string $css the URL of the style sheet
     */
    public function addCss($css) {
        $this->cssList []= $css;
    }

    // U+202F, written as UTF-8 bytes so that this invisible character cannot
    // be silently replaced by a plain space.
    const NARROW_NO_BREAK_SPACE = "\xE2\x80\xAF";

    const DASHES
        ="‒" # U+2012 Figure Dash
        ."–" # U+2013 En Dash
        ."—" # U+2014 Em Dash
        ."⸺" # U+2E3A Two-Em Dash
        ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
        ;

    /**
     * Replaces markups things like __underlined__ to <u>underlined</u>.
     * @param string $instruction the characters to replace, as a regular
     *        expression, ex. "__" or "\*\*"
     * @param string $markup the markup to replace to, ex. "u" to get <u>…</u>
     * @param string &$text where to replace.
     */
    protected static function markupPreg($instruction, $markup, &$text) {
        $output = $text;

        # Replaces couples "__word__" into "<u>word</u>".
        $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1</{$markup}>", $output);

        # Replaces a remaining __ into "<u>…</u>" to the end of the line.
        $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);

        $text = $output;
    }

    /**
     * Adds text attributes such as underline, bold, … to $line
     * @param string &$line the line to process
     */
    protected static function addTextDecoration(&$line) {
        self::markupPreg("__", "u", $line);
        self::markupPreg("\*\*", "strong", $line);
        self::markupPreg("//", "em", $line);
        self::markupPreg("~~", "del", $line);
    }

    /**
     * Prepares the raw text to be displayed in HTML environment:
     * * Escapes the HTML special characters contained in the Gemtext
     *   (quotes and existing entities are left as they are).
     * * Puts narrow no-break spaces before or after some characters.
     * An empty text is replaced by a no-break space.
     * @param string &$text the text to process, modified in place
     */
    protected static function htmlPrepare(&$text) {
        // Compared with "" and not tested with empty(): "0" is a valid text.
        if ("" === (string) $text) {
            // NOTE(review): assumed to be a no-break space (U+00A0), which
            // keeps an otherwise empty element visible -- confirm.
            $text = "\xC2\xA0";
            return;
        }

        $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
        $text = mb_ereg_replace("\ ([?!:;»€$])", self::NARROW_NO_BREAK_SPACE."\\1", $text);
        $text = mb_ereg_replace("([«])\ ", "\\1".self::NARROW_NO_BREAK_SPACE, $text);

        # Warning: using a monospace font editor may not display dashes as they should be!
        # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
        $text = mb_ereg_replace("([".self::DASHES."]) ([^".self::DASHES.".]+) ([".self::DASHES."])", "\\1".self::NARROW_NO_BREAK_SPACE."\\2".self::NARROW_NO_BREAK_SPACE."\\3", $text);

        # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
        $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".self::NARROW_NO_BREAK_SPACE."\\2.", $text);
    }

    /**
     * Prepares a text for HTML and, when the text decoration is currently
     * enabled, applies it.
     * @param string $text the raw text
     * @return string the HTML
     */
    protected function inlineHtml($text) {
        self::htmlPrepare($text);
        if ($this->textDecorationEnabled) self::addTextDecoration($text);
        return $text;
    }

    /**
     * Builds the HTML fragment from $this->parsedGemtext and stores it in
     * $this->translatedGemtext. Stops the script on an unknown mode.
     * @param bool $textDecoration initial state of the text decoration; each
     *        "^^^" node met afterwards toggles it
     */
    public function translate($textDecoration=true) {
        $this->textDecorationEnabled = (bool) $textDecoration;
        $html = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch ($mode) {
                case "":
                    $html .= "<p>".$this->inlineHtml($node["text"])."</p>\n";
                    break;
                case "*":
                    $html .= "<ul>\n";
                    foreach ($node["texts"] as $text) {
                        $html .= "<li>".$this->inlineHtml($text)."\n";
                    }
                    $html .= "</ul>\n";
                    break;
                case "```":
                    // Preformatted text is only escaped: no typographic
                    // spaces, no decoration, its characters are kept as typed.
                    $text = htmlspecialchars(implode("\n", $node["texts"]), ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
                    $html .= "<pre>\n$text\n</pre>\n";
                    break;
                case ">":
                    $text = $this->inlineHtml(implode("\n", $node["texts"]));
                    $html .= "<blockquote>\n$text\n</blockquote>\n";
                    break;
                case "=>":
                    $link = (string) $node["link"];
                    $linkText = isset($node["text"]) ? (string) $node["text"] : "";
                    if ("" === $linkText) {
                        // No label: the address itself is displayed, without
                        // decoration ("//" would be taken for emphasis).
                        $linkText = $link;
                        self::htmlPrepare($linkText);
                    } else {
                        $linkText = $this->inlineHtml($linkText);
                    }
                    // What precedes the first ":" is used as CSS class.
                    preg_match("/^([^:]+):/", $link, $matches);
                    $protocol = empty($matches[1]) ? "local" : $matches[1];
                    // Both values end up inside single-quoted attributes, so
                    // the quotes must be escaped too.
                    $protocol = htmlspecialchars($protocol, ENT_QUOTES|ENT_HTML5, "UTF-8");
                    $href = htmlspecialchars($link, ENT_QUOTES|ENT_HTML5, "UTF-8");
                    $html .= "<p><a class='$protocol' href='$href'>$linkText</a></p>\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $title = $node["title"];
                    self::htmlPrepare($title);
                    // The first level-1 heading is also the title of the page.
                    if ("#" === $mode && "" === $this->pageTitle) $this->pageTitle = $title;
                    $level = strlen($mode);
                    $html .= "<h$level>$title</h$level>\n";
                    break;
                case "^^^":
                    $this->textDecorationEnabled = !$this->textDecorationEnabled;
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $html;
    }

    /**
     * Prints a complete HTML page containing the translation. The style sheets
     * are those added with addCss(), or "/htmgem/css/htmgem.css" if there is none.
     */
    public function getFullHtml() {
        $css = $this->cssList ? $this->cssList : array("/htmgem/css/htmgem.css");

        echo "<!DOCTYPE html>\n"
            ."<html>\n"
            ."<head>\n"
            ."<title>{$this->pageTitle}</title>\n"
            ."<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">";
        foreach ($css as $c) {
            echo "<link type='text/css' rel='StyleSheet' href='$c'>\n";
        }
        echo "</head>\n<body>\n";
        echo $this->translatedGemtext;
        echo "</body>\n</html>\n";
    }

    /**
     * @return string the HTML fragment (not the full page)
     */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemTextTranslate_html
|
2021-03-16 13:49:11 +01:00
|
|
|
|
|
|
|
|
|
?>
|