mirror of
https://tildegit.org/sbgodin/HtmGem.git
synced 2023-08-25 13:53:12 +02:00
v1.2.0
* Removes "^" to disable text decoration line-wise. * CSS is no longer incorporated in the HTML page. * Perform sanity checks against unauthorized file access. * Properly close tags when the page exists in a non-null mode. * Split HTML generation in two: parsing and translating. * Create classes to handle gemtext parsing and translating. * Create class to generate back gemtext (for future test cases). * Fix: 404 doesn't occur for an empty file. * Page 404 fully generated by HtmGem itself.
This commit is contained in:
commit
cef2417f91
@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
|
|||||||
=> https://keepachangelog.com/en/1.0.0/ The format is based on keep a Changelog.
|
=> https://keepachangelog.com/en/1.0.0/ The format is based on keep a Changelog.
|
||||||
=> https://semver.org/spec/v2.0.0.html And this project adheres to Semantic Versioning.
|
=> https://semver.org/spec/v2.0.0.html And this project adheres to Semantic Versioning.
|
||||||
|
|
||||||
|
## [1.2.0] - 2021-03-19
|
||||||
|
* Removes "^" to disable text decoration line-wise.
|
||||||
|
* CSS is no longer incorporated in the HTML page.
|
||||||
|
* Perform sanity checks against unauthorized file access.
|
||||||
|
* Properly close tags when the page exists in a non-null mode.
|
||||||
|
* Split HTML generation in two: parsing and translating.
|
||||||
|
* Create classes to handle gemtext parsing and translating.
|
||||||
|
* Create class to generate back gemtext (for future test cases).
|
||||||
|
* Fix: 404 doesn't occur for an empty file.
|
||||||
|
* Page 404 fully generated by HtmGem itself.
|
||||||
|
|
||||||
## [1.1.0] - 2021-03-14
|
## [1.1.0] - 2021-03-14
|
||||||
* File download when using "source" as a style.
|
* File download when using "source" as a style.
|
||||||
* Improves the regex.
|
* Improves the regex.
|
||||||
|
@ -118,8 +118,7 @@ La décoration du texte, qui interprête le **gras** par exemple, ne fait pas pa
|
|||||||
### Désactiver la décoration du texte
|
### Désactiver la décoration du texte
|
||||||
|
|
||||||
On peut :
|
On peut :
|
||||||
* commencer la ligne par **^**,
|
* désactiver et activer la décoration du texte avec une ligne **^^^**,
|
||||||
* faire un bloc non décoré avec **^^^**,
|
|
||||||
* ajouter ce qui suit à la **réécriture** d’URL :
|
* ajouter ce qui suit à la **réécriture** d’URL :
|
||||||
> &textDecoration=0
|
> &textDecoration=0
|
||||||
|
|
||||||
|
366
index.php
366
index.php
@ -1,305 +1,72 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
mb_internal_encoding("UTF-8");
|
require_once "lib-htmgem.php";
|
||||||
mb_regex_encoding("UTF-8");
|
|
||||||
|
|
||||||
define("NARROW_NO_BREAK_SPACE", " ");
|
# The url argument is always absolute compared to the document root.
|
||||||
define("DASHES"
|
|
||||||
,"‒" # U+2012 Figure Dash
|
|
||||||
."–" # U+2013 En Dash
|
|
||||||
."—" # U+2014 Em Dash
|
|
||||||
."⸺" # U+2E3A Two-Em Dash
|
|
||||||
."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
|
|
||||||
);
|
|
||||||
|
|
||||||
$style = @$_REQUEST['style'];
|
|
||||||
|
|
||||||
# to false only if textDecoration=0 in the URL
|
|
||||||
$textDecoration = "0" != @$_REQUEST['textDecoration'];
|
|
||||||
|
|
||||||
/* The url argument is always absolute compared to the document root
|
|
||||||
* The leading slash is removed. so url=/foo/bar and url=foo/bar ar the same.
|
|
||||||
*/
|
|
||||||
$url = @$_REQUEST["url"];
|
$url = @$_REQUEST["url"];
|
||||||
|
|
||||||
######################################## Installation page
|
/* Installation page
|
||||||
|
*
|
||||||
|
* Accessing /htmgem directly displays the self-hosted documentation
|
||||||
|
* contained in "index.gmi". If it's removed, display an empty page with a
|
||||||
|
* comment
|
||||||
|
*/
|
||||||
if (empty($url)) {
|
if (empty($url)) {
|
||||||
if (!file_exists("index.gmi")) {
|
if (!file_exists("index.gmi")) {
|
||||||
http_response_code(403);
|
http_response_code(403);
|
||||||
die("<!-- index.gmi missing -->");
|
die("<!-- index.gmi missing -->");
|
||||||
}
|
}
|
||||||
?>
|
$t = new \htmgem\GemTextTranslate_html(@file_get_contents("index.gmi"));
|
||||||
<!DOCTYPE html>
|
echo $t->getFullHtml();
|
||||||
<html lang="fr">
|
exit();
|
||||||
<head>
|
|
||||||
<title>Installation de HtmGem</title>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
||||||
<style>
|
|
||||||
<?php include("css/htmgem.css"); ?>
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<?php
|
|
||||||
echo translateGemToHtml(@file_get_contents("index.gmi"));
|
|
||||||
echo "</body>\n</html>\n";
|
|
||||||
die();
|
|
||||||
}
|
}
|
||||||
######################################## /Installation page
|
|
||||||
|
$documentRoot = $_SERVER['DOCUMENT_ROOT'];
|
||||||
|
|
||||||
# Removes the headling and trailling slashes, to be sure there's not any.
|
# Removes the headling and trailling slashes, to be sure there's not any.
|
||||||
$filePath = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/".ltrim($url, "/");
|
$filePath = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/".ltrim($url, "/");
|
||||||
|
|
||||||
$fileContents = @file_get_contents($filePath);
|
switch(true) {
|
||||||
|
case false:
|
||||||
|
case !realPath($filePath):
|
||||||
|
case !preg_match("/\.gmi$/", $url): # not finishing by .gmi
|
||||||
|
case strpos($filePath, $documentRoot)!==0: # not in web directory
|
||||||
|
$go404 = true;
|
||||||
|
// Says 404 even if the file exists to not give any information.
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
$go404 = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 404 page
|
||||||
######################################## 404 page
|
*/
|
||||||
if (!file_exists($filePath) || !preg_match("/\.gmi$/", $url)) {
|
if ($go404) {
|
||||||
error_log("HtmGem: 404 $url $filePath");
|
error_log("HtmGem: 404 $url $filePath");
|
||||||
http_response_code(404); ?>
|
http_response_code(404);
|
||||||
<!DOCTYPE html>
|
$page404 = <<<EOF
|
||||||
<html lang="fr">
|
|
||||||
<head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
||||||
<style>
|
|
||||||
<?php include("css/htmgem.css"); ?>
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<?php
|
|
||||||
$text404 = <<<EOF
|
|
||||||
# ⚠ Page non trouvée
|
# ⚠ Page non trouvée
|
||||||
|
|
||||||
**$url**
|
**$url**
|
||||||
|
|
||||||
=> $url Recharger 🔄
|
=> .. 🔄 🔄
|
||||||
|
|
||||||
=> /
|
|
||||||
EOF;
|
EOF;
|
||||||
echo translateGemToHtml($text404);
|
$t = new \htmgem\GemTextTranslate_html($page404);
|
||||||
echo "</body>\n</html>";
|
echo $t->getFullHtml();
|
||||||
die();
|
exit();
|
||||||
}
|
}
|
||||||
######################################## /404 page
|
|
||||||
|
|
||||||
|
# to false only if textDecoration=0 in the URL
|
||||||
|
$textDecoration = "0" != @$_REQUEST['textDecoration'];
|
||||||
|
|
||||||
|
$fileContents = @file_get_contents($filePath);
|
||||||
# Removes the Byte Order Mark
|
# Removes the Byte Order Mark
|
||||||
$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
|
$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/* CSS and special style management
|
||||||
* Replaces markups things like __underlined__ to <u>underlined</u>.
|
|
||||||
* @param $instruction the characters to replace, ex. _
|
|
||||||
* @param $markup the markup to replace to, ex. "u" to get <u>…</u>
|
|
||||||
* @param &$text where to replace.
|
|
||||||
*/
|
*/
|
||||||
function markupPreg($instruction, $markup, &$text) {
|
|
||||||
$output = $text;
|
|
||||||
|
|
||||||
# Replaces couples "__word__" into "<i>word</i>".
|
|
||||||
$output = mb_ereg_replace("${instruction}(.+?)${instruction}", "<{$markup}>\\1</{$markup}>", $output);
|
|
||||||
|
|
||||||
# Replaces a remaining __ into "<i>…</i>" to the end of the line.
|
|
||||||
$output = mb_ereg_replace("${instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
|
|
||||||
|
|
||||||
$text = $output;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds text attributes sucj as underline, bold, … to $line
|
|
||||||
* @param $line the line to process
|
|
||||||
*/
|
|
||||||
function addTextAttributes(&$line) {
|
|
||||||
global $textDecoration;
|
|
||||||
if (!$textDecoration) return;
|
|
||||||
markupPreg("__", "u", $line);
|
|
||||||
markupPreg("\*\*", "strong", $line);
|
|
||||||
markupPreg("//", "em", $line);
|
|
||||||
markupPreg("~~", "del", $line);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepares the raw text to be displayed in HTML environment:
|
|
||||||
* * Escapes the HTML entities yet contained in the Gemtext.
|
|
||||||
* * Puts thin unbrakable spaces before some characters.
|
|
||||||
* @param $text1, $text2 texts to process
|
|
||||||
*/
|
|
||||||
function htmlPrepare(&$text) {
|
|
||||||
$text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
|
|
||||||
$text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
|
|
||||||
$text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable
|
|
||||||
|
|
||||||
# Warning: using a monospace font editor may not display dashes as they should be!
|
|
||||||
# Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
|
|
||||||
$text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
|
|
||||||
|
|
||||||
# Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
|
|
||||||
$text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
|
|
||||||
}
|
|
||||||
|
|
||||||
function translateGemToHtml($fileContents) {
|
|
||||||
$fileLines = preg_split("/\n/", $fileContents);
|
|
||||||
if (empty($fileLines[-1])) array_pop($fileLines); # Don't output a last empty line
|
|
||||||
ob_start();
|
|
||||||
$mode = null;
|
|
||||||
$mode_textAttributes = true;
|
|
||||||
foreach ($fileLines as $line) {
|
|
||||||
$reDoCount = 0;
|
|
||||||
$mode_textAttributes_temp = false;
|
|
||||||
while (true) {
|
|
||||||
if ($reDoCount>2) {
|
|
||||||
error_log("HtmGem: Too many loops, mode == '$mode'");
|
|
||||||
$mode = null;
|
|
||||||
$reDoCount = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
$reDoCount += 1;
|
|
||||||
$line1 = substr($line, 0, 1); // $line can be modified
|
|
||||||
$line2 = substr($line, 0, 2); // in the meantime.
|
|
||||||
$line3 = substr($line, 0, 3);
|
|
||||||
if (is_null($mode)) {
|
|
||||||
if (empty($line)) {
|
|
||||||
echo "<p> </p>\n";
|
|
||||||
} elseif ('^^^' == $line3) {
|
|
||||||
$mode_textAttributes = !$mode_textAttributes;
|
|
||||||
} elseif ('^' == $line1 and !$mode_textAttributes_temp) {
|
|
||||||
if (preg_match("/^\^\s*(.+)$/", $line, $parts)) {
|
|
||||||
$line = $parts[1];
|
|
||||||
$mode_textAttributes_temp = true;
|
|
||||||
} else {
|
|
||||||
$mode = "raw";
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} elseif ("#" == $line1) {
|
|
||||||
if (preg_match("/^(#{1,3})\s*(.+)/", $line, $sharps)) {
|
|
||||||
$h_level = strlen($sharps[1]);
|
|
||||||
$text = $sharps[2];
|
|
||||||
htmlPrepare($text);
|
|
||||||
switch ($h_level) {
|
|
||||||
case 1: echo "<h1>".$text."</h1>\n"; break;
|
|
||||||
case 2: echo "<h2>".$text."</h2>\n"; break;
|
|
||||||
case 3: echo "<h3>".$text."</h3>\n"; break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
$mode = "raw";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} elseif ("=>" == $line2) {
|
|
||||||
if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
|
|
||||||
$url_link = $linkParts[1];
|
|
||||||
$url_label = @$linkParts[2];
|
|
||||||
preg_match("/^([^:]+):/", $url_link, $matches);
|
|
||||||
$url_protocol = @$matches[1];
|
|
||||||
if (empty($url_protocol)) $url_protocol = "local";
|
|
||||||
if (empty(trim($url_label))) {
|
|
||||||
$url_label = $url_link;
|
|
||||||
} else {
|
|
||||||
// the label is humain-made, apply formatting
|
|
||||||
htmlPrepare($url_label);
|
|
||||||
if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($url_label);
|
|
||||||
}
|
|
||||||
echo "<p><a class='$url_protocol' href='$url_link'>$url_label</a></p>\n";
|
|
||||||
} else {
|
|
||||||
$mode = "raw";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} elseif ("```" == $line3) {
|
|
||||||
preg_match("/^```\s*(.*)$/", $line, $matches);
|
|
||||||
$alt_text = trim($matches[1]);
|
|
||||||
if (empty($alt_text)) {
|
|
||||||
echo "<pre>\n";
|
|
||||||
} else {
|
|
||||||
echo "<pre alt='$alt_text' title='$alt_text'>\n";
|
|
||||||
}
|
|
||||||
$mode="pre";
|
|
||||||
} elseif (">" == $line1) {
|
|
||||||
echo "<blockquote>\n";
|
|
||||||
$mode = "quote";
|
|
||||||
continue;
|
|
||||||
} elseif ("*" == $line1) {
|
|
||||||
echo "<ul>\n";
|
|
||||||
$mode = "ul";
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
$mode = "raw";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if ("raw"==$mode) {
|
|
||||||
if (empty($line)) {
|
|
||||||
$line = " ";
|
|
||||||
} else {
|
|
||||||
htmlPrepare($line);
|
|
||||||
if ($mode_textAttributes xor $mode_textAttributes_temp)
|
|
||||||
addTextAttributes($line);
|
|
||||||
}
|
|
||||||
echo "<p>$line</p>\n";
|
|
||||||
$mode = null;
|
|
||||||
} elseif ("pre"==$mode) {
|
|
||||||
if ("```" == $line3) {
|
|
||||||
echo "</pre>\n";
|
|
||||||
$mode = null;
|
|
||||||
} else {
|
|
||||||
htmlPrepare($line);
|
|
||||||
echo $line."\n";
|
|
||||||
}
|
|
||||||
} elseif ("quote"==$mode) {
|
|
||||||
if (">" == $line1) {
|
|
||||||
preg_match("/^>\s*(.*)$/", $line, $quoteParts);
|
|
||||||
$quote = $quoteParts[1];
|
|
||||||
if (empty($quote))
|
|
||||||
echo "<p> </p>\n";
|
|
||||||
else {
|
|
||||||
htmlPrepare($quote);
|
|
||||||
if ($mode_textAttributes xor $mode_textAttributes_temp)
|
|
||||||
addTextAttributes($line);
|
|
||||||
echo "<p>".$quote."</p>\n";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
echo "</blockquote>\n";
|
|
||||||
$mode = null;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} elseif ("ul"==$mode) {
|
|
||||||
if ("*" == $line1) {
|
|
||||||
preg_match("/^\*\s*(.*)$/", $line, $ulParts);
|
|
||||||
$li = $ulParts[1];
|
|
||||||
if (empty($li)) {
|
|
||||||
echo "<li> \n";
|
|
||||||
} else {
|
|
||||||
htmlPrepare($li);
|
|
||||||
if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
|
|
||||||
echo "<li>".$li."\n";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
echo "</ul>\n";
|
|
||||||
$mode = null;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
die("Unexpected mode: $mode!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break; // exits the while(true) as no continue occured
|
|
||||||
} // while(true)
|
|
||||||
}
|
|
||||||
$html = ob_get_contents();
|
|
||||||
ob_clean();
|
|
||||||
return $html;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Gets the page title: the first occurrence with # at the line start
|
|
||||||
mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
|
|
||||||
$page_title = @$matches[1];
|
|
||||||
|
|
||||||
###################################### CSS Management
|
|
||||||
/**
|
|
||||||
* if &style=source displays the source directly and stops.
|
|
||||||
* if there's a filename.css besides filename.gmi, use the css and stops.
|
|
||||||
* if &style=<NOTHING> then embbed the default style, and stops.
|
|
||||||
* if &style=<word not beginngin by slash> then use htmgem/word.css
|
|
||||||
* if &style=/… then use the … as as stylesheet.
|
|
||||||
**/
|
|
||||||
|
|
||||||
|
$style = @$_REQUEST['style'];
|
||||||
if ("source" == $style) {
|
if ("source" == $style) {
|
||||||
$basename = basename($filePath);
|
$basename = basename($filePath);
|
||||||
header("Cache-Control: public");
|
header("Cache-Control: public");
|
||||||
@ -307,9 +74,12 @@ if ("source" == $style) {
|
|||||||
header("Content-Type: text/plain");
|
header("Content-Type: text/plain");
|
||||||
header("Content-Transfer-Encoding: binary");
|
header("Content-Transfer-Encoding: binary");
|
||||||
header('Content-Length: ' . filesize($filePath));
|
header('Content-Length: ' . filesize($filePath));
|
||||||
readfile($filePath);
|
echo $fileContents;
|
||||||
exit();
|
exit();
|
||||||
} elseif ("pre" == $style) {
|
} elseif ("pre" == $style) {
|
||||||
|
# Gets the page title: the first occurrence with # at the line start
|
||||||
|
mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
|
||||||
|
$page_title = @$matches[1];
|
||||||
$fileContents = htmlspecialchars($fileContents, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
|
$fileContents = htmlspecialchars($fileContents, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
|
||||||
echo <<<EOL
|
echo <<<EOL
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
@ -318,52 +88,32 @@ if ("source" == $style) {
|
|||||||
<title>$page_title</title>
|
<title>$page_title</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||||
</head>
|
</head>
|
||||||
<pre>$fileContents</pre>
|
<pre>
|
||||||
|
$fileContents</pre>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
EOL;
|
EOL;
|
||||||
} else {
|
exit();
|
||||||
|
}
|
||||||
|
|
||||||
|
$t = new \htmgem\GemTextTranslate_html($fileContents, $textDecoration);
|
||||||
|
if ("none" == $style) {
|
||||||
|
$t->addCss("");
|
||||||
|
} elseif ("/" == @$style[0]) {
|
||||||
|
$t->addCss($style);
|
||||||
|
} elseif (empty($style)) {
|
||||||
$parts = pathinfo($filePath);
|
$parts = pathinfo($filePath);
|
||||||
$localCss = $parts["filename"].".css";
|
$localCss = $parts["filename"].".css";
|
||||||
$localCssFilePath = $parts["dirname"]."/".$localCss;
|
$localCssFilePath = $parts["dirname"]."/".$localCss;
|
||||||
if (file_exists($localCssFilePath)) {
|
if (file_exists($localCssFilePath)) {
|
||||||
# Warning, using htmhem.php?url=… will make $localCss not found
|
# Warning, using htmhem.php?url=… will make $localCss not found
|
||||||
# as the path is relative to htmgem.php and not / !
|
# as the path is relative to htmgem.php and not / !
|
||||||
$cssContent = "<link type='text/css' rel='StyleSheet' href='$localCss'>";
|
$t->addCss($localCss);
|
||||||
} else {
|
|
||||||
if (empty($style)) {
|
|
||||||
$cssContent =
|
|
||||||
"<style>\n"
|
|
||||||
.@file_get_contents("css/htmgem.css")
|
|
||||||
."</style>\n";
|
|
||||||
} else {
|
|
||||||
if ("none" == $style) {
|
|
||||||
$cssContent = "";
|
|
||||||
} else {
|
|
||||||
if ("/" == $style[0])
|
|
||||||
$href = $style;
|
|
||||||
else
|
|
||||||
$href = "/htmgem/css/$style.css";
|
|
||||||
$cssContent = "<link type='text/css' rel='StyleSheet' href='$href'>";
|
|
||||||
}
|
}
|
||||||
}
|
} else { #TODO: regex check for $style
|
||||||
}
|
$t->addCss("/htmgem/css/$style.css");
|
||||||
echo <<<EOL
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="fr">
|
|
||||||
<head>
|
|
||||||
<title>$page_title</title>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
||||||
$cssContent
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
EOL;
|
|
||||||
|
|
||||||
echo "\n".translateGemToHtml($fileContents);
|
|
||||||
echo "</body>\n</html>\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
echo $t->getFullHtml();
|
||||||
ob_end_flush();
|
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
350
lib-htmgem.php
Normal file
350
lib-htmgem.php
Normal file
@ -0,0 +1,350 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace htmgem;
|
||||||
|
|
||||||
|
mb_internal_encoding("UTF-8");
|
||||||
|
mb_regex_encoding("UTF-8");
|
||||||
|
|
||||||
|
/**
 * Parses gemtext and lazily yields its nodes in the internal format.
 *
 * Every yielded node is an array whose "mode" key tells the line type:
 *  - ""               text line          => "text"
 *  - "#", "##", "###" heading            => "title"
 *  - "=>"             link               => "link", "text" (may be empty)
 *  - "```"            preformatted block => "alt", "texts" (untouched lines)
 *  - ">"              quote block        => "texts" (trimmed lines)
 *  - "*"              list block         => "texts" (trimmed lines)
 *  - "^^^"            text decoration toggle, no payload
 *
 * A heading or link marker which carries no content ("#" or "=>" alone)
 * is yielded as an ordinary text line instead of a broken node.
 *
 * @param string $fileContents the gemtext to parse
 * @return \Generator the nodes, in document order
 */
function gemtextParser($fileContents) {
    // Lines belonging to a quote or a list: the marker, optional spaces, the text.
    $blockPatterns = array(
        ">" => '/^>\s*(.*)$/',
        "*" => '/^\*\s*(.*)$/',
    );
    $mode = null;       // null outside of a block, otherwise "```", ">" or "*"
    $current = array(); // the block being accumulated, empty outside of a block

    foreach (explode("\n", $fileContents) as $line) {
        $line1 = substr($line, 0, 1);
        $line2 = substr($line, 0, 2);
        $line3 = substr($line, 0, 3);

        if ("```" === $mode) {
            if ("```" === $line3) {
                // The closing fence ends the block and is consumed.
                yield $current;
                $current = array();
                $mode = null;
            } else {
                $current["texts"] []= $line; // No trim() as it's a preformatted text!
            }
            continue;
        }

        if (">" === $mode || "*" === $mode) {
            if ($mode === $line1) {
                preg_match($blockPatterns[$mode], $line, $matches);
                $current["texts"] []= trim($matches[1]);
                continue;
            }
            // Any other line closes the block; the line itself is then
            // handled below as a line found outside of a block.
            yield $current;
            $current = array();
            $mode = null;
        }

        if ("^^^" === $line3) {
            yield array("mode" => "^^^");
        } elseif ("#" === $line1 && preg_match('/^(#{1,3})\s*(.+)/', $line, $matches)) {
            yield array("mode" => $matches[1], "title" => trim($matches[2]));
        } elseif ("=>" === $line2 && preg_match('/^=>\s*([^\s]+)(?:\s+(.*))?$/', $line, $matches)) {
            // The label group is absent from $matches when there is no label.
            $label = isset($matches[2]) ? $matches[2] : "";
            yield array("mode" => "=>", "link" => trim($matches[1]), "text" => trim($label));
        } elseif ("```" === $line3) {
            preg_match('/^```\s*(.*)$/', $line, $matches);
            $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
            $mode = "```";
        } elseif (">" === $line1 || "*" === $line1) {
            preg_match($blockPatterns[$line1], $line, $matches);
            $current = array("mode" => $line1, "texts" => array(trim($matches[1])));
            $mode = $line1;
        } else {
            // Text line, including "#" and "=>" markers without content.
            yield array("mode" => "", "text" => trim($line));
        }
    }

    if ($current) yield $current; # File ends before the block.
} // gemtextParser
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Translates the internal format back into gemtext.
 * Use cases:
 *
 * - test suites
 * - serialisation made easier with a text content
 * - normalization (trimming spaces for instance)
 *
 * The translation is done once, by the constructor; the result is
 * available through the string conversion of the object.
 */
class GemtextTranslate_gemtext {

    /** @var array|\Traversable the nodes to translate, each one an array with a "mode" key */
    public $parsedGemtext;

    /** @var string the generated gemtext */
    public $translatedGemtext = "";

    /**
     * @param array|\Traversable $parsedGemtext the nodes in the internal format
     */
    public function __construct($parsedGemtext) {
        $this->parsedGemtext = $parsedGemtext;
        $this->translate();
    }

    /**
     * Walks through the nodes and stores the gemtext in $translatedGemtext.
     * Stops the script on a node whose mode is unknown.
     */
    protected function translate() {
        $gemtext = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch ($mode) {
                case "":
                    $gemtext .= $node["text"]."\n";
                    break;
                case "*":
                case ">":
                    // Lists and quotes: one line per text, prefixed by the marker.
                    foreach ($node["texts"] as $text) {
                        $gemtext .= "$mode $text\n";
                    }
                    break;
                case "```":
                    // The alt text sits on the opening fence; keeping it
                    // makes the round trip lossless.
                    $alt = isset($node["alt"]) ? $node["alt"] : "";
                    $gemtext .= "```".$alt."\n";
                    foreach ($node["texts"] as $text) {
                        $gemtext .= "$text\n";
                    }
                    $gemtext .= "```\n";
                    break;
                case "=>":
                    // Compared against "" and not with empty(): "0" is a valid label.
                    $linkText = (string)$node["text"];
                    if ("" !== $linkText) $linkText = " $linkText";
                    $gemtext .= "=> ".$node["link"].$linkText."\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $gemtext .= "$mode ".$node["title"]."\n";
                    break;
                case "^^^":
                    $gemtext .= "^^^\n";
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $gemtext;
    }

    /**
     * @return string the generated gemtext
     */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_gemtext
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Translates the internal format to HTML.
 *
 * The translation is done once, by the constructor. The HTML fragment is
 * available through the string conversion of the object, the whole page
 * is printed by getFullHtml().
 */
class GemtextTranslate_html {

    # Written as UTF-8 byte sequences so that they stay visible in any editor.
    const NARROW_NO_BREAK_SPACE = "\xE2\x80\xAF"; # U+202F
    const NO_BREAK_SPACE = "\xC2\xA0";            # U+00A0
    const DASHES
        ="‒" # U+2012 Figure Dash
        ."–" # U+2013 En Dash
        ."—" # U+2014 Em Dash
        ."⸺" # U+2E3A Two-Em Dash
        ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
    ;

    /** @var string[] the stylesheets to link, in the order they were added */
    protected $cssList = array();

    /** @var string the first level-1 heading, already escaped for HTML */
    protected $pageTitle = "";

    /** @var array|\Traversable the nodes to translate */
    public $parsedGemtext;

    /** @var bool the state of the "^^^" toggle while translating */
    public $textDecorationEnabled = true;

    /** @var string the generated HTML fragment */
    public $translatedGemtext = "";

    /**
     * @param string|array|\Traversable|null $parsedGemtext either the gemtext
     *        itself, which gets parsed, or nodes that are already parsed.
     *        null and false (an unreadable file) stand for an empty text.
     * @param bool $textDecorationEnabled false to never decorate the text
     */
    public function __construct($parsedGemtext, $textDecorationEnabled=true) {
        if (!is_array($parsedGemtext) && !($parsedGemtext instanceof \Traversable)) {
            // The text must be parsed. rtrim() deletes the last empty
            // lines, which would give empty paragraphs in HTML.
            $parsedGemtext = gemtextParser(rtrim((string)$parsedGemtext));
        }
        $this->parsedGemtext = $parsedGemtext;
        $this->translate($textDecorationEnabled);
    }

    /**
     * Adds a stylesheet to link from the full page.
     * @param string $css the URL of the stylesheet, "" to have no stylesheet
     */
    public function addCss($css) {
        $this->cssList []= $css;
    }

    /**
     * Escapes a value to be put in a quoted HTML attribute.
     * @param string $value
     * @return string
     */
    protected static function escapeAttribute($value) {
        return htmlspecialchars($value, ENT_QUOTES|ENT_HTML5, "UTF-8");
    }

    /**
     * Replaces markups things like __underlined__ to <u>underlined</u>.
     * @param $instruction the characters to replace, as a regex, ex. "__"
     * @param $markup the markup to replace to, ex. "u" to get <u>…</u>
     * @param &$text where to replace.
     */
    protected static function markupPreg($instruction, $markup, &$text) {
        $replacement = "<{$markup}>\\1</{$markup}>";

        # Replaces couples "__word__" into "<u>word</u>".
        $text = mb_ereg_replace("{$instruction}(.+?){$instruction}", $replacement, $text);

        # Replaces a remaining __ into "<u>…</u>" to the end of the line.
        $text = mb_ereg_replace("{$instruction}(.+)?", $replacement, $text);
    }

    /**
     * Adds text attributes such as underline, bold, … to $line
     * @param $line the line to process
     */
    protected static function addTextDecoration(&$line) {
        self::markupPreg('__', "u", $line);
        self::markupPreg('\*\*', "strong", $line);
        self::markupPreg('//', "em", $line);
        self::markupPreg('~~', "del", $line);
    }

    /**
     * Prepares the raw text to be displayed in HTML environment:
     * * Escapes the HTML entities yet contained in the Gemtext.
     * * Puts thin unbreakable spaces before some characters.
     * An empty text becomes a no-break space, to keep the line visible.
     * @param &$text the text to process
     */
    protected static function htmlPrepare(&$text) {
        // Not empty(): the text "0" must be displayed.
        if (null === $text || "" === $text) {
            $text = self::NO_BREAK_SPACE;
            return;
        }
        $nnbsp = self::NARROW_NO_BREAK_SPACE;
        $dashes = self::DASHES;

        $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
        $text = mb_ereg_replace('\ ([?!:;»€$])', $nnbsp."\\1", $text);
        $text = mb_ereg_replace('([«])\ ', "\\1".$nnbsp, $text);

        # Warning: using a monospace font editor may not display dashes as they should be!
        # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
        $text = mb_ereg_replace("([".$dashes."]) ([^".$dashes.".]+) ([".$dashes."])", "\\1".$nnbsp."\\2".$nnbsp."\\3", $text);

        # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
        $text = mb_ereg_replace('([—–]) ([^.]+)\.', "\\1".$nnbsp."\\2.", $text);
    }

    /**
     * Translates the nodes and stores the HTML fragment in $translatedGemtext.
     * The text is decorated when $textDecoration is true and the "^^^"
     * toggle is on; each "^^^" node switches the toggle, which starts on.
     * Stops the script on a node whose mode is unknown.
     * @param bool $textDecoration false to never decorate the text
     */
    public function translate($textDecoration=true) {
        $headings = array("#" => "h1", "##" => "h2", "###" => "h3");
        $this->textDecorationEnabled = true;
        $html = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            $decorate = $textDecoration && $this->textDecorationEnabled;
            switch ($mode) {
                case "":
                    $text = $node["text"];
                    self::htmlPrepare($text);
                    if ($decorate) self::addTextDecoration($text);
                    $html .= "<p>$text</p>\n";
                    break;
                case "*":
                    $html .= "<ul>\n";
                    foreach ($node["texts"] as $text) {
                        self::htmlPrepare($text);
                        if ($decorate) self::addTextDecoration($text);
                        $html .= "<li>$text\n";
                    }
                    $html .= "</ul>\n";
                    break;
                case "```":
                    // Preformatted text is only escaped: no typographic
                    // replacement must alter what the author wrote.
                    $text = htmlspecialchars(implode("\n", $node["texts"]), ENT_HTML5|ENT_NOQUOTES, "UTF-8");
                    $html .= "<pre>\n$text\n</pre>\n";
                    break;
                case ">":
                    $text = implode("\n", $node["texts"]);
                    self::htmlPrepare($text);
                    if ($decorate) self::addTextDecoration($text);
                    $html .= "<blockquote>\n$text\n</blockquote>\n";
                    break;
                case "=>":
                    $link = $node["link"];
                    $linkText = (string)$node["text"];
                    if ("" === $linkText) {
                        // No label: the URL is displayed, never decorated.
                        $linkText = $link;
                        self::htmlPrepare($linkText);
                    } else {
                        // The label is human-made, apply formatting.
                        self::htmlPrepare($linkText);
                        if ($decorate) self::addTextDecoration($linkText);
                    }
                    // The CSS class is the protocol, "local" when there is none.
                    $protocol = preg_match('/^([^:]+):/', $link, $matches) ? $matches[1] : "local";
                    // Both values land in quoted attributes: escape the quotes too.
                    $protocol = self::escapeAttribute($protocol);
                    $href = self::escapeAttribute($link);
                    $html .= "<p><a class='$protocol' href='$href'>$linkText</a></p>\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $title = $node["title"];
                    self::htmlPrepare($title);
                    // The first level-1 heading gives the page its title.
                    if ("#" === $mode && "" === $this->pageTitle) $this->pageTitle = $title;
                    $tag = $headings[$mode];
                    $html .= "<$tag>$title</$tag>\n";
                    break;
                case "^^^":
                    $this->textDecorationEnabled = !$this->textDecorationEnabled;
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $html;
    }

    /**
     * Prints (and does not return) the whole HTML page: the head, with the
     * title and the stylesheets, then the translated text as the body.
     * The default stylesheet is used if none was added; an added ""
     * stylesheet is skipped, which gives a page with no style at all.
     */
    public function getFullHtml() {
        $css = $this->cssList ? $this->cssList : array("/htmgem/css/htmgem.css");

        echo "<!DOCTYPE html>\n<html>\n<head>\n"
            ."<title>{$this->pageTitle}</title>\n"
            ."<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n";
        foreach ($css as $c) {
            if ("" === (string)$c) continue;
            // The stylesheet may come from the request: escape it.
            $href = self::escapeAttribute($c);
            echo "<link type='text/css' rel='StyleSheet' href='$href'>\n";
        }
        echo "</head>\n<body>\n";
        echo $this->translatedGemtext;
        echo "</body>\n</html>\n";
    }

    /**
     * @return string the HTML fragment, without the page around it
     */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_html
|
||||||
|
|
||||||
|
?>
|
@ -53,9 +53,11 @@ Il ne peut exister qu’un lien par ligne. Et la ligne est dédiée à ça ! Voi
|
|||||||
|
|
||||||
La décoration du texte ne fait pas partie des spécifications de Gemini.
|
La décoration du texte ne fait pas partie des spécifications de Gemini.
|
||||||
|
|
||||||
^ Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
|
Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
|
||||||
Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
|
Cette **ligne** utilise la //décoration du texte// qui peut ~~barrer~~ ou __souligner__ des mots.
|
||||||
|
|
||||||
|
On peut désactiver et activer la décoration du texte avec **^^^** sur une ligne.
|
||||||
|
|
||||||
Et voilà ! Vous savez tout ce qui est à savoir 🥳 Les spécifications complètes sont sur le site de Gemini :
|
Et voilà ! Vous savez tout ce qui est à savoir 🥳 Les spécifications complètes sont sur le site de Gemini :
|
||||||
=> https://gemini.circumlunar.space/docs/specification.gmi
|
=> https://gemini.circumlunar.space/docs/specification.gmi
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user