1
0
mirror of https://tildegit.org/sbgodin/HtmGem.git synced 2023-08-25 13:53:12 +02:00
HtmGem/htmgem.php
Christophe HENRY 6a37ecebcf Passes on next line in case of error, and log error
When there are too many loops, indicating that something went wrong,
sets the loop count to zero, the mode to default and go to the next
line.
2021-03-08 10:14:57 +01:00

242 lines
8.2 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
if (isset($_REQUEST["url"]))
$url = $_REQUEST["url"];
elseif (isset($_SERVER["QUERY_STRING"]))
$url = "/".$_SERVER["QUERY_STRING"];
else
$url = "/index.gmi";
$GMI_DIR = $_SERVER['DOCUMENT_ROOT'];
$filePath = $GMI_DIR.$url;
$fileContents = @file_get_contents($filePath);
if (!$fileContents) {
http_response_code(404);
die("404: $url");
}
# Removes the Byte Order Mark
$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
$fileLines = preg_split("/\n/", $fileContents);
/**
* Replaces markups things like __underlined__ to <u>underlined</u>.
* @param $instruction the characters to replace, ex. _
* @param $markup the markup to replace to, ex. "u" to get <u>…</u>
* @param &$text where to replace.
*/
function markupPreg($instruction, $markup, &$text) {
$output = $text;
# Replaces couples "__word__" into "<i>word</i>".
$output = mb_ereg_replace("${instruction}(.+?)${instruction}", "<{$markup}>\\1</{$markup}>", $output);
# Replaces a remaining __ into "<i>…</i>" to the end of the line.
$output = mb_ereg_replace("${instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
$text = $output;
}
/**
* Adds text attributes sucj as underline, bold, … to $line
* @param $line the line to process
*/
function addTextAttributes(&$line) {
markupPreg("__", "u", $line);
markupPreg("\*\*", "strong", $line);
markupPreg("//", "em", $line);
markupPreg("~~", "del", $line);
}
define("NARROW_NO_BREAK_SPACE", "&#8239;");
define("DASHES"
,"" # U+2012 Figure Dash
."" # U+2013 En Dash
."" # U+2014 Em Dash
."" # U+2E3A Two-Em Dash
."" # U+2E3B Three-Em Dash (Three times larger than a single char)
);
/**
* Prepares the raw text to be displayed in HTML environment:
* * Escapes the HTML entities yet contained in the Gemtext.
* * Puts thin unbrakable spaces before some characters.
* @param $text1, $text2 texts to process
*/
function htmlPrepare(&$text) {
$text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
$text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
$text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable
# Warning: using a monospace font editor may not display dashes as they should be!
# Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa bb ccccc ==> aaaaaa $bb$ ccccc
$text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
# Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa bb. ==> aaaaaa $bb.
$text = mb_ereg_replace("([—–]) ([^.]+).", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
}
ob_start();
$mode = null;
$mode_textAttributes = true;
foreach ($fileLines as $line) {
$reDoCount = 0;
$mode_textAttributes_temp = false;
while (true) {
if ($reDoCount>2) {
error_log("Too many loops, mode == '$mode'");
$mode = null;
$reDoCount = 0;
break;
}
$reDoCount += 1;
$line1 = substr($line, 0, 1); // $line can be modified
$line2 = substr($line, 0, 2); // in the meantime.
$line3 = substr($line, 0, 3);
if (is_null($mode)) {
if (empty($line)) {
echo "<p>&nbsp;</p>\n";
} elseif ('^^^' == $line3) {
$mode_textAttributes = !$mode_textAttributes;
} elseif ('^' == $line1) {
if (preg_match("/^\^\s*(.*)$/", $line, $parts)) {
$line = $parts[1];
$mode_textAttributes_temp = true;
} else {
$mode = "raw";
}
continue;
} elseif ("#" == $line1) {
preg_match("/^(#{1,3})\s*(.*)/", $line, $sharps);
$h_level = strlen($sharps[1]);
$text = $sharps[2];
htmlPrepare($text);
switch ($h_level) {
case 1: echo "<h1>".$text."</h1>\n"; break;
case 2: echo "<h2>".$text."</h2>\n"; break;
case 3: echo "<h3>".$text."</h3>\n"; break;
}
} elseif ("=>" == $line2) {
if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
$url_link = $linkParts[1];
$url_label = @$linkParts[2];
if (empty(trim($url_label))) {
$url_label = $url_link;
} else {
// the label is humain-made, apply formatting
htmlPrepare($url_label);
}
echo "<p><a href='".$url_link."'>".$url_label."</a></p>\n";
} else {
$mode = "raw";
continue;
}
} elseif ("```" == $line3) {
$mode="pre";
echo "<pre>\n";
} elseif (">" == $line1) {
$mode = "quote";
preg_match("/^>\s*(.*)$/", $line, $quoteParts);
$quote = $quoteParts[1];
echo "<blockquote>\n";
if (empty($quote))
echo "<p>&nbsp;</p>\n";
else
htmlPrepare($quote);
if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($line);
echo "<p>".$quote."</p>\n";
} elseif ("* " == $line2) {
echo "<ul>\n";
$mode = "ul";
continue;
} else {
$mode = "raw";
continue;
}
} else {
if ("raw"==$mode) {
htmlPrepare($line);
if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($line);
if (empty($line)) $line = "&nbsp;";
echo "<p>$line</p>\n";
$mode = null;
} elseif ("pre"==$mode) {
if ("```" == $line3) {
echo "</pre>\n";
$mode = null;
} else {
htmlPrepare($line);
echo $line."\n";
}
} elseif ("quote"==$mode) {
if (">" == $line1) {
preg_match("/^>\s*(.*)$/", $line, $quoteParts);
$quote = $quoteParts[1];
if (empty($quote))
echo "<p>&nbsp;</p>\n";
else
htmlPrepare($quote);
echo "<p>".$quote."</p>\n";
} else {
echo "</blockquote>\n";
$mode = null;
continue;
}
} elseif ("ul"==$mode) {
if ("* " == $line2) {
preg_match("/^\*\s*(.*)$/", $line, $ulParts);
$li = $ulParts[1];
if (empty($li)) {
echo "<li>&nbsp;\n";
} else {
htmlPrepare($li);
if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
echo "<li>".$li."\n";
}
} else {
echo "</ul>\n";
$mode = null;
continue;
}
} else {
die("Unexpected mode: $mode!");
}
}
break; // exits the while(true) as no continue occured
}
}
$body = ob_get_contents();
ob_clean();
# Gets the page title: the first occurrence with # at the line start
mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
$page_title = @$matches[1];
# <!-- link type="text/css" rel="StyleSheet" href="/htmgem.css" -->
echo <<<EOL
<!DOCTYPE html>
<html lang="fr">
<head>
<title>$page_title</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style>
EOL;
include("htmgem.css");
echo <<<EOL
</style>
</head>
<body>
EOL;
echo "\n".$body;
echo "</body>\n</html>\n";
ob_end_flush();
?>