.
*/
//! Markdown-to-HTML converter
class Markdown extends Prefab {
protected
//! Block-level parsing rules (handler name => regex), filled in lazily by build()
$blocks,
//! Special character sequences and their replacements, filled in lazily by esc()
$special;
/**
* Process blockquote
*
* Removes one level of the leading ">" marker from every line, renders
* what is left as block-level markdown through build() (so nested quotes
* and other blocks work) and wraps the result in a blockquote element.
* @return string Rendered element followed by a blank line, or an empty
*                string when nothing remains after stripping the markers
* @param $str string Raw blockquote text
**/
protected function _blockquote($str) {
	// Strip a single quote marker plus optional padding from each line
	$str=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
	return strlen($str)?
		('<blockquote>'.$this->build($str).'</blockquote>'."\n\n"):'';
}
/**
* Process whitespace-prefixed code block
*
* Escapes the text, removes the four-space or tab prefix from every line
* and wraps the result in pre/code elements.
* @return string Rendered element followed by a blank line, or an empty
*                string when the block is empty
* @param $str string Raw indented block
**/
protected function _pre($str) {
	// Drop the indentation that marks the block as code
	$str=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',
		$this->esc($str));
	return strlen($str)?
		('<pre><code>'.
			// esc() is called with double encoding disabled, so this
			// second pass leaves existing entities alone
			$this->esc($this->snip($str)).
		'</code></pre>'."\n\n"):
		'';
}
/**
* Process fenced code block
*
* The body is always wrapped in a pre element. When the framework
* HIGHLIGHT setting is truthy, the language hint selects a highlighter:
* "php" is delegated to the framework's highlight() method, while
* "apache", "html" and "ini" are tokenized here and the tokens wrapped in
* span elements carrying a CSS class. Any other hint, or highlighting
* being switched off, yields the escaped text inside a code element.
*
* NOTE(review): captured text in the ini branch, directive names in the
* apache branch and attribute text in the html branch are emitted without
* escaping.
* @return string Rendered element followed by a blank line
* @param $hint string Language hint that followed the opening fence
* @param $str string Text between the fences
**/
protected function _fence($hint,$str) {
	$str=$this->snip($str);
	$fw=Base::instance();
	if ($fw->HIGHLIGHT) {
		switch (strtolower($hint)) {
			case 'php':
				$str=$fw->highlight($str);
				break;
			case 'apache':
				// Each line is either a <Section ...> tag or a
				// "Directive arguments" pair
				preg_match_all('/(?<=^|\n)(\h*)'.
					'(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
					'(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
					$str,$matches,PREG_SET_ORDER);
				$out='';
				foreach ($matches as $match)
					$out.=$match[1].
						($match[3]?
							('<span class="section">'.
								$this->esc($match[2]).$match[3].
							'</span>'.
							($match[4]?
								('<span class="data">'.
									$this->esc($match[4]).
								'</span>'):
								'').
							'<span class="section">'.
								$this->esc($match[5]).
							'</span>'):
							('<span class="directive">'.
								$match[6].
							'</span>'.
							'<span class="data">'.
								$this->esc($match[7]).
							'</span>')).
						$match[8];
				$str='<code>'.$out.'</code>';
				break;
			case 'html':
				// Split into tags (name, attribute text, closing slash)
				// and single characters of plain text
				preg_match_all(
					'/(?:(?:<(\/?)(\w+)'.
					'((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
					'\h+.+?)(\h*\/?)>)|(.+?))/s',
					$str,$matches,PREG_SET_ORDER
				);
				$out='';
				foreach ($matches as $match) {
					if ($match[2]) {
						// Angle brackets must reach the browser as
						// entities, otherwise the sample would be
						// interpreted as real markup
						$out.='<span class="xml_tag">&lt;'.
							$match[1].$match[2].'</span>';
						if ($match[3]) {
							preg_match_all(
								'/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
								'(".+?")|(.+)))/',
								$match[3],$parts,PREG_SET_ORDER
							);
							foreach ($parts as $part)
								$out.=' '.
									(empty($part[3])?
										((empty($part[1])?
											'':
											('<span class="xml_attr">'.
												$part[1].'</span>=')).
										'<span class="xml_data">'.
											$part[2].'</span>'):
										('<span class="xml_tag">'.
											$part[3].'</span>'));
						}
						$out.='<span class="xml_tag">'.
							$match[4].'&gt;</span>';
					}
					else
						$out.=$this->esc($match[5]);
				}
				$str='<code>'.$out.'</code>';
				break;
			case 'ini':
				// Per line: ;comment, embedded PHP, [section] or
				// key=value (value may continue after a backslash)
				preg_match_all(
					'/(?<=^|\n)(?:'.
					'(;[^\n]*)|(?:<\?php.+?\?>?)|'.
					'(?:\[(.+?)\])|'.
					'(.+?)(\h*=\h*)'.
					'((?:\\\\\h*\r?\n|.+?)*)'.
					')((?:\r?\n)+|$)/',
					$str,$matches,PREG_SET_ORDER
				);
				$out='';
				foreach ($matches as $match) {
					if ($match[1])
						$out.='<span class="comment">'.$match[1].
							'</span>';
					elseif ($match[2])
						$out.='<span class="ini_section">['.$match[2].']'.
							'</span>';
					elseif ($match[3])
						$out.='<span class="ini_key">'.$match[3].
							'</span>'.$match[4].
							($match[5]?
								('<span class="ini_value">'.
									$match[5].'</span>'):'');
					else
						$out.=$match[0];
					// Re-attach the line terminator(s) captured last
					if (isset($match[6]))
						$out.=$match[6];
				}
				$str='<code>'.$out.'</code>';
				break;
			default:
				$str='<code>'.$this->esc($str).'</code>';
				break;
		}
	}
	else
		$str='<code>'.$this->esc($str).'</code>';
	return '<pre>'.$str.'</pre>'."\n\n";
}
/**
* Process horizontal rule
* @return string Self-closing hr element followed by a blank line
**/
protected function _hr() {
	return '<hr />'."\n\n";
}
/**
* Process atx-style heading
*
* The heading level equals the number of characters in the marker, and
* the heading text is run through scan() for inline spans.
* @return string Heading element followed by a blank line
* @param $type string Leading marker (run of "#" characters)
* @param $str string Heading text
**/
protected function _atx($type,$str) {
	$level=strlen($type);
	return '<h'.$level.'>'.
		$this->scan($str).'</h'.$level.'>'."\n\n";
}
/**
* Process setext-style heading
*
* An "=" underline produces a level-1 heading and a "-" underline a
* level-2 heading; the text is run through scan() for inline spans.
* @return string Heading element followed by a blank line
* @param $str string Heading text
* @param $type string Underline character
**/
protected function _setext($str,$type) {
	// Position within "=-" gives 0 or 1; headings are numbered from 1
	$level=strpos('=-',$type)+1;
	return '<h'.$level.'>'.
		$this->scan($str).'</h'.$level.'>'."\n\n";
}
/**
* Process ordered/unordered list
*
* Walks the block item by item. The first item decides whether the list
* is ordered (numeric marker) and whether it is "loose" (item followed by
* a blank line), in which case every item body is rendered as blocks.
* Indented continuation lines are un-indented and, when they form a
* recognizable block, rendered through build(). A horizontal rule found
* at an item boundary closes the list; the remainder is handed to build().
* @return string List element followed by a blank line, or an empty
*                string when no item was found
* @param $str string Raw list block
**/
protected function _li($str) {
	// Initialize list parser
	$len=strlen($str);
	$ptr=0;
	$dst='';
	$first=TRUE;
	$tight=TRUE;
	$type='ul';
	// Main loop
	while ($ptr<$len) {
		$rest=substr($str,$ptr);
		if (preg_match('/^\h*[*\-](?:\h?[*\-]){2,}(?:\n+|$)/',
			$rest,$match)) {
			$ptr+=strlen($match[0]);
			// Embedded horizontal rule
			return (strlen($dst)?
				('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').
				'<hr />'."\n\n".$this->build(substr($str,$ptr));
		}
		elseif (preg_match('/(?<=^|\n)([*+\-]|\d+\.)\h'.
			'(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
			$rest,$match)) {
			// Un-indent continuation lines by one level
			$match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
			// Does the continuation form a block other than a paragraph?
			$found=FALSE;
			foreach (array_slice($this->blocks,0,-1) as $regex)
				if (preg_match($regex,$match[3])) {
					$found=TRUE;
					break;
				}
			// List
			if ($first) {
				// First pass
				if (is_numeric($match[1]))
					$type='ol';
				if (preg_match('/\n{2,}$/',$match[2].
					($found?'':$match[3])))
					// Loose structure; Use paragraphs
					$tight=FALSE;
				$first=FALSE;
			}
			// Strip leading whitespaces
			$ptr+=strlen($match[0]);
			$tmp=$this->snip($match[2].$match[3]);
			if ($tight) {
				if ($found)
					$tmp=$match[2].$this->build($this->snip($match[3]));
			}
			else
				$tmp=$this->build($tmp);
			$dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
		}
		else
			// Neither pattern matched, so $ptr cannot advance; bail out
			// rather than loop forever
			break;
	}
	return strlen($dst)?
		('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'';
}
/**
* Ignore raw HTML
*
* Returns its argument as-is: text routed to this handler is neither
* converted nor escaped here.
* @return string The input, unchanged
* @param $str string Text to pass through
**/
protected function _raw($str) {
return $str;
}
/**
* Process paragraph
*
* Trims the text and, if a later line starts with ">" or "#", splits
* there and renders the tail through build(). Otherwise plain text is
* escaped while tags, PHP/ASP-style blocks and inline links are left
* intact; two whitespace characters before a line break become a hard
* break and the result is run through scan().
* @return string Paragraph element followed by a blank line, or an empty
*                string for blank input
* @param $str string Raw paragraph text
**/
protected function _p($str) {
	$str=trim($str);
	if (strlen($str)) {
		// A blockquote or heading may start right after a text line
		if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
			return $this->_p($parts[1]).$this->build($parts[2]);
		$str=preg_replace_callback(
			'/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
			'(.+)/s',
			function($expr) {
				// Group 1: text before markup, group 2: markup kept
				// verbatim, group 3: text with no markup at all. The
				// pattern has no fourth group, so none is tested here.
				$tmp='';
				if (isset($expr[1]))
					$tmp.=$this->esc($expr[1]);
				$tmp.=$expr[2];
				if (isset($expr[3]))
					$tmp.=$this->esc($expr[3]);
				return $tmp;
			},
			$str
		);
		// Two trailing whitespace characters force a line break
		$str=preg_replace('/\s{2}\r?\n/','<br />',$str);
		return '<p>'.$this->scan($str).'</p>'."\n\n";
	}
	return '';
}
/**
* Process strong/em/strikethrough spans
*
* One to three "*" or "_" characters, mirrored on both sides of the text,
* produce em, strong, or strong+em respectively; "~~" pairs produce del.
* Markers preceded by a backslash are left alone. The substitution is
* repeated until a pass changes nothing so nested spans are resolved.
* @return string Text with the spans converted
* @param $str string Inline text
**/
protected function _text($str) {
	$tmp='';
	while ($str!=$tmp)
		$str=preg_replace_callback(
			// Groups 1-3: opening marker characters, group 4: content;
			// the closing marker must mirror the opening one
			'/(?<=\s|^)(?<!\\\\)([*_])([*_]?)([*_]?)(.*?)(?!\\\\)\3\2\1'.
			'(?=[\s[:punct:]]|$)/',
			function($expr) {
				if ($expr[3])
					return '<strong><em>'.$expr[4].'</em></strong>';
				if ($expr[2])
					return '<strong>'.$expr[4].'</strong>';
				return '<em>'.$expr[4].'</em>';
			},
			// Strikethrough is handled first, on a snapshot of the
			// string that the loop condition compares against
			preg_replace(
				'/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
				'<del>\1</del>',
				$tmp=$str
			)
		);
	return $str;
}
/**
* Process image span
*
* Converts ![alt](url "title") into an img element. The alt text and the
* title are optional and the URL may be wrapped in angle brackets.
* @return string Text with image spans converted
* @param $str string Inline text
**/
protected function _img($str) {
	return preg_replace_callback(
		// Group 1: alt text, group 2: URL, group 3: title
		'/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
		function($expr) {
			// The URL is escaped like the other attributes so a stray
			// quote cannot break out of the src attribute
			return '<img src="'.$this->esc($expr[2]).'"'.
				(empty($expr[1])?
					'':
					(' alt="'.$this->esc($expr[1]).'"')).
				(empty($expr[3])?
					'':
					(' title="'.$this->esc($expr[3]).'"')).' />';
		},
		$str
	);
}
/**
* Process anchor span
*
* Converts [text](url "title") into an a element. The title is optional,
* the URL may be wrapped in angle brackets, and brackets preceded by a
* backslash are not treated as link delimiters. The link text is run
* through scan() so it may contain further inline spans.
* @return string Text with anchor spans converted
* @param $str string Inline text
**/
protected function _a($str) {
	return preg_replace_callback(
		// Group 1: link text, group 2: URL, group 3: title
		'/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
		function($expr) {
			return '<a href="'.$this->esc($expr[2]).'"'.
				(empty($expr[3])?
					'':
					(' title="'.$this->esc($expr[3]).'"')).
				'>'.$this->scan($expr[1]).'</a>';
		},
		$str
	);
}
/**
* Auto-convert links
*
* Turns <url> into an anchor whose text is the URL itself, provided
* parse_url() finds a scheme in it. Angle brackets that sit between
* backticks are matched by the first alternative and returned untouched.
* @return string Text with automatic links converted
* @param $str string Inline text
**/
protected function _auto($str) {
	return preg_replace_callback(
		'/`.*?<(.+?)>.*?`|<(.+?)>/',
		function($expr) {
			// Group 1 is only filled for the backtick alternative
			if (empty($expr[1]) && parse_url($expr[2],PHP_URL_SCHEME)) {
				$expr[2]=$this->esc($expr[2]);
				return '<a href="'.$expr[2].'">'.$expr[2].'</a>';
			}
			return $expr[0];
		},
		$str
	);
}
/**
* Process code span
*
* Converts text between single backticks, or between double backticks
* padded with one space, into a code element with its content escaped.
* Backticks preceded by a backslash do not delimit a span.
* @return string Text with code spans converted
* @param $str string Inline text
**/
protected function _code($str) {
	return preg_replace_callback(
		// Group 1: double-backtick form, group 2: single-backtick form
		'/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
		function($expr) {
			return '<code>'.
				$this->esc(empty($expr[1])?$expr[2]:$expr[1]).'</code>';
		},
		$str
	);
}
/**
* Convert characters to HTML entities
*
* First replaces a few ASCII sequences ("...", "(tm)", "(r)", "(c)",
* matched case-insensitively) with named entities, then escapes HTML
* special characters using the framework's ENCODING setting. Double
* encoding is disabled, so entities already in the text, including the
* ones inserted here, are left as they are.
* @return string Escaped text
* @param $str string Text to escape
**/
function esc($str) {
	if (!$this->special)
		// Named entities keep the output independent of the encoding
		// of this source file and of the ENCODING setting
		$this->special=[
			'...'=>'&hellip;',
			'(tm)'=>'&trade;',
			'(r)'=>'&reg;',
			'(c)'=>'&copy;'
		];
	foreach ($this->special as $key=>$val)
		$str=preg_replace('/'.preg_quote($key,'/').'/i',$val,$str);
	return htmlspecialchars($str,ENT_COMPAT,
		Base::instance()->ENCODING,FALSE);
}
/**
* Reduce multiple line feeds
*
* Every run of line feeds that directly follows a line feed, and any run
* of line feeds at the end of the string, is replaced by one line feed.
* @return string Text with surplus line feeds removed
* @param $str string Text to clean up
**/
protected function snip($str) {
	$surplus='/(?:(?<=\n)\n+)|\n+$/';
	$collapsed=preg_replace($surplus,"\n",$str);
	return $collapsed;
}
/**
* Scan line for convertible spans
*
* Applies the inline handlers in a fixed order (image, anchor, text
* emphasis, automatic link, code), each one working on the output of
* the previous one.
* @return string Text with all inline spans converted
* @param $str string Inline text
**/
function scan($str) {
	$str=$this->_img($str);
	$str=$this->_a($str);
	$str=$this->_text($str);
	$str=$this->_auto($str);
	return $this->_code($str);
}
/**
* Assemble blocks
*
* Consumes the text from the front. At each position it first looks for
* a reference-style link definition; if one is found, references to it
* in the output produced so far are replaced by anchors or images.
* Otherwise the block patterns are tried in declaration order and the
* first match is passed to the handler method named "_" plus the rule
* key, receiving the captured groups (or the whole match when the
* pattern has no groups) as arguments.
* @return string Rendered markup
* @param $str string Markdown text with line feeds as line terminators
**/
protected function build($str) {
	if (!$this->blocks) {
		// Regexes for capturing entire blocks
		$this->blocks=[
			'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
			'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
			'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
				'(?:\n+|$)/s',
			'hr'=>'/^\h*[*_\-](?:\h?[\*_\-]){2,}\h*(?:\n+|$)/',
			'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
			'setext'=>'/^\h*(.+?)\h*\n([=\-])+\h*(?:\n+|$)/',
			'li'=>'/^(?:(?:[*+\-]|\d+\.)\h.+?(?:\n+|$)'.
				'(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
			// HTML comment, block-level element (possibly nested) or
			// PHP/ASP-style block
			'raw'=>'/^((?:<!--.+?-->|'.
				'<(address|article|aside|audio|blockquote|canvas|dd|'.
				'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
				'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
				'section|table|tfoot|ul|video).*?'.
				'(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
				'\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
			// Catch-all; must stay last (_li() skips the last rule when
			// probing item bodies)
			'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
		];
	}
	// Treat lines with nothing but whitespaces as empty lines
	$str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
	// Initialize block parser
	$len=strlen($str);
	$ptr=0;
	$dst='';
	// Main loop
	while ($ptr<$len) {
		if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
			'(?:"([^\n]*)")?(?:\n+|$)/s',substr($str,$ptr),$match)) {
			// Reference-style link; Backtrack
			$ptr+=strlen($match[0]);
			$tmp='';
			// Catch line breaks in title attribute
			$ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
			// Repeat until no reference to this definition is left
			while ($dst!=$tmp) {
				$dst=preg_replace_callback(
					// Group 1: id in the "[id][]" form, group 2: "!"
					// for images, group 3: link text, group 4: id
					'/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
					'(!?)(?:\[([^\[\]]+)\]\s*)?'.
					'(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
					function($expr) use($match) {
						return (empty($expr[2]))?
							// Anchor
							('<a href="'.$this->esc($match[2]).'"'.
							(empty($match[3])?
								'':
								(' title="'.
									$this->esc($match[3]).'"')).'>'.
							// Link
							$this->scan(
								empty($expr[3])?
									(empty($expr[1])?
										$expr[4]:
										$expr[1]):
									$expr[3]
							).'</a>'):
							// Image
							('<img src="'.$this->esc($match[2]).'" '.
							'alt="'.$this->esc($expr[3]).'"'.
							(empty($match[3])?
								'':
								(' title="'.
									$this->esc($match[3]).'"')).
							' />');
					},
					$tmp=$dst
				);
			}
		}
		else
			foreach ($this->blocks as $func=>$regex)
				if (preg_match($regex,substr($str,$ptr),$match)) {
					$ptr+=strlen($match[0]);
					$dst.=call_user_func_array(
						[$this,'_'.$func],
						count($match)>1?array_slice($match,1):$match
					);
					break;
				}
	}
	return $dst;
}
/**
* Render HTML equivalent of markdown
*
* Normalizes line endings to line feeds, renders the text through
* build(), then removes the backslash from backslash-escaped characters.
* Code elements, tags, parenthesized text and double-quoted text on a
* single line are matched first and returned untouched, so backslashes
* inside them survive. Finally surplus line feeds are removed.
* @return string Rendered markup
* @param $txt string Markdown source
**/
function convert($txt) {
	$txt=preg_replace_callback(
		// Group 1: spans to leave alone, group 2: escaped character
		'/(<code.*?>.+?<\/code>|'.
		'<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
		'\\\\(.)/s',
		function($expr) {
			// Process escaped characters
			return empty($expr[1])?$expr[2]:$expr[1];
		},
		$this->build(preg_replace('/\r\n|\r/',"\n",$txt))
	);
	return $this->snip($txt);
}
}