. */ //! Markdown-to-HTML converter class Markdown extends Prefab { protected //! Parsing rules $blocks, //! Special characters $special; /** * Process blockquote * Removes the optional horizontal whitespace, the greater-than marker and one optional following whitespace from the start of every line, then feeds the remainder to build() so that nested blocks are parsed. * NOTE(review): the literals wrapped around the build() result contain only a line break in this copy; presumably they carried the opening and closing blockquote markup - confirm against the upstream file. * @return string Converted block followed by two line feeds, or an empty string when nothing is left after stripping * @param $str string Raw blockquote text **/ protected function _blockquote($str) { $str=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str); return strlen($str)? ('
'.$this->build($str).'
'."\n\n"):''; } /** * Process whitespace-prefixed code block * Escapes the text with esc(), strips one level of indentation (four spaces or one tab) from the start of every line, then passes the result through snip() and esc() once more. esc() calls htmlspecialchars() with double encoding disabled, so entities produced by the first pass are not encoded again. * NOTE(review): the literals wrapped around the result contain only a line break in this copy; presumably they carried the code block markup - confirm against the upstream file. * @return string Converted block followed by two line feeds, or an empty string when the stripped text is empty * @param $str string Indented code block text **/ protected function _pre($str) { $str=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1', $this->esc($str)); return strlen($str)? ('
'.
				$this->esc($this->snip($str)).
			'
'."\n\n"): ''; } /** * Process fenced code block * After snip(), the text is rendered according to the framework HIGHLIGHT setting. When the setting is falsy, or when the hint is none of php, apache, html or ini, the text is simply escaped with esc(). The php hint is delegated to the framework highlight() method. The apache, html and ini hints are tokenized with the regular expressions below and the captured pieces are concatenated back together. * NOTE(review): most literals concatenated around the captured groups are empty strings in this copy; presumably they held the highlighting markup - confirm against the upstream file. * @return string Rendered block followed by two line feeds * @param $hint string Language hint; compared after conversion to lower case * @param $str string Text between the fences **/ protected function _fence($hint,$str) { $str=$this->snip($str); $fw=Base::instance(); if ($fw->HIGHLIGHT) { switch (strtolower($hint)) { case 'php': $str=$fw->highlight($str); break; case 'apache': preg_match_all('/(?<=^|\n)(\h*)'. '(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'. '(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/', $str,$matches,PREG_SET_ORDER); $out=''; foreach ($matches as $match) $out.=$match[1]. ($match[3]? (''. $this->esc($match[2]).$match[3]. ''. ($match[4]? (''. $this->esc($match[4]). ''): ''). ''. $this->esc($match[5]). ''): (''. $match[6]. ''. ''. $this->esc($match[7]). '')). $match[8]; $str=''.$out.''; break; case 'html': preg_match_all( '/(?:(?:<(\/?)(\w+)'. '((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'. '\h+.+?)(\h*\/?)>)|(.+?))/s', $str,$matches,PREG_SET_ORDER ); $out=''; foreach ($matches as $match) { if ($match[2]) { $out.='<'. $match[1].$match[2].''; if ($match[3]) { preg_match_all( '/(?:\h+(?:(?:(\w+)\h*=\h*)?'. '(".+?")|(.+)))/', $match[3],$parts,PREG_SET_ORDER ); foreach ($parts as $part) $out.=' '. (empty($part[3])? ((empty($part[1])? '': (''. $part[1].'=')). ''. $part[2].''): (''. $part[3].'')); } $out.=''. $match[4].'>'; } else $out.=$this->esc($match[5]); } $str=''.$out.''; break; case 'ini': preg_match_all( '/(?<=^|\n)(?:'. '(;[^\n]*)|(?:<\?php.+?\?>?)|'. '(?:\[(.+?)\])|'. '(.+?)(\h*=\h*)'. '((?:\\\\\h*\r?\n|.+?)*)'. ')((?:\r?\n)+|$)/', $str,$matches,PREG_SET_ORDER ); $out=''; foreach ($matches as $match) { if ($match[1]) $out.=''.$match[1]. ''; elseif ($match[2]) $out.='['.$match[2].']'. ''; elseif ($match[3]) $out.=''.$match[3]. ''.$match[4]. ($match[5]? (''. $match[5].''):''); else $out.=$match[0]; if (isset($match[6])) $out.=$match[6]; } $str=''.$out.''; break; default: $str=''.$this->esc($str).''; break; } } else $str=''.$this->esc($str).''; return '
'.$str.'
'."\n\n"; } /** * Process horizontal rule * Declares no parameters, so the matched rule text handed over by build() is not used. * NOTE(review): the returned literal contains only a line break in this copy; presumably it carried the rule markup - confirm against the upstream file. * @return string Fixed literal followed by two line feeds **/ protected function _hr() { return '
'."\n\n"; } /** * Process atx-style heading * The heading level is the length of the marker run in $type; the heading text is run through scan() for inline spans. * NOTE(review): $level is computed but never referenced by the visible return expression; presumably the surrounding literals used it to build the heading tag - confirm against the upstream file. * @return string Scanned heading text followed by two line feeds * @param $type string Run of hash characters * @param $str string Heading text **/ protected function _atx($type,$str) { $level=strlen($type); return ''. $this->scan($str).''."\n\n"; } /** * Process setext-style heading * The level is the position of the underline character within the two-character string of equals sign and hyphen, plus one: an equals sign gives 1 and a hyphen gives 2. The heading text is run through scan() for inline spans. * NOTE(review): $level is computed but never referenced by the visible return expression; presumably the surrounding literals used it to build the heading tag - confirm against the upstream file. * @return string Scanned heading text followed by two line feeds * @param $str string Heading text * @param $type string Underline character **/ protected function _setext($str,$type) { $level=strpos('=-',$type)+1; return ''. $this->scan($str).''."\n\n"; } /** * Process ordered/unordered list * Walks the list text item by item. An item is a marker (asterisk, plus, hyphen, or digits followed by a period), one horizontal whitespace, the first line, and any continuation lines indented by four spaces or a tab; one level of that indentation is removed from the continuation text. The first item decides the list type (ol when its marker is numeric, ul otherwise) and whether the list is loose: when the text of the first item ends with two or more line feeds (continuation text is left out of this check if it matches a block rule), every item is passed through build(). In a tight list only continuation text that matches one of the block rules, all rules except the last one, is passed through build(). Each item is finally trimmed and run through scan(). A line made of three or more asterisks or hyphens ends the list: the output so far is returned and the rest of the text is handed to build(). * NOTE(review): if neither pattern matches the remaining text, $ptr is not advanced inside the loop - confirm that callers only pass text captured by the li rule. * NOTE(review): the closing list literal, the item literals and the embedded rule literal look truncated in this copy - confirm against the upstream file. * @return string Converted list followed by two line feeds, or an empty string when no item was produced * @param $str string Raw list text **/ protected function _li($str) { // Initialize list parser $len=strlen($str); $ptr=0; $dst=''; $first=TRUE; $tight=TRUE; $type='ul'; // Main loop while ($ptr<$len) { if (preg_match('/^\h*[*\-](?:\h?[*\-]){2,}(?:\n+|$)/', substr($str,$ptr),$match)) { $ptr+=strlen($match[0]); // Embedded horizontal rule return (strlen($dst)? ('<'.$type.'>'."\n".$dst.''."\n\n"):''). '
'."\n\n".$this->build(substr($str,$ptr)); } elseif (preg_match('/(?<=^|\n)([*+\-]|\d+\.)\h'. '(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s', substr($str,$ptr),$match)) { $match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]); $found=FALSE; foreach (array_slice($this->blocks,0,-1) as $regex) if (preg_match($regex,$match[3])) { $found=TRUE; break; } // List if ($first) { // First pass if (is_numeric($match[1])) $type='ol'; if (preg_match('/\n{2,}$/',$match[2]. ($found?'':$match[3]))) // Loose structure; Use paragraphs $tight=FALSE; $first=FALSE; } // Strip leading whitespaces $ptr+=strlen($match[0]); $tmp=$this->snip($match[2].$match[3]); if ($tight) { if ($found) $tmp=$match[2].$this->build($this->snip($match[3])); } else $tmp=$this->build($tmp); $dst.='
  • '.$this->scan(trim($tmp)).'
  • '."\n"; } } return strlen($dst)? ('<'.$type.'>'."\n".$dst.''."\n\n"):''; } /** * Ignore raw HTML * @return string The input, returned unchanged * @param $str string Raw block text **/ protected function _raw($str) { return $str; } /** * Process paragraph * Trims the text and returns an empty string when nothing is left. When a later line of the paragraph starts with a greater-than sign or a hash, the text before that line is processed recursively as a paragraph and the rest is handed to build(). Otherwise the callback escapes plain text with esc() while passing through, unescaped, whatever the second capture group matched (angle-bracket constructs and bracket-plus-parenthesis links). Two whitespace characters before a line feed are then replaced by a fixed literal, and the result is run through scan(). * NOTE(review): the visible pattern defines three capture groups, so the branch testing $expr[4] looks unreachable - confirm against the upstream file. * NOTE(review): the line-break replacement and the literals wrapped around the result contain only whitespace in this copy; presumably they carried markup - confirm against the upstream file. * @return string Converted paragraph followed by two line feeds, or an empty string for blank input * @param $str string Paragraph text **/ protected function _p($str) { $str=trim($str); if (strlen($str)) { if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts)) return $this->_p($parts[1]).$this->build($parts[2]); $str=preg_replace_callback( '/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'. '(.+)/s', function($expr) { $tmp=''; if (isset($expr[4])) $tmp.=$this->esc($expr[4]); else { if (isset($expr[1])) $tmp.=$this->esc($expr[1]); $tmp.=$expr[2]; if (isset($expr[3])) $tmp.=$this->esc($expr[3]); } return $tmp; }, $str ); $str=preg_replace('/\s{2}\r?\n/','
    ',$str); return '

    '.$this->scan($str).'

    '."\n\n"; } return ''; } /** * Process strong/em/strikethrough spans * Repeats the replacement until a pass leaves the string unchanged; $tmp holds the value from before each pass. * NOTE(review): both regular expressions and the head of the callback are truncated in this copy (the text jumps from a pattern fragment straight into the callback body), so the matching rules cannot be read from here - restore from the upstream file before relying on this method. * @return string Text after span replacement * @param $str string Inline text **/ protected function _text($str) { $tmp=''; while ($str!=$tmp) $str=preg_replace_callback( '/(?<=\s|^)(?'.$expr[4].''; if ($expr[2]) return ''.$expr[4].''; return ''.$expr[4].''; }, preg_replace( '/(?\1', $tmp=$str ) ); return $str; } /** * Process image span * Replaces every match of the pattern with the value returned by the callback. * NOTE(review): the pattern and the returned literals look truncated in this copy; only the escaped first capture group is visibly emitted - confirm against the upstream file. * @return string Text after image replacement * @param $str string Inline text **/ protected function _img($str) { return preg_replace_callback( '/!(?:\[(.+?)\])?\h*\(?(?:\h*"(.*?)"\h*)?\)/', function($expr) { return ''.$this->esc($expr[1]).''; }, $str ); } /** * Process anchor span * Replaces every match of the pattern with the value returned by the callback, which runs the first capture group through scan(). * NOTE(review): the pattern is truncated in this copy and the returned literals are empty - confirm against the upstream file. * @return string Text after anchor replacement * @param $str string Inline text **/ protected function _a($str) { return preg_replace_callback( '/(??(?:\h*"(.*?)"\h*)?\)/', function($expr) { return ''.$this->scan($expr[1]).''; }, $str ); } /** * Auto-convert links * Looks for text enclosed in angle brackets. A match located between two backticks fills the first capture group and is returned untouched. Otherwise, when parse_url() reports a scheme for the bracketed text, the text is escaped with esc() and substituted for the match. Any other match is returned as found. * NOTE(review): the literals concatenated around the escaped text are empty in this copy; presumably they carried the anchor markup - confirm against the upstream file. * @return string Text after automatic link conversion * @param $str string Inline text **/ protected function _auto($str) { return preg_replace_callback( '/`.*?<(.+?)>.*?`|<(.+?)>/', function($expr) { if (empty($expr[1]) && parse_url($expr[2],PHP_URL_SCHEME)) { $expr[2]=$this->esc($expr[2]); return ''.$expr[2].''; } return $expr[0]; }, $str ); } /** * Process code span * The visible part of the callback escapes the first capture group, or the second one when the first is empty. * NOTE(review): the pattern and the head of the callback are truncated in this copy - confirm against the upstream file. * @return string Text after code span replacement * @param $str string Inline text **/ protected function _code($str) { return preg_replace_callback( '/`` (.+?) ``|(?'. 
$this->esc(empty($expr[1])?$expr[2]:$expr[1]).''; }, $str ); } /** * Convert characters to HTML entities * On first use, fills the substitution table stored in $this->special; every key of the table is then replaced by its value, ignoring case. The result is passed to htmlspecialchars() with ENT_COMPAT, the framework ENCODING value, and double encoding disabled. * @return string Escaped text * @param $str string Text to escape **/ function esc($str) { if (!$this->special) $this->special=[ '...'=>'…', '(tm)'=>'™', '(r)'=>'®', '(c)'=>'©' ]; foreach ($this->special as $key=>$val) $str=preg_replace('/'.preg_quote($key,'/').'/i',$val,$str); return htmlspecialchars($str,ENT_COMPAT, Base::instance()->ENCODING,FALSE); } /** * Reduce multiple line feeds * Replaces every run of line feeds that directly follows a line feed, and every run of line feeds at the very end of the string, with a single line feed. * @return string Text with surplus line feeds removed * @param $str string Text to clean up **/ protected function snip($str) { return preg_replace('/(?:(?<=\n)\n+)|\n+$/',"\n",$str); } /** * Scan line for convertible spans * Applies the inline handlers in a fixed order: _img, _a, _text, _auto, then _code; each handler receives the output of the previous one. * @return string Text after all inline handlers have run * @param $str string Inline text **/ function scan($str) { $inline=['img','a','text','auto','code']; foreach ($inline as $func) $str=$this->{'_'.$func}($str); return $str; } /** * Assemble blocks * On first use, defines the block-level rules in $this->blocks; their order matters because the first matching rule wins and the paragraph rule comes last. Lines holding only horizontal whitespace are treated as empty. The text is then consumed from the start. A reference-style link definition is not emitted itself; it is applied to the output accumulated so far, repeating until a pass changes nothing. Otherwise the first rule that matches at the current position is used: its capture groups, or the whole match when it has none, are passed as arguments to the method named after the rule key with a leading underscore. * NOTE(review): the raw rule, the reference definition pattern and the reference replacement callback look truncated in this copy - confirm against the upstream file. * @return string Concatenated output of the block handlers * @param $str string Markdown text with LF line endings **/ protected function build($str) { if (!$this->blocks) { // Regexes for capturing entire blocks $this->blocks=[ 'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/', 'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/', 'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'. '(?:\n+|$)/s', 'hr'=>'/^\h*[*_\-](?:\h?[\*_\-]){2,}\h*(?:\n+|$)/', 'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/', 'setext'=>'/^\h*(.+?)\h*\n([=\-])+\h*(?:\n+|$)/', 'li'=>'/^(?:(?:[*+\-]|\d+\.)\h.+?(?:\n+|$)'. '(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s', 'raw'=>'/^((?:|'. '<(address|article|aside|audio|blockquote|canvas|dd|'. 'div|dl|fieldset|figcaption|figure|footer|form|h\d|'. 'header|hgroup|hr|noscript|object|ol|output|p|pre|'. 'section|table|tfoot|ul|video).*?'. '(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'. '\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s', 'p'=>'/^(.+?(?:\n{2,}|\n*$))/s' ]; } // Treat lines with nothing but whitespaces as empty lines $str=preg_replace('/\n\h+(?=\n)/',"\n",$str); // Initialize block parser $len=strlen($str); $ptr=0; $dst=''; // Main loop while ($ptr<$len) { if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*?\s*'. 
'(?:"([^\n]*)")?(?:\n+|$)/s',substr($str,$ptr),$match)) { // Reference-style link; Backtrack $ptr+=strlen($match[0]); $tmp=''; // Catch line breaks in title attribute $ref=preg_replace('/\h/','\s',preg_quote($match[1],'/')); while ($dst!=$tmp) { $dst=preg_replace_callback( '/(?esc($match[2]).'"'. (empty($match[3])? '': (' title="'. $this->esc($match[3]).'"')).'>'. // Link $this->scan( empty($expr[3])? (empty($expr[1])? $expr[4]: $expr[1]): $expr[3] ).''): // Image (''.
										$this->esc($expr[3]).''); }, $tmp=$dst ); } } else foreach ($this->blocks as $func=>$regex) if (preg_match($regex,substr($str,$ptr),$match)) { $ptr+=strlen($match[0]); $dst.=call_user_func_array( [$this,'_'.$func], count($match)>1?array_slice($match,1):$match ); break; } } return $dst; } /** * Render HTML equivalent of markdown * Normalizes CRLF and lone CR line endings to LF and runs build() on the result. The output is then post-processed: anything matched by the first capture group (text ending in a closing code tag, angle-bracket tags, parenthesized text and double-quoted text, none spanning a line feed except the first form) is kept as is, while a backslash followed by a character is reduced to that character. Finally snip() removes surplus line feeds. * NOTE(review): the first alternative of the pattern starts directly with a lazy wildcard in this copy; presumably an opening code tag was lost in front of it - confirm against the upstream file. * @return string Converted text * @param $txt string Markdown source **/ function convert($txt) { $txt=preg_replace_callback( '/(.+?<\/code>|'. '<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'. '\\\\(.)/s', function($expr) { // Process escaped characters return empty($expr[1])?$expr[2]:$expr[1]; }, $this->build(preg_replace('/\r\n|\r/',"\n",$txt)) ); return $this->snip($txt); } }