From 7c8d817e4c95b96468fac76177e652dace28f682 Mon Sep 17 00:00:00 2001
From: Robert Sesek
Date: Tue, 3 Apr 2007 19:24:28 +0000
Subject: [PATCH] Markdown parser in progress....

---
 Markdown.php | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 251 insertions(+)

diff --git a/Markdown.php b/Markdown.php
index 9da0d21..0526275 100644
--- a/Markdown.php
+++ b/Markdown.php
@@ -19,6 +19,8 @@
 || ###################################################################
 \*=====================================================================*/
 
+require_once('ISSO/Functions.php');
+
 /**
  * Markdown
  *
@@ -34,6 +36,36 @@
  */
 class BSMarkdown
 {
+    /**
+     * Remove HTML rather than ignoring it?
+     * @var bool
+     */
+    public $removeHtml = false;
+
+    /**
+     * Map of placeholder hash => HTML block extracted from the text
+     * @var array
+     */
+    private $htmlBlockMap = array();
+
+    /**
+     * The working parser text that at any given moment is only partially parsed
+     * @var string
+     */
+    private $text;
+
+    /**
+     * HTML block element tags
+     * @var array
+     */
+    private $htmlBlockTags = array(
+        'p', 'div', 'blockquote',
+        'pre', 'table', 'form', 'ol',
+        'ul', 'h1', 'h2', 'h3', 'h4',
+        'h5', 'h6', 'iframe', 'script',
+        'noscript'
+    );
+
     // ###################################################################
     /**
      * Quick parsing function that uses the system defaults for parsing.
@@ -44,7 +76,226 @@ class BSMarkdown
      */
     public static function Parse($text)
     {
+        $parser = new BSMarkdown();
+        return $parser->transform($text);
+    }
+
+    // ###################################################################
+    /**
+     * Starts the parser with an empty block map and no working text
+     */
+    public function __construct()
+    {
+        $this->htmlBlockMap = array();
+        $this->text = '';
+    }
+
+    // ###################################################################
+    /**
+     * Transforms the Markdown text into XHTML with the parser's set options
+     *
+     * @param  string  Text to transform
+     *
+     * @return string  HTML output
+     */
+    public function transform($text)
+    {
+        // start from a clean block map so the parser can be reused
+        $this->htmlBlockMap = array();
+
+        // convert line breaks and remove empty lines of whitespace
+        $this->text = BSFunctions::ConvertLineBreaks($text);
+        $this->text = preg_replace('/^\s*?$/m', '', $this->text);
 
+        // swap raw HTML blocks for placeholders before parsing
+        $this->_extractHtmlBlocks();
+
+        $this->_convertHardLineBreaks();
+        $this->_convertAtxHeaders();
+        $this->_convertSetextHeaders();
+        $this->_formatParagraphs();
+        $this->text = $this->_expandHtmlBlocks($this->text);
+
+        // convert entities
+        $this->_convertFloatingEntities();
+
+        return $this->text;
+    }
+
+    // ###################################################################
+    /**
+     * Replaces every outermost raw HTML block element in the working
+     * text with a placeholder hash so that the Markdown passes leave the
+     * markup alone; _expandHtmlBlocks() puts the blocks back
+     */
+    private function _extractHtmlBlocks()
+    {
+        // locate every opening and closing block tag in a single pass
+        $tags = implode('|', $this->htmlBlockTags);
+        $pattern = "#<(/?)(?:$tags)\\b[^>]*>#i";
+        $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
+        if (!preg_match_all($pattern, $this->text, $matches, $flags))
+        {
+            return;
+        }
+
+        $output = '';
+        $cursor = 0;
+        $depth = 0;
+        $blockStart = 0;
+
+        foreach ($matches AS $match)
+        {
+            $offset = $match[0][1];
+            if ($match[1][0] != '/')
+            {
+                // opening tag: remember where the outermost block starts
+                if ($depth == 0)
+                {
+                    $blockStart = $offset;
+                }
+                $depth++;
+            }
+            else if ($depth > 0)
+            {
+                // closing tag; a stray one with nothing open is skipped
+                $depth--;
+                if ($depth == 0)
+                {
+                    $blockEnd = $offset + strlen($match[0][0]);
+                    $block = substr($this->text, $blockStart, $blockEnd - $blockStart);
+                    $output .= substr($this->text, $cursor, $blockStart - $cursor);
+                    $output .= $this->_hashBlock($block);
+                    $cursor = $blockEnd;
+                }
+            }
+        }
+
+        // keep whatever follows the last complete block
+        $this->text = $output . substr($this->text, $cursor);
+    }
+
+    // ###################################################################
+    /**
+     * Stores a piece of finished HTML in the block map
+     *
+     * @param  string  HTML to protect from further parsing
+     *
+     * @return string  Placeholder hash that stands in for the HTML
+     */
+    private function _hashBlock($html)
+    {
+        // the map size keeps hashes distinct within the same microsecond
+        $hash = md5($html . microtime() . sizeof($this->htmlBlockMap));
+        $this->htmlBlockMap[$hash] = $html;
+        return $hash;
+    }
+
+    // ###################################################################
+    /**
+     * Expands the hashed HTML blocks back into their original form
+     *
+     * @param  string  Text containing placeholder hashes
+     *
+     * @return string  Text with the stored HTML restored
+     */
+    private function _expandHtmlBlocks($text)
+    {
+        return str_replace(array_keys($this->htmlBlockMap), array_values($this->htmlBlockMap), $text);
+    }
+
+    // ###################################################################
+    /**
+     * Converts text prefixed by # signs to headers (## Heading 2)
+     */
+    private function _convertAtxHeaders()
+    {
+        // the m flag anchors per line; the lazy group leaves closing #s out
+        $this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
+    }
+
+    // ###################################################################
+    /**
+     * Callback function for preg_replace_callback() in _convertAtxHeaders()
+     *
+     * @param  array   Matches: [1] is the run of #s, [2] the header text
+     *
+     * @return string  Placeholder hash set apart as its own block
+     */
+    private function _convertAtxHeadersCallback($matches)
+    {
+        // the number of leading #s is the heading level
+        $level = strlen($matches[1]);
+        $html = "<h$level>" . $this->_expandHtmlBlocks($matches[2]) . "</h$level>";
+        return "\n\n" . $this->_hashBlock($html) . "\n\n";
+    }
+
+    // ###################################################################
+    /**
+     * Converts headers that are formed by underlines into headings
+     */
+    private function _convertSetextHeaders()
+    {
+        $this->text = preg_replace_callback('/^(.+)\n(=+|-+)[ \t]*$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
+    }
+
+    // ###################################################################
+    /**
+     * Callback function for _convertSetextHeaders(). This does the actual
+     * conversion and then hashes it into a block
+     *
+     * @param  array   Matches from the preg_replace_callback()
+     *
+     * @return string  Placeholder hash set apart as its own block
+     */
+    private function _convertSetextHeadersCallback($matches)
+    {
+        // = underlines make a first-level heading, - a second-level one
+        $tag = ($matches[2][0] == '=' ? 'h1' : 'h2');
+        $html = "<$tag>" . $this->_expandHtmlBlocks($matches[1]) . "</$tag>";
+        return "\n\n" . $this->_hashBlock($html) . "\n\n";
+    }
+
+    // ###################################################################
+    /**
+     * Wraps blocks into paragraphs
+     */
+    private function _formatParagraphs()
+    {
+        $blocks = preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY);
+        foreach ($blocks AS $key => $value)
+        {
+            // placeholder hashes are already finished HTML
+            if (!isset($this->htmlBlockMap[$value]))
+            {
+                $blocks[$key] = "<p>$value</p>";
+            }
+        }
+
+        $this->text = implode("\n\n", $blocks);
+    }
+
+    // ###################################################################
+    /**
+     * Converts two spaces followed by a new line with text on it to
+     * a hard line break (<br />)
+     */
+    private function _convertHardLineBreaks()
+    {
+        $this->text = preg_replace('/ {2,}\n/m', "<br />\n", $this->text);
+    }
+
+    // ###################################################################
+    /**
+     * Converts all angle brackets and ampersands that are floating into
+     * HTML entities
+     */
+    private function _convertFloatingEntities()
+    {
+        // encode ampersands
+        $this->text = preg_replace('/&(?!#?x?[0-9a-z]+;)/i', '&amp;', $this->text);
+        // encode brackets who aren't followed by text
+        $this->text = preg_replace('#<(?![/?a-z])#i', '&lt;', $this->text);
     }
 }
 
-- 
2.22.5