Markdown parser in progress....
author Robert Sesek <rsesek@bluestatic.org>
Tue, 3 Apr 2007 19:24:28 +0000 (19:24 +0000)
committer Robert Sesek <rsesek@bluestatic.org>
Tue, 3 Apr 2007 19:24:28 +0000 (19:24 +0000)
Markdown.php

index 9da0d211a56983e7a929143d8d21b26610d61d84..05262758ffd4c94756beb1041892b1ba15e89199 100644 (file)
@@ -19,6 +19,8 @@
 || ###################################################################
 \*=====================================================================*/
 
+require_once('ISSO/Functions.php');
+
 /**
 * Markdown
 *
 */
 class BSMarkdown
 {
+       /**
+       * Remove HTML rather than ignoring it?
+       * NOTE(review): no method visible in this diff reads this flag; confirm
+       * where (or whether) the option is honoured.
+       * @var bool
+       */
+       public $removeHtml = false;
+       
+       /**
+       * Map of HTML blocks extracted from the text, keyed by the MD5
+       * placeholder hash that stands in for each block in the working text
+       * @var array
+       */
+       private $htmlBlockMap = array();
+       
+       /**
+       * The working parser text that at any given moment is only partially parsed
+       * @var string
+       */
+       private $text;
+       
+       /**
+       * HTML block element tags. The property name is spelled "htmlBockTags"
+       * (sic); _extractHtmlBlocks() reads it under that exact name, so a rename
+       * has to change both places together.
+       * @var array
+       */
+       private $htmlBockTags = array(
+               'p', 'div', 'blockquote',
+               'pre', 'table', 'form', 'ol',
+               'ul', 'h1', 'h2', 'h3', 'h4',
+               'h5', 'h6', 'iframe', 'script',
+               'noscript'
+       );
+       
        // ###################################################################
        /**
        * Quick parsing function that uses the system defaults for parsing.
@@ -44,7 +76,199 @@ class BSMarkdown
        */
        public static function Parse($text)
        {
+               $parser = new BSMarkdown();
+               return $parser->transform($text);
+       }
+       
+       // ###################################################################
+       /**
+       * Puts the parser into its pristine state: empty working text and no
+       * stored HTML blocks. transform() also calls this directly to reset
+       * the parser before each run.
+       */
+       public function __construct()
+       {
+               $this->text = '';
+               $this->htmlBlockMap = array();
+       }
+       
+       // ###################################################################
+       /**
+       * Runs the complete Markdown-to-XHTML pipeline over a piece of text
+       * using the options set on this parser instance
+       *
+       * @param        string  Markdown source to transform
+       *
+       * @return       string  Resulting HTML
+       */
+       public function transform($text)
+       {
+               // start from a clean slate so nothing carries over from an earlier run
+               $this->__construct();
+               
+               // convert line breaks, then blank out lines holding only whitespace
+               $converted = BSFunctions::ConvertLineBreaks($text);
+               $this->text = preg_replace('/^\s*?$/m', '', $converted);
+               
+               // raw HTML blocks are swapped for placeholder hashes before the
+               // Markdown passes run
+               $this->_extractHtmlBlocks();
+               
+               // Markdown passes, in this order
+               $this->_convertHardLineBreaks();
+               $this->_convertAtxHeaders();
+               $this->_convertSetextHeaders();
+               $this->_formatParagraphs();
+               
+               // put the stored blocks back in place of their placeholders
+               $this->text = $this->_expandHtmlBlocks($this->text);
+               
+               // finally encode floating ampersands and angle brackets
+               $this->_convertFloatingEntities();
+               
+               return $this->text;
+       }
+       
+       // ###################################################################
+       /**
+       * Finds every outermost run of block-level HTML in the working text
+       * (from an opening tag listed in $htmlBockTags to the closing tag that
+       * balances it), stores it in $htmlBlockMap and puts an MD5 placeholder
+       * hash in its place.
+       *
+       * Closing tags are balanced by nesting depth only, not by tag name.
+       * Takes no parameters and returns nothing; it rewrites $this->text.
+       */
+       private function _extractHtmlBlocks()
+       {
+               $tags = implode('|', $this->htmlBockTags);
+               
+               // an opening tag is "<name" followed by a word boundary, so "<p" does
+               // not also match "<param"; a closing tag must be exactly "</name>"
+               $pattern = "#<(?:$tags)\\b|</(?:$tags)>#i";
+               
+               $offset = 0;
+               $depth = 0;
+               $blockStart = 0;
+               $match = array();
+               
+               // jump from tag to tag instead of testing every character position
+               while (preg_match($pattern, $this->text, $match, PREG_OFFSET_CAPTURE, $offset) === 1)
+               {
+                       $tag = $match[0][0];
+                       $position = $match[0][1];
+                       $offset = $position + strlen($tag);
+                       
+                       if ($tag[1] != '/')
+                       {
+                               // opening tag: remember where the outermost block begins
+                               if ($depth == 0)
+                               {
+                                       $blockStart = $position;
+                               }
+                               $depth++;
+                               continue;
+                       }
+                       
+                       // a closing tag with nothing open is stray markup; leave it alone
+                       // rather than cutting a block from a stale start position
+                       if ($depth == 0)
+                       {
+                               continue;
+                       }
+                       
+                       $depth--;
+                       if ($depth > 0)
+                       {
+                               continue;
+                       }
+                       
+                       // the outermost block just closed: swap it for a placeholder
+                       $block = substr($this->text, $blockStart, $offset - $blockStart);
+                       $hash = md5($block . microtime());
+                       $this->htmlBlockMap[$hash] = $block;
+                       $this->text = substr_replace($this->text, $hash, $blockStart, strlen($block));
+                       
+                       // resume scanning right after the placeholder
+                       $offset = $blockStart + strlen($hash);
+               }
+       }
+       
+       // ###################################################################
+       /**
+       * Swaps every placeholder hash found in the given text for the HTML
+       * block stored under it in $htmlBlockMap
+       *
+       * @param        string  Text that may contain placeholder hashes
+       *
+       * @return       string  Text with the stored blocks put back
+       */
+       private function _expandHtmlBlocks($text)
+       {
+               $hashes = array_keys($this->htmlBlockMap);
+               $blocks = array_values($this->htmlBlockMap);
+               
+               return str_replace($hashes, $blocks, $text);
+       }
+       
+       // ###################################################################
+       /**
+       * Converts lines that start with one to six # signs into headings
+       * (## Heading 2). A closing run of # signs is optional and is dropped.
+       */
+       private function _convertAtxHeaders()
+       {
+               // /m makes ^ and $ anchor at every line, so headers anywhere in the
+               // text are found; [ \t] keeps the match on a single line; the lazy
+               // (.+?) leaves trailing whitespace and #'s out of the heading text
+               $this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
+       }
+       
+       // ###################################################################
+       /**
+       * Callback for preg_replace_callback() in _convertAtxHeaders(). Builds
+       * the <hN> element, stores it as an HTML block and returns the
+       * placeholder hash that replaces the matched line.
+       *
+       * @param        array   Matches: [1] is the run of # signs, [2] the heading text
+       *
+       * @return       string  Placeholder hash for the generated heading
+       */
+       private function _convertAtxHeadersCallback($matches)
+       {
+               // the number of # signs is the heading level
+               $level = strlen($matches[1]);
+               
+               // placeholders already inside the heading text are expanded before
+               // the heading itself is stored
+               $html = '<h' . $level . '>' . $this->_expandHtmlBlocks($matches[2]) . '</h' . $level . '>';
+               $hash = md5($html . microtime());
+               $this->htmlBlockMap[$hash] = $html;
+               return $hash;
+       }
+       
+       // ###################################################################
+       /**
+       * Converts a line of text underlined by a run of = or - characters
+       * into a heading; the callback picks the level from the underline
+       */
+       private function _convertSetextHeaders()
+       {
+               // the underline must be all = or all - (no mixing); the whole run is
+               // captured, so its first character tells the callback which it was
+               $this->text = preg_replace_callback('/^(.+)\n(=+|-+)$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
+       }
+       
+       // ###################################################################
+       /**
+       * Callback for _convertSetextHeaders(): wraps the matched title line in
+       * <h1> when the underline character is =, otherwise in <h2>, stores the
+       * result as an HTML block and hands back its placeholder hash
+       *
+       * @param        array   Matches from the preg_replace_callback()
+       *
+       * @return       string  Placeholder hash for the generated heading
+       */
+       private function _convertSetextHeadersCallback($matches)
+       {
+               $tag = ($matches[2][0] == '=' ? 'h1' : 'h2');
+               $html = '<' . $tag . '>' . $this->_expandHtmlBlocks($matches[1]) . '</' . $tag . '>';
+               
+               $hash = md5($html . microtime());
+               $this->htmlBlockMap[$hash] = $html;
+               return $hash;
+       }
+       
+       // ###################################################################
+       /**
+       * Splits the working text into blocks separated by one or more blank
+       * lines and wraps each block in <p> tags, unless the block is the
+       * placeholder hash of a stored HTML block
+       */
+       private function _formatParagraphs()
+       {
+               $paragraphs = array();
+               $blocks = preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY);
+               foreach ($blocks AS $block)
+               {
+                       // a single newline can survive at the very start or end of the
+                       // text; strip it so a lone placeholder still matches its map key
+                       // and is not wrapped in <p>
+                       $block = trim($block, "\n");
+                       if ($block === '')
+                       {
+                               continue;
+                       }
+                       
+                       $paragraphs[] = (isset($this->htmlBlockMap[$block]) ? $block : "<p>$block</p>");
+               }
+               
+               $this->text = implode("\n\n", $paragraphs);
+       }
+       
+       // ###################################################################
+       /**
+       * Replaces a run of two or more spaces sitting directly before a
+       * newline with a hard line break (<br />), keeping the newline
+       */
+       private function _convertHardLineBreaks()
+       {
+               $pattern = '/ {2,}\n/m';
+               $lineBreak = " <br />\n";
+               
+               $this->text = preg_replace($pattern, $lineBreak, $this->text);
+       }
+       
+       // ###################################################################
+       /**
+       * Encodes floating ampersands and opening angle brackets as HTML
+       * entities. Both patterns run in one preg_replace() call, which applies
+       * them in array order: ampersands first, then brackets.
+       */
+       private function _convertFloatingEntities()
+       {
+               $patterns = array(
+                       // an ampersand that does not already begin an entity
+                       '/&(?!#?x?[0-9a-z]+;)/i',
+                       // a < that is not followed by /, ? or a letter
+                       '#<(?![/?a-z])#i'
+               );
                
+               $this->text = preg_replace($patterns, array('&amp;', '&lt;'), $this->text);
        }
 }