From 7c8d817e4c95b96468fac76177e652dace28f682 Mon Sep 17 00:00:00 2001
From: Robert Sesek <rsesek@bluestatic.org>
Date: Tue, 3 Apr 2007 19:24:28 +0000
Subject: [PATCH] Markdown parser in progress....

---
 Markdown.php | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/Markdown.php b/Markdown.php
index 9da0d21..0526275 100644
--- a/Markdown.php
+++ b/Markdown.php
@@ -19,6 +19,8 @@
 || ###################################################################
 \*=====================================================================*/
 
+require_once('ISSO/Functions.php');
+
 /**
 * Markdown
 *
@@ -34,6 +36,36 @@
 */
 class BSMarkdown
 {
+	/**
+	* Remove HTML rather than ignoring it? (no method in this patch reads the flag yet)
+	* @var bool
+	*/
+	public $removeHtml = false;
+	
+	/**
+	* Map of md5 hash key => block of HTML that was swapped out of the text
+	* @var array
+	*/
+	private $htmlBlockMap = array();
+	
+	/**
+	* The working parser text that at any given moment is only partially parsed
+	* @var string
+	*/
+	private $text;
+	
+	/**
+	* Tag names _extractHtmlBlocks() treats as block-level HTML (NOTE(review): name is misspelled, "Bock")
+	* @var array
+	*/
+	private $htmlBockTags = array(
+		'p', 'div', 'blockquote',
+		'pre', 'table', 'form', 'ol',
+		'ul', 'h1', 'h2', 'h3', 'h4',
+		'h5', 'h6', 'iframe', 'script',
+		'noscript'
+	);
+	
 	// ###################################################################
 	/**
 	* Quick parsing function that uses the system defaults for parsing.
@@ -44,7 +76,199 @@ class BSMarkdown
 	*/
 	public static function Parse($text)
 	{
+		$markdown = new self();
+		return $markdown->transform($text);
+	}
+	
+	// ###################################################################
+	/**
+	* Constructor: puts the parser into its initial, empty state
+	*/
+	public function __construct()
+	{
+		$this->text = '';
+		$this->htmlBlockMap = array();
+	}
+	
+	// ###################################################################
+	/**
+	* Transforms the Markdown text into XHTML with the parser's set options.
+	* Parser state is reset first, so the same instance can be called again.
+	* @param	string	Text to transform
+	*
+	* @return	string	HTML output
+	*/
+	public function transform($text)
+	{
+		// re-run the constructor to clear the block map and the working text
+		$this->__construct();
+		
+		// convert line breaks, then empty out lines that hold only whitespace
+		$this->text = BSFunctions::ConvertLineBreaks($text);
+		$this->text = preg_replace('/^\s*?$/m', '', $this->text);
+		
+		$this->_extractHtmlBlocks();
+		
+		$this->_convertHardLineBreaks();
+		$this->_convertAtxHeaders();
+		$this->_convertSetextHeaders();
+		$this->_formatParagraphs();
+		$this->text = $this->_expandHtmlBlocks($this->text);
+		
+		// convert entities; runs after the blocks are expanded, so restored HTML is scanned too
+		$this->_convertFloatingEntities();
+		
+		return $this->text;
+	}
+	
+	// ###################################################################
+	/**
+	* Pulls raw block-level HTML out of the working text. Every outermost
+	* block (opening tag through its balancing closing tag) is stored in
+	* $htmlBlockMap and replaced in the text by its md5 hash key, so the
+	* later Markdown passes do not touch it. A closing tag that has no
+	* opening tag before it is left in the text untouched.
+	*/
+	private function _extractHtmlBlocks()
+	{
+		$blockStart = 0;
+		$nest = array();
+		$matches = array();
+		$regex = implode('|', $this->htmlBockTags);
+		
+		// hop from one '<' to the next; \G pins a pattern to that offset and \b keeps <p from matching <param>
+		$start = 0;
+		while (($start = strpos($this->text, '<', $start)) !== false)
+		{
+			if (preg_match("/\\G<($regex)\\b/i", $this->text, $matches, 0, $start) === 1)
+			{
+				if (sizeof($nest) == 0)
+				{
+					$blockStart = $start;
+				}
+				array_push($nest, $start);
+			}
+			else if (sizeof($nest) > 0 && preg_match("#\\G</($regex)>#i", $this->text, $matches, 0, $start) === 1)
+			{
+				array_pop($nest);
+				if (sizeof($nest) == 0)
+				{
+					$block = substr($this->text, $blockStart, $start - $blockStart + strlen($matches[0]));
+					$hash = md5($block . microtime());
+					$this->htmlBlockMap[$hash] = $block;
+					$this->text = substr_replace($this->text, $hash, $blockStart, strlen($block));
+					$start = $blockStart + strlen($hash) - 1;
+				}
+			}
+			$start++;
+		}
+	}
+	
+	// ###################################################################
+	/**
+	* Swaps every hash key found in $text for the HTML block stored under it
+	*/
+	private function _expandHtmlBlocks($text)
+	{
+		return str_replace(array_keys($this->htmlBlockMap), $this->htmlBlockMap, $text);
+	}
+	
+	// ###################################################################
+	/**
+	* Converts lines that start with one to six # signs into headers (## Heading 2); a closing run of # is dropped
+	*/
+	private function _convertAtxHeaders()
+	{
+		$this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)(?:[ \t]*\#+)?[ \t]*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
+	}
+	
+	// ###################################################################
+	/**
+	* Callback for _convertAtxHeaders(): builds the <hN> element, stores it in
+	* $htmlBlockMap and returns the hash key that stands in for it in the text
+	* @param	array	Matches: [1] is the run of # signs, [2] the heading text
+	*/
+	private function _convertAtxHeadersCallback($matches)
+	{
+		$level = strlen($matches[1]);
+		$html = '<h' . $level . '>' . $this->_expandHtmlBlocks($matches[2]) . '</h' . $level . '>';
+		$hash = md5($html . microtime());
+		$this->htmlBlockMap[$hash] = $html;
+		return $hash;
+	}
+	
+	// ###################################################################
+	/**
+	* Converts a line underlined by a run of = or a run of - (never a mix of both) into a heading
+	*/
+	private function _convertSetextHeaders()
+	{
+		$this->text = preg_replace_callback('/^(.+)\n(=+|-+)$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
+	}
+	
+	// ###################################################################
+	/**
+	* Callback for _convertSetextHeaders(): wraps the heading text in <h1>
+	* (= underline) or <h2> (anything else) and stores the result as a block
+	*
+	* @param	array	Matches from the preg_replace_callback()
+	*/
+	private function _convertSetextHeadersCallback($matches)
+	{
+		// the first character of the underline capture picks the level
+		$tag = 'h2';
+		if ($matches[2][0] == '=')
+		{
+			$tag = 'h1';
+		}
+		
+		// hashed blocks inside the heading are restored before wrapping
+		$html = '<' . $tag . '>' . $this->_expandHtmlBlocks($matches[1]) . '</' . $tag . '>';
+		$key = md5($html . microtime());
+		$this->htmlBlockMap[$key] = $html;
+		return $key;
+	}
+	
+	// ###################################################################
+	/**
+	* Splits the text on blank lines and wraps each chunk in <p>, except chunks that are exactly a stored hash
+	*/
+	private function _formatParagraphs()
+	{
+		$output = array();
+		foreach (preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY) AS $chunk)
+		{
+			// a chunk that is exactly a key of the block map is already block-level HTML
+			$isBlock = isset($this->htmlBlockMap[$chunk]);
+			$output[] = ($isBlock ? $chunk : '<p>' . $chunk . '</p>');
+		}
+		
+		// rejoin with one blank line between the blocks
+		$this->text = implode("\n\n", $output);
+	}
+	
+	// ###################################################################
+	/**
+	* Turns two or more spaces at the end of a line into a hard line break (<br />)
+	*/
+	private function _convertHardLineBreaks()
+	{
+		$pattern = '/ {2,}\n/m';
+		$this->text = preg_replace($pattern, " <br />\n", $this->text);
+	}
+	
+	// ###################################################################
+	/**
+	* Encodes ampersands that do not start an entity, and < characters that
+	* cannot start a tag, comment or processing instruction, as HTML entities
+	*/
+	private function _convertFloatingEntities()
+	{
+		// encode ampersands that are not already the start of an entity
+		$this->text = preg_replace('/&(?!#?x?[0-9a-z]+;)/i', '&amp;', $this->text);
 		
+		// encode < unless a letter, /, ? or ! follows (! keeps <!-- --> and <!DOCTYPE intact)
+		$this->text = preg_replace('#<(?![/?!a-z])#i', '&lt;', $this->text);
 	}
 }
 
-- 
2.43.5