Markdown parser in progress....
[isso.git] / Markdown.php
1 <?php
2 /*=====================================================================*\
3 || ###################################################################
4 || # Blue Static ISSO Framework
5 || # Copyright ©2002-[#]year[#] Blue Static
6 || #
7 || # This program is free software; you can redistribute it and/or modify
8 || # it under the terms of the GNU General Public License as published by
9 || # the Free Software Foundation; version [#]gpl[#] of the License.
10 || #
11 || # This program is distributed in the hope that it will be useful, but
12 || # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 || # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 || # more details.
15 || #
16 || # You should have received a copy of the GNU General Public License along
17 || # with this program; if not, write to the Free Software Foundation, Inc.,
18 || # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 || ###################################################################
20 \*=====================================================================*/
21
22 require_once('ISSO/Functions.php');
23
24 /**
25 * Markdown
26 *
27 * This class is a PHP implementation of John Gruber's Markdown text processing
28 * system. It is not a port of his Perl version, but rather a creation based
29 * on the rules outlined here: http://daringfireball.net/projects/markdown/.
30 *
31 * @author Blue Static
32 * @copyright Copyright (c)2002 - [#]year[#], Blue Static
33 * @version $Revision$
34 * @package ISSO
35 *
36 */
class BSMarkdown
{
	/**
	* Remove HTML rather than ignoring it?
	*
	* NOTE(review): no method in this class reads this flag yet; it is kept
	* because it is part of the public interface.
	* @var	bool
	*/
	public $removeHtml = false;

	/**
	* Map of placeholder hash => finished HTML. Raw HTML blocks and generated
	* headings are parked here so later passes do not touch them.
	* @var	array
	*/
	private $htmlBlockMap = array();

	/**
	* The working parser text that at any given moment is only partially parsed
	* @var	string
	*/
	private $text = '';

	/**
	* HTML block element tags
	* @var	array
	*/
	private $htmlBlockTags = array(
		'p', 'div', 'blockquote',
		'pre', 'table', 'form', 'ol',
		'ul', 'h1', 'h2', 'h3', 'h4',
		'h5', 'h6', 'iframe', 'script',
		'noscript'
	);

	// ###################################################################
	/**
	* Quick parsing function that uses the system defaults for parsing.
	*
	* @param	string	Original text
	*
	* @return	string	HTML output
	*/
	public static function Parse($text)
	{
		$parser = new BSMarkdown();
		return $parser->transform($text);
	}

	// ###################################################################
	/**
	* Creates a parser with empty working state
	*/
	public function __construct()
	{
		$this->_reset();
	}

	// ###################################################################
	/**
	* Transforms the Markdown text into XHTML with the parser's set options
	*
	* @param	string	Text to transform
	*
	* @return	string	HTML output
	*/
	public function transform($text)
	{
		// start from a clean slate so one parser can be reused for many inputs
		$this->_reset();

		// convert line breaks and remove empty lines of whitespace
		$this->text = BSFunctions::ConvertLineBreaks($text);
		$this->text = preg_replace('/^\s*?$/m', '', $this->text);

		// raw HTML is swapped for placeholders first so that none of the
		// Markdown passes below can alter it
		$this->_extractHtmlBlocks();

		$this->_convertHardLineBreaks();
		$this->_convertAtxHeaders();
		$this->_convertSetextHeaders();
		$this->_formatParagraphs();
		$this->text = $this->_expandHtmlBlocks($this->text);

		// convert entities
		$this->_convertFloatingEntities();

		return $this->text;
	}

	// ###################################################################
	/**
	* Clears the placeholder map and the working text
	*/
	private function _reset()
	{
		$this->htmlBlockMap = array();
		$this->text = '';
	}

	// ###################################################################
	/**
	* Stores a piece of finished HTML in the block map and returns the
	* placeholder hash that stands in for it in the working text
	*
	* @param	string	Finished HTML
	*
	* @return	string	Placeholder hash
	*/
	private function _hashBlock($html)
	{
		// the map size is mixed in so two blocks hashed within the same
		// microtime() tick still receive distinct placeholders
		$hash = md5($html . microtime() . count($this->htmlBlockMap));
		$this->htmlBlockMap[$hash] = $html;
		return $hash;
	}

	// ###################################################################
	/**
	* Finds every outermost HTML block element in the working text (nested
	* block elements stay inside their parent) and replaces it with a
	* placeholder hash. Opening tags must be followed by whitespace, "/" or
	* ">" so that e.g. <param> is not mistaken for <p>. Closing tags that
	* have no matching opening tag are left in the text untouched.
	*/
	private function _extractHtmlBlocks()
	{
		$tags = implode('|', $this->htmlBlockTags);
		$pattern = '#</(?:' . $tags . ')\s*>|<(?:' . $tags . ')(?=[\s/>])#i';

		$offset = 0;
		$depth = 0;
		$blockStart = 0;
		$matches = array();

		// jump from tag to tag instead of testing every character offset
		while (preg_match($pattern, $this->text, $matches, PREG_OFFSET_CAPTURE, $offset) === 1)
		{
			$tag = $matches[0][0];
			$tagStart = $matches[0][1];
			$offset = $tagStart + strlen($tag);

			if ($tag[1] != '/')
			{
				// opening tag: remember where the outermost block begins
				if ($depth == 0)
				{
					$blockStart = $tagStart;
				}
				$depth++;
				continue;
			}

			if ($depth == 0)
			{
				// stray closing tag with nothing open; not a block boundary
				continue;
			}

			$depth--;
			if ($depth == 0)
			{
				$block = substr($this->text, $blockStart, $offset - $blockStart);
				$hash = $this->_hashBlock($block);
				$this->text = substr_replace($this->text, $hash, $blockStart, strlen($block));

				// the text changed length; resume right after the placeholder
				$offset = $blockStart + strlen($hash);
			}
		}
	}

	// ###################################################################
	/**
	* Expands the hashed HTML blocks back into their original form
	*
	* @param	string	Text containing placeholder hashes
	*
	* @return	string	Text with every known placeholder replaced by its HTML
	*/
	private function _expandHtmlBlocks($text)
	{
		return str_replace(array_keys($this->htmlBlockMap), array_values($this->htmlBlockMap), $text);
	}

	// ###################################################################
	/**
	* Converts lines that start with one to six # signs into headers
	* (## Heading 2). An optional closing run of # signs is discarded.
	*/
	private function _convertAtxHeaders()
	{
		// "m" makes ^ and $ work per line; [ \t] keeps the match on one line
		$this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*[ \t]*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for preg_replace_callback() in _convertAtxHeaders()
	*
	* @param	array	Matches: [1] is the run of # signs, [2] the header text
	*
	* @return	string	Placeholder hash for the generated header
	*/
	private function _convertAtxHeadersCallback($matches)
	{
		$level = strlen($matches[1]);
		$html = '<h' . $level . '>' . $this->_expandHtmlBlocks($matches[2]) . '</h' . $level . '>';
		return $this->_hashBlock($html);
	}

	// ###################################################################
	/**
	* Converts headers that are formed by underlines into headings: a line
	* underlined with = signs becomes <h1>, with - signs <h2>. The underline
	* must consist of one kind of character only.
	*/
	private function _convertSetextHeaders()
	{
		$this->text = preg_replace_callback('/^(.+)\n(=+|-+)[ \t]*$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for _convertSetextHeaders(). This does the actual
	* conversion and then hashes it into a block
	*
	* @param	array	Matches: [1] is the header text, [2] the underline
	*
	* @return	string	Placeholder hash for the generated header
	*/
	private function _convertSetextHeadersCallback($matches)
	{
		$tag = ($matches[2][0] == '=' ? 'h1' : 'h2');
		$html = '<' . $tag . '>' . $this->_expandHtmlBlocks($matches[1]) . '</' . $tag . '>';
		return $this->_hashBlock($html);
	}

	// ###################################################################
	/**
	* Wraps blocks into paragraphs. Blocks are separated by one or more blank
	* lines; a block that is nothing but a placeholder hash is already
	* finished HTML and is left unwrapped.
	*/
	private function _formatParagraphs()
	{
		$output = array();
		foreach (preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY) AS $block)
		{
			// a lone newline at either end of the text survives the split and
			// would otherwise stop the placeholder lookup from matching
			$block = trim($block);
			if ($block === '')
			{
				continue;
			}

			$output[] = (isset($this->htmlBlockMap[$block]) ? $block : "<p>$block</p>");
		}

		$this->text = implode("\n\n", $output);
	}

	// ###################################################################
	/**
	* Converts two or more spaces at the end of a line into a hard line
	* break (<br />)
	*/
	private function _convertHardLineBreaks()
	{
		$this->text = preg_replace('/ {2,}\n/', " <br />\n", $this->text);
	}

	// ###################################################################
	/**
	* Converts all angle brackets and ampersands that are floating into
	* HTML entities
	*/
	private function _convertFloatingEntities()
	{
		// encode ampersands that do not already begin an entity
		$this->text = preg_replace('/&(?!#?x?[0-9a-z]+;)/i', '&amp;', $this->text);

		// encode opening brackets that are not followed by "/", "?" or a letter
		$this->text = preg_replace('#<(?![/?a-z])#i', '&lt;', $this->text);
	}
}
274
275 /*=====================================================================*\
276 || ###################################################################
277 || # $HeadURL$
278 || # $Id$
279 || ###################################################################
280 \*=====================================================================*/
281 ?>