Added support for horizontal rules in Markdown
[isso.git] / Markdown.php
1 <?php
2 /*=====================================================================*\
3 || ###################################################################
4 || # Blue Static ISSO Framework
5 || # Copyright ©2002-[#]year[#] Blue Static
6 || #
7 || # This program is free software; you can redistribute it and/or modify
8 || # it under the terms of the GNU General Public License as published by
9 || # the Free Software Foundation; version [#]gpl[#] of the License.
10 || #
11 || # This program is distributed in the hope that it will be useful, but
12 || # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 || # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 || # more details.
15 || #
16 || # You should have received a copy of the GNU General Public License along
17 || # with this program; if not, write to the Free Software Foundation, Inc.,
18 || # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 || ###################################################################
20 \*=====================================================================*/
21
22 require_once('ISSO/Functions.php');
23
/**
* Markdown
*
* This class is a PHP implementation of John Gruber's Markdown text processing
* system. It is not a port of his Perl version, but rather a creation based
* on the rules outlined here: http://daringfireball.net/projects/markdown/.
*
* Parsing works by replacing every finished block-level element (raw HTML
* blocks, headers, horizontal rules) with an MD5 placeholder that is stored
* in a map. Later passes therefore never re-parse finished HTML; the
* placeholders are swapped back for their HTML at the end of transform().
*
* @author		Blue Static
* @copyright	Copyright (c)2002 - [#]year[#], Blue Static
* @version		$Revision$
* @package		ISSO
*
*/
class BSMarkdown
{
	/**
	* Remove HTML rather than ignoring it?
	* NOTE: no method of this class reads this option yet.
	* @var	bool
	*/
	public $removeHtml = false;

	/**
	* Map of finished HTML blocks, keyed by the placeholder hash that
	* stands in for them in the working text
	* @var	array
	*/
	private $htmlBlockMap = array();

	/**
	* The working parser text that at any given moment is only partially parsed
	* @var	string
	*/
	private $text = '';

	/**
	* HTML block element tags
	* @var	array
	*/
	private $htmlBlockTags = array(
		'p', 'div', 'blockquote',
		'pre', 'table', 'form', 'ol',
		'ul', 'h1', 'h2', 'h3', 'h4',
		'h5', 'h6', 'iframe', 'script',
		'noscript'
	);

	// ###################################################################
	/**
	* Quick parsing function that uses the system defaults for parsing.
	*
	* @param	string	Original text
	*
	* @return	string	HTML output
	*/
	public static function Parse($text)
	{
		$parser = new BSMarkdown();
		return $parser->transform($text);
	}

	// ###################################################################
	/**
	* Creates a parser with empty state
	*/
	public function __construct()
	{
		$this->_reset();
	}

	// ###################################################################
	/**
	* Transforms the Markdown text into XHTML with the parser's set options
	*
	* @param	string	Text to transform
	*
	* @return	string	HTML output
	*/
	public function transform($text)
	{
		// start from a clean slate so the parser object can be reused
		$this->_reset();

		// convert line breaks and remove empty lines of whitespace
		$this->text = BSFunctions::ConvertLineBreaks($text);
		$this->text = preg_replace('/^\s*?$/m', '', $this->text);

		// raw HTML is hidden first so no later pass can touch it
		$this->_extractHtmlBlocks();

		// block-level constructs
		$this->_convertAtxHeaders();
		$this->_convertSetextHeaders();
		$this->_convertHorizontalRules();

		// hard breaks come after the block-level passes so that trailing
		// spaces on a header or rule line do not inject a <br /> into it
		$this->_convertHardLineBreaks();

		$this->_formatParagraphs();
		$this->text = $this->_expandHtmlBlocks($this->text);

		// convert entities
		$this->_convertFloatingEntities();

		return $this->text;
	}

	// ###################################################################
	/**
	* Resets the parser: clears the block map and the working text
	*/
	private function _reset()
	{
		$this->htmlBlockMap = array();
		$this->text = '';
	}

	// ###################################################################
	/**
	* Stores a finished piece of HTML in the block map
	*
	* @param	string	Finished HTML
	*
	* @return	string	Placeholder hash that now stands for the HTML
	*/
	private function _storeBlock($html)
	{
		$hash = md5($html . microtime());
		$this->htmlBlockMap[$hash] = $html;
		return $hash;
	}

	// ###################################################################
	/**
	* Finds every outermost block-level HTML element (see $htmlBlockTags)
	* in the working text and replaces it, including everything nested
	* inside it, with a placeholder hash. Nesting is tracked by depth
	* only; tag names of opening and closing tags are not paired up.
	*/
	private function _extractHtmlBlocks()
	{
		$tags = implode('|', $this->htmlBlockTags);

		// Either a complete closing tag, or the start of an opening tag.
		// The lookahead makes sure the tag name is complete, so that
		// <param> or <picture> are not mistaken for <p>.
		$pattern = '#<(?:/(?:' . $tags . ')\s*>|(?:' . $tags . ')(?=[\s/>]|$))#i';

		$offset = 0;
		$depth = 0;
		$blockStart = 0;
		$match = array();

		while (preg_match($pattern, $this->text, $match, PREG_OFFSET_CAPTURE, $offset) === 1)
		{
			$tag = $match[0][0];
			$position = $match[0][1];
			$offset = $position + strlen($tag);

			if ($tag[1] != '/')
			{
				// opening tag: remember where the outermost block begins
				if ($depth == 0)
				{
					$blockStart = $position;
				}
				$depth++;
				continue;
			}

			// a closing tag without any open block is left in the text
			if ($depth == 0)
			{
				continue;
			}

			$depth--;
			if ($depth > 0)
			{
				continue;
			}

			// the outermost block just closed: swap it for a placeholder
			$block = substr($this->text, $blockStart, $offset - $blockStart);
			$hash = $this->_storeBlock($block);
			$this->text = substr_replace($this->text, $hash, $blockStart, strlen($block));

			// resume scanning right after the inserted placeholder
			$offset = $blockStart + strlen($hash);
		}
	}

	// ###################################################################
	/**
	* Expands the hashed HTML blocks back into their original form
	*
	* @param	string	Text containing placeholder hashes
	*
	* @return	string	Text with every known hash replaced by its HTML
	*/
	private function _expandHtmlBlocks($text)
	{
		return str_replace(array_keys($this->htmlBlockMap), array_values($this->htmlBlockMap), $text);
	}

	// ###################################################################
	/**
	* Converts lines that start with one to six # signs into headers
	* (## Heading 2). Optional closing # signs are dropped.
	*/
	private function _convertAtxHeaders()
	{
		// m: headers may be on any line; [ \t] keeps the match on one line;
		// the lazy (.+?) leaves the closing hashes out of the title
		$this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*[ \t]*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for preg_replace_callback() in _convertAtxHeaders()
	*
	* @param	array	Matches: [1] the leading # signs, [2] the title
	*
	* @return	string	Placeholder hash padded with blank lines
	*/
	private function _convertAtxHeadersCallback($matches)
	{
		$level = strlen($matches[1]);
		$html = '<h' . $level . '>' . $this->_expandHtmlBlocks($matches[2]) . '</h' . $level . '>';

		// the blank lines keep the header from being wrapped into a
		// paragraph together with adjacent text
		return "\n\n" . $this->_storeBlock($html) . "\n\n";
	}

	// ###################################################################
	/**
	* Converts headers that are formed by underlines into headings:
	* a line underlined with = becomes <h1>, one underlined with - <h2>
	*/
	private function _convertSetextHeaders()
	{
		// the underline must consist of one kind of character only
		$this->text = preg_replace_callback('/^(.+)\n(=+|-+)[ \t]*$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for _convertSetextHeaders(). This does the actual
	* conversion and then hashes it into a block
	*
	* @param	array	Matches: [1] the title line, [2] the underline
	*
	* @return	string	Placeholder hash padded with blank lines
	*/
	private function _convertSetextHeadersCallback($matches)
	{
		$tag = ($matches[2][0] == '=' ? 'h1' : 'h2');
		$title = $this->_expandHtmlBlocks(rtrim($matches[1], " \t"));
		$html = '<' . $tag . '>' . $title . '</' . $tag . '>';

		return "\n\n" . $this->_storeBlock($html) . "\n\n";
	}

	// ###################################################################
	/**
	* Converts a line that consists of nothing but three or more dashes,
	* stars or underscores (optionally separated by spaces) into a
	* horizontal rule. The same characters inside a line of text are
	* left alone.
	*/
	private function _convertHorizontalRules()
	{
		$hash = md5('<hr />' . microtime());
		$count = 0;

		// one kind of character per rule, on a line of its own
		$this->text = preg_replace('/^[ ]{0,3}(?:(?:-[ \t]*){3,}|(?:\*[ \t]*){3,}|(?:_[ \t]*){3,})$/m', "\n\n" . $hash . "\n\n", $this->text, -1, $count);

		// all rules share one placeholder; only register it when used
		if ($count > 0)
		{
			$this->htmlBlockMap[$hash] = '<hr />';
		}
	}

	// ###################################################################
	/**
	* Wraps blocks into paragraphs. Blocks are separated by blank lines;
	* a block that is exactly one placeholder hash is already finished
	* HTML and is not wrapped.
	*/
	private function _formatParagraphs()
	{
		$blocks = preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY);
		$output = array();

		foreach ($blocks AS $block)
		{
			// a single newline at the very start or end of the text
			// survives the split; drop it so hash lookups still work
			$block = trim($block, "\n");
			if ($block === '')
			{
				continue;
			}

			$output[] = (isset($this->htmlBlockMap[$block]) ? $block : "<p>$block</p>");
		}

		$this->text = implode("\n\n", $output);
	}

	// ###################################################################
	/**
	* Converts two or more spaces followed by a new line into
	* a hard line break (<br />)
	*/
	private function _convertHardLineBreaks()
	{
		$this->text = preg_replace('/ {2,}\n/', " <br />\n", $this->text);
	}

	// ###################################################################
	/**
	* Converts all angle brackets and ampersands that are floating into
	* HTML entities
	*/
	private function _convertFloatingEntities()
	{
		// encode ampersands that do not already start an entity
		$this->text = preg_replace('/&(?!#?x?[0-9a-z]+;)/i', '&amp;', $this->text);

		// encode opening brackets that are not followed by a letter, / or ?
		$this->text = preg_replace('#<(?![/?a-z])#i', '&lt;', $this->text);
	}
}
291
292 /*=====================================================================*\
293 || ###################################################################
294 || # $HeadURL$
295 || # $Id$
296 || ###################################################################
297 \*=====================================================================*/
298 ?>