- In Pagination::constructPageNav() we added the $addParam parameter to not force...
[isso.git] / Markdown.php
1 <?php
2 /*=====================================================================*\
3 || ###################################################################
4 || # Blue Static ISSO Framework
5 || # Copyright ©2002-[#]year[#] Blue Static
6 || #
7 || # This program is free software; you can redistribute it and/or modify
8 || # it under the terms of the GNU General Public License as published by
9 || # the Free Software Foundation; version [#]gpl[#] of the License.
10 || #
11 || # This program is distributed in the hope that it will be useful, but
12 || # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 || # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 || # more details.
15 || #
16 || # You should have received a copy of the GNU General Public License along
17 || # with this program; if not, write to the Free Software Foundation, Inc.,
18 || # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 || ###################################################################
20 \*=====================================================================*/
21
22 require_once('ISSO/Functions.php');
23
24 /**
25 * Markdown
26 *
27 * This class is a PHP implementation of John Gruber's Markdown text processing
28 * system. It is not a port of his Perl version, but rather a creation based
29 * on the rules outlined here: http://daringfireball.net/projects/markdown/.
30 *
31 * @author Blue Static
32 * @copyright Copyright (c)2002 - [#]year[#], Blue Static
33 * @version $Revision$
34 * @package ISSO
35 *
36 */
class BSMarkdown
{
	/**
	* Remove HTML rather than ignoring it?
	*
	* NOTE: no method in this class currently reads this flag, so setting
	* it has no effect on the output yet.
	*
	* @var	bool
	*/
	public $removeHtml = false;

	/**
	* Map of placeholder hash => finished HTML. Raw HTML blocks, headers and
	* horizontal rules are swapped for their hash while the remaining text
	* is processed, and swapped back by _expandHtmlBlocks().
	* @var	array
	*/
	private $htmlBlockMap = array();

	/**
	* The working parser text that at any given moment is only partially parsed
	* @var	string
	*/
	private $text;

	/**
	* HTML block element tags
	* @var	array
	*/
	private $htmlBlockTags = array(
		'p', 'div', 'blockquote',
		'pre', 'table', 'form', 'ol',
		'ul', 'h1', 'h2', 'h3', 'h4',
		'h5', 'h6', 'iframe', 'script',
		'noscript'
	);

	/**
	* Map of link IDs to array(url, title)
	* @var	array
	*/
	private $linkMap = array();

	// ###################################################################
	/**
	* Quick parsing function that uses the system defaults for parsing.
	*
	* @param	string	Original text
	*
	* @return	string	HTML output
	*/
	public static function Parse($text)
	{
		$parser = new self();
		return $parser->transform($text);
	}

	// ###################################################################
	/**
	* Creates a parser with empty working state
	*/
	public function __construct()
	{
		$this->_reset();
	}

	// ###################################################################
	/**
	* Clears everything collected during a previous transform() so that one
	* parser instance can be reused without leaking state between documents
	*/
	private function _reset()
	{
		$this->htmlBlockMap = array();
		$this->linkMap = array();
		$this->text = '';
	}

	// ###################################################################
	/**
	* Transforms the Markdown text into XHTML with the parser's set options
	*
	* @param	string	Text to transform
	*
	* @return	string	HTML output
	*/
	public function transform($text)
	{
		$this->_reset();

		// convert line breaks and remove empty lines of whitespace
		$this->text = BSFunctions::ConvertLineBreaks($text);
		$this->text = preg_replace('/^\s*?$/m', '', $this->text);

		// raw HTML and link definitions are pulled out first so the
		// block-level rules below never see them
		$this->_extractHtmlBlocks();
		$this->_extractLinkMap();

		// setext headers must run before horizontal rules, otherwise the
		// "---" underline would be consumed as a rule
		$this->_convertHardLineBreaks();
		$this->_convertAtxHeaders();
		$this->_convertSetextHeaders();
		$this->_convertHorizontalRules();
		$this->_formatParagraphs();
		$this->text = $this->_expandHtmlBlocks($this->text);

		// convert entities; this runs on the fully expanded text, so it
		// also sees the HTML that was restored from the block map
		$this->_convertFloatingEntities();

		return $this->text;
	}

	// ###################################################################
	/**
	* Stores a finished piece of HTML in the block map
	*
	* @param	string	HTML to protect from further processing
	*
	* @return	string	Placeholder hash that stands in for the HTML
	*/
	private function _storeBlock($html)
	{
		$hash = md5($html . microtime());
		$this->htmlBlockMap[$hash] = $html;
		return $hash;
	}

	// ###################################################################
	/**
	* Finds every outermost HTML block element (see $htmlBlockTags) in the
	* working text and replaces it with a placeholder hash. Nested block
	* tags are kept inside their outermost block. A closing tag that has no
	* matching opener is left in the text untouched.
	*/
	private function _extractHtmlBlocks()
	{
		$tags = implode('|', $this->htmlBlockTags);

		// \b stops "<p" from matching longer tag names such as <param>
		$pattern = '#<(?:' . $tags . ')\b|</(?:' . $tags . ')\s*>#i';

		$found = array();
		if (!preg_match_all($pattern, $this->text, $found, PREG_OFFSET_CAPTURE))
		{
			return;
		}

		$depth = 0;
		$blockStart = 0;
		$ranges = array();
		foreach ($found[0] as $match)
		{
			list($tag, $offset) = $match;

			if ($tag[1] != '/')
			{
				// opening tag: remember where the outermost block begins
				if ($depth == 0)
				{
					$blockStart = $offset;
				}
				$depth++;
			}
			else if ($depth > 0)
			{
				$depth--;
				if ($depth == 0)
				{
					$ranges[] = array($blockStart, $offset + strlen($tag) - $blockStart);
				}
			}
		}

		// replace back to front so the recorded offsets stay valid
		foreach (array_reverse($ranges) as $range)
		{
			list($offset, $length) = $range;
			$block = substr($this->text, $offset, $length);
			$this->text = substr_replace($this->text, $this->_storeBlock($block), $offset, $length);
		}
	}

	// ###################################################################
	/**
	* Expands the hashed HTML blocks back into their original form
	*
	* @param	string	Text containing placeholder hashes
	*
	* @return	string	Text with every known hash replaced by its HTML
	*/
	private function _expandHtmlBlocks($text)
	{
		return str_replace(array_keys($this->htmlBlockMap), array_values($this->htmlBlockMap), $text);
	}

	// ###################################################################
	/**
	* Extracts all links in the "[id]: link" form. The definitions are
	* removed from the working text and recorded in $linkMap.
	*/
	public function _extractLinkMap()
	{
		$this->text = preg_replace_callback('/\[(\w+)\]:\s*<?(\S+)>?(\s*(("|\')|\()(.*?)(\4|\)))?\n?/', array($this, '_extractLinkMapCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Converts extracted link definitions into the map
	*
	* @param	array	Matches array
	*
	* @return	string	Empty string, so the definition disappears from the text
	*/
	private function _extractLinkMapCallback($matches)
	{
		// the greedy \S+ in the pattern keeps the ">" of an "<url>" form
		$url = $matches[2];
		if (substr($url, -1) == '>')
		{
			$url = substr($url, 0, -1);
		}

		// the title group is absent from $matches when no title was given
		$title = (isset($matches[6]) ? $matches[6] : '');

		$this->linkMap[$matches[1]] = array($url, $title);
		return '';
	}

	// ###################################################################
	/**
	* Converts text surrounded by # signs to headers (## Heading 2). Any
	* closing run of # characters is dropped from the heading text.
	*/
	private function _convertAtxHeaders()
	{
		// "m" is required so that ^ and $ anchor on every line
		$this->text = preg_replace_callback('/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*$/m', array($this, '_convertAtxHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for preg_replace_callback() in _convertAtxHeaders()
	*
	* @param	array	Matches
	*
	* @return	string	Placeholder hash for the generated heading
	*/
	private function _convertAtxHeadersCallback($matches)
	{
		$level = strlen($matches[1]);
		return $this->_storeBlock('<h' . $level . '>' . $this->_expandHtmlBlocks($matches[2]) . '</h' . $level . '>');
	}

	// ###################################################################
	/**
	* Converts headers that are formed by underlines into headings: a line
	* underlined with "=" becomes <h1>, one underlined with "-" becomes <h2>
	*/
	private function _convertSetextHeaders()
	{
		$this->text = preg_replace_callback('/^(.+)\n(=+|-+)[ \t]*$/m', array($this, '_convertSetextHeadersCallback'), $this->text);
	}

	// ###################################################################
	/**
	* Callback function for _convertSetextHeaders(). This does the actual
	* conversion and then hashes it into a block
	*
	* @param	array	Matches from the preg_replace_callback()
	*
	* @return	string	Placeholder hash for the generated heading
	*/
	private function _convertSetextHeadersCallback($matches)
	{
		$level = ($matches[2][0] == '=' ? 1 : 2);
		$text = $this->_expandHtmlBlocks($matches[1]);
		return $this->_storeBlock('<h' . $level . '>' . $text . '</h' . $level . '>');
	}

	// ###################################################################
	/**
	* Converts three or more stars or dashes (optionally separated by
	* single spaces) that stand on a line of their own into a horizontal
	* rule. Runs of dashes or stars inside a line of text are left alone.
	*/
	private function _convertHorizontalRules()
	{
		$hash = md5('<hr />' . microtime());
		$count = 0;
		$this->text = preg_replace('/^ {0,3}([-*])(?: ?\1){2,}[ \t]*$/m', $hash, $this->text, -1, $count);

		// only register the placeholder when it was actually inserted
		if ($count > 0)
		{
			$this->htmlBlockMap[$hash] = '<hr />';
		}
	}

	// ###################################################################
	/**
	* Wraps blocks into paragraphs. Blocks are separated by one or more
	* blank lines; a block that consists solely of a placeholder hash is
	* already finished HTML and is left unwrapped.
	*/
	private function _formatParagraphs()
	{
		$blocks = preg_split('/\n{2,}/', $this->text, -1, PREG_SPLIT_NO_EMPTY);
		$output = array();
		foreach ($blocks as $block)
		{
			// a lone leading/trailing newline would defeat the map lookup
			$block = trim($block, "\n");
			if ($block === '')
			{
				continue;
			}

			$output[] = (isset($this->htmlBlockMap[$block]) ? $block : "<p>$block</p>");
		}

		$this->text = implode("\n\n", $output);
	}

	// ###################################################################
	/**
	* Converts two spaces followed by a new line with text on it to
	* a hard line break (<br />)
	*/
	private function _convertHardLineBreaks()
	{
		$this->text = preg_replace('/ {2,}\n/', " <br />\n", $this->text);
	}

	// ###################################################################
	/**
	* Converts all angle brackets and ampersands that are floating into
	* HTML entities
	*/
	private function _convertFloatingEntities()
	{
		// encode ampersands that do not already start an entity
		$this->text = preg_replace('/&(?!#?x?[0-9a-z]+;)/i', '&amp;', $this->text);

		// encode brackets that do not open a tag, closing tag, processing
		// instruction or comment/doctype ("<!")
		$this->text = preg_replace('#<(?![/?!a-z])#i', '&lt;', $this->text);
	}
}
318
319 /*=====================================================================*\
320 || ###################################################################
321 || # $HeadURL$
322 || # $Id$
323 || ###################################################################
324 \*=====================================================================*/
325 ?>