Updating GeSHi to latest version, 1.0.7.6
[viewsvn.git] / includes / geshi / geshi.php
1 <?php
2 /**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the documentation
6 * at http://qbnz.com/highlighter/documentation.php for more information about how to
7 * use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/ directory
10 *
11 * This file is part of GeSHi.
12 *
13 * GeSHi is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * GeSHi is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with GeSHi; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 *
27 * @package core
28 * @author Nigel McNie <nigel@geshi.org>
29 * @copyright Copyright &copy; 2004, 2005, Nigel McNie
30 * @license http://gnu.org/copyleft/gpl.html GNU GPL
31 * @version $Id: geshi.php,v 1.28 2006/01/21 23:31:39 oracleshinoda Exp $
32 *
33 */
34
35 //
36 // GeSHi Constants
37 // You should use these constant names in your programs instead of
38 // their values - you never know when a value may change in a future
39 // version
40 //
41
42 /** The version of this GeSHi file */
43 define('GESHI_VERSION', '1.0.7.6');
44
45 /** Set the correct directory separator */
46 define('GESHI_DIR_SEPARATOR', ('WIN' != substr(PHP_OS, 0, 3)) ? '/' : '\\');
47
48 // Define the root directory for the GeSHi code tree
49 if (!defined('GESHI_ROOT')) {
50 /** The root directory for GeSHi */
51 define('GESHI_ROOT', dirname(__FILE__) . GESHI_DIR_SEPARATOR);
52 }
53 /** The language file directory for GeSHi
54 @access private */
55 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . GESHI_DIR_SEPARATOR);
56
57
58 // Line numbers - use with enable_line_numbers()
59 /** Use no line numbers when building the result */
60 define('GESHI_NO_LINE_NUMBERS', 0);
61 /** Use normal line numbers when building the result */
62 define('GESHI_NORMAL_LINE_NUMBERS', 1);
63 /** Use fancy line numbers when building the result */
64 define('GESHI_FANCY_LINE_NUMBERS', 2);
65
66 // Container HTML type
67 /** Use nothing to surround the source */
68 define('GESHI_HEADER_NONE', 0);
69 /** Use a "div" to surround the source */
70 define('GESHI_HEADER_DIV', 1);
71 /** Use a "pre" to surround the source */
72 define('GESHI_HEADER_PRE', 2);
73
74 // Capitalisation constants
75 /** Leave keywords found as the case that they are */
76 define('GESHI_CAPS_NO_CHANGE', 0);
77 /** Uppercase keywords found */
78 define('GESHI_CAPS_UPPER', 1);
79 /** Lowercase keywords found */
80 define('GESHI_CAPS_LOWER', 2);
81
82 // Link style constants
83 /** Links in the source in the :link state */
84 define('GESHI_LINK', 0);
85 /** Links in the source in the :hover state */
86 define('GESHI_HOVER', 1);
87 /** Links in the source in the :active state */
88 define('GESHI_ACTIVE', 2);
89 /** Links in the source in the :visited state */
90 define('GESHI_VISITED', 3);
91
92 // Important string starter/finisher
93 // Note that if you change these, they should be as-is: i.e., don't
94 // write them as if they had been run through htmlentities()
95 /** The starter for important parts of the source */
96 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
97 /** The ender for important parts of the source */
98 define('GESHI_END_IMPORTANT', '<END GeSHi>');
99
100 /**#@+
101 * @access private
102 */
103 // When strict mode applies for a language
104 /** Strict mode never applies (this is the most common) */
105 define('GESHI_NEVER', 0);
106 /** Strict mode *might* apply, and can be enabled or
107 disabled by {@link GeSHi::enable_strict_mode()} */
108 define('GESHI_MAYBE', 1);
109 /** Strict mode always applies */
110 define('GESHI_ALWAYS', 2);
111
112 // Advanced regexp handling constants, used in language files
113 /** The key of the regex array defining what to search for */
114 define('GESHI_SEARCH', 0);
115 /** The key of the regex array defining what bracket group in a
116 matched search to use as a replacement */
117 define('GESHI_REPLACE', 1);
118 /** The key of the regex array defining any modifiers to the regular expression */
119 define('GESHI_MODIFIERS', 2);
120 /** The key of the regex array defining what bracket group in a
121 matched search to put before the replacement */
122 define('GESHI_BEFORE', 3);
123 /** The key of the regex array defining what bracket group in a
124 matched search to put after the replacement */
125 define('GESHI_AFTER', 4);
126
127 /** Used in language files to mark comments */
128 define('GESHI_COMMENTS', 0);
129
130 // Error detection - use these to analyse faults
131 /** No sourcecode to highlight was specified
132 * @deprecated
133 */
134 define('GESHI_ERROR_NO_INPUT', 1);
135 /** The language specified does not exist */
136 define('GESHI_ERROR_NO_SUCH_LANG', 2);
137 /** GeSHi could not open a file for reading (generally a language file) */
138 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
139 /** The header type passed to {@link GeSHi::set_header_type()} was invalid */
140 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
141 /** The line number type passed to {@link GeSHi::enable_line_numbers()} was invalid */
142 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
143 /**#@-*/
144
145
146 /**
147 * The GeSHi Class.
148 *
149 * Please refer to the documentation for GeSHi 1.0.X that is available
150 * at http://qbnz.com/highlighter/documentation.php for more information
151 * about how to use this class.
152 *
153 * @package core
154 * @author Nigel McNie <nigel@geshi.org>
155 * @copyright Copyright &copy; 2004, 2005 Nigel McNie
156 */
157 class GeSHi
158 {
159 /**#@+
160 * @access private
161 */
162 /**
163 * The source code to highlight
164 * @var string
165 */
166 var $source = '';
167
168 /**
169 * The language to use when highlighting
170 * @var string
171 */
172 var $language = '';
173
174 /**
175 * The data for the language used
176 * @var array
177 */
178 var $language_data = array();
179
180 /**
181 * The path to the language files
182 * @var string
183 */
184 var $language_path = GESHI_LANG_ROOT;
185
186 /**
187 * The error message associated with an error
188 * @var string
189 * @todo check err reporting works
190 */
191 var $error = false;
192
193 /**
194 * Possible error messages
195 * @var array
196 */
197 var $error_messages = array(
198 //GESHI_ERROR_NO_INPUT => 'No source code inputted',
199 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
200 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
201 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
202 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
203 );
204
205 /**
206 * Whether highlighting is strict or not
207 * @var boolean
208 */
209 var $strict_mode = false;
210
211 /**
212 * Whether to use CSS classes in output
213 * @var boolean
214 */
215 var $use_classes = false;
216
217 /**
218 * The type of header to use. Can be one of the following
219 * values:
220 *
221 * <ul>
222 * <li><b>GESHI_HEADER_PRE</b>: Source is outputted in
223 * a &lt;pre&gt; HTML element.</li>
224 * <li><b>GESHI_HEADER_DIV</b>: Source is outputted in
225 * a &lt;div&gt; HTML element.</li>
226 * <li><b>GESHI_HEADER_NONE</b>: No header is outputted.</li>
227 * </ul>
228 *
229 * @var int
230 */
231 var $header_type = GESHI_HEADER_PRE;
232
233 /**
234 * Array of permissions for which lexics should be highlighted
235 * @var array
236 */
237 var $lexic_permissions = array(
238 'KEYWORDS' => array(),
239 'COMMENTS' => array('MULTI' => true),
240 'REGEXPS' => array(),
241 'ESCAPE_CHAR' => true,
242 'BRACKETS' => true,
243 'SYMBOLS' => true,
244 'STRINGS' => true,
245 'NUMBERS' => true,
246 'METHODS' => true,
247 'SCRIPT' => true
248 );
249
250 /**
251 * The time it took to parse the code
252 * @var double
253 */
254 var $time = 0;
255
256 /**
257 * The content of the header block
258 * @var string
259 */
260 var $header_content = '';
261
262 /**
263 * The content of the footer block
264 * @var string
265 */
266 var $footer_content = '';
267
268 /**
269 * The style of the header block
270 * @var string
271 */
272 var $header_content_style = '';
273
274 /**
275 * The style of the footer block
276 * @var string
277 */
278 var $footer_content_style = '';
279
280 /**
281 * The styles for hyperlinks in the code
282 * @var array
283 */
284 var $link_styles = array();
285
286 /**
287 * Whether important blocks should be recognised or not
288 * @var boolean
289 * @deprecated
290 * @todo REMOVE THIS FUNCTIONALITY!
291 */
292 var $enable_important_blocks = false;
293
294 /**
295 * Styles for important parts of the code
296 * @var string
297 * @deprecated
298 * @todo As above - rethink the whole idea of important blocks as it is buggy and
299 * will be hard to implement in 1.2
300 */
301 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
302
303 /**
304 * Whether CSS IDs should be added to the code
305 * @var boolean
306 */
307 var $add_ids = false;
308
309 /**
310 * Lines that should be highlighted extra
311 * @var array
312 */
313 var $highlight_extra_lines = array();
314
315 /**
316 * Styles of extra-highlighted lines
317 * @var string
318 */
319 var $highlight_extra_lines_style = 'color: #cc0; background-color: #ffc;';
320
321 /**
322 * Number at which line numbers should start at
323 * @var int
324 * @todo Warning documentation about XHTML compliance
325 */
326 var $line_numbers_start = 1;
327
328 /**
329 * The overall style for this code block
330 * @var string
331 */
332 var $overall_style = '';
333
334 /**
335 * The style for the actual code
336 * @var string
337 */
338 var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
339
340 /**
341 * The overall class for this code block
342 * @var string
343 */
344 var $overall_class = '';
345
346 /**
347 * The overall ID for this code block
348 * @var string
349 */
350 var $overall_id = '';
351
352 /**
353 * Line number styles
354 * @var string
355 */
356 var $line_style1 = 'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
357
358 /**
359 * Line number styles for fancy lines
360 * @var string
361 */
362 var $line_style2 = 'font-weight: bold;';
363
364 /**
365 * Flag for how line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
366 * @var int
367 */
368 var $line_numbers = GESHI_NO_LINE_NUMBERS;
369
370 /**
371 * The "nth" value for fancy line highlighting
372 * @var int
373 */
374 var $line_nth_row = 0;
375
376 /**
377 * The size of tab stops
378 * @var int
379 */
380 var $tab_width = 8;
381
382 /**
383 * Default target for keyword links
384 * @var string
385 */
386 var $link_target = '';
387
388 /**
389 * The encoding to use for entity encoding
390 * @var string
391 */
392 var $encoding = 'ISO-8859-1';
393
394 /**#@-*/
395
396 /**
397 * Creates a new GeSHi object, with source and language
398 *
399 * @param string The source code to highlight
400 * @param string The language to highlight the source with
401 * @param string The path to the language file directory. <b>This
402 * is deprecated!</b> I've backported the auto path
403 * detection from the 1.1.X dev branch, so now it
404 * should be automatically set correctly. If you have
405 * renamed the language directory however, you will
406 * still need to set the path using this parameter or
407 * {@link GeSHi::set_language_path()}
408 * @since 1.0.0
409 */
function __construct ($source, $language, $path = '')
{
    // Order matters: the language path has to be in place before the
    // language is set, because set_language() builds the language file
    // name from the current language path.
    $this->set_source($source);
    $this->set_language_path($path);
    $this->set_language($language);
}

/**
 * Old-style (PHP 4) constructor, kept so that existing code calling
 * GeSHi() or parent::GeSHi() keeps working. It simply forwards to
 * __construct(), which is the constructor on PHP versions that no
 * longer treat a method named after the class as one.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi ($source, $language, $path = '')
{
    $this->__construct($source, $language, $path);
}
416
417 /**
418 * Returns an error message associated with the last GeSHi operation,
419 * or false if no error has occurred
420 *
421 * @return string|false An error message if there has been an error, else false
422 * @since 1.0.0
423 */
function error ()
{
    // No error code recorded: report "no error"
    if (!$this->error) {
        return false;
    }

    // Placeholders that may appear in a message template, paired with
    // the values that replace them
    $placeholders = array('{LANGUAGE}', '{PATH}');
    $values = array($this->language, $this->language_path);
    $text = str_replace($placeholders, $values, $this->error_messages[$this->error]);

    return '<br /><strong>GeSHi Error:</strong> ' . $text . ' (code ' . $this->error . ')<br />';
}
439
440 /**
441 * Gets a human-readable language name (thanks to Simon Patterson
442 * for the idea :))
443 *
444 * @return string The name for the current language
445 * @since 1.0.2
446 */
function get_language_name ()
{
    // The error code is kept in $this->error (see set_language()); the
    // class has no $_error property, so testing that one never matched.
    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
    }
    return $this->language_data['LANG_NAME'];
}
454
455 /**
456 * Sets the source code for this object
457 *
458 * @param string The source code to highlight
459 * @since 1.0.0
460 */
function set_source ($source)
{
    // The source is stored as given; this method does no processing
    $this->source = $source;
}
465
466 /**
467 * Sets the language for this object
468 *
469 * @param string The name of the language to use
470 * @since 1.0.0
471 */
function set_language ($language)
{
    // Reset state left over from any previously selected language
    $this->strict_mode = GESHI_NEVER;
    $this->error = false;

    // Keep only letters, digits, '-' and '_', then lowercase the name
    $this->language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    $lang_file = $this->language_path . $this->language . '.php';
    if (is_readable($lang_file)) {
        // Load the language for parsing
        $this->load_language($lang_file);
    } else {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
    }
}
488
489 /**
490 * Sets the path to the directory containing the language files. Note
491 * that this path is relative to the directory of the script that included
492 * geshi.php, NOT geshi.php itself.
493 *
494 * @param string The path to the language directory
495 * @since 1.0.0
496 * @deprecated The path to the language files should now be automatically
497 * detected, so this method should no longer be needed. The
498 * 1.1.X branch handles manual setting of the path differently
499 * so this method will disappear in 1.2.0.
500 */
function set_language_path ($path)
{
    // An empty path leaves the current language path untouched
    if (!$path) {
        return;
    }

    // Make sure the stored path ends with a forward slash
    if ('/' != substr($path, -1)) {
        $path .= '/';
    }
    $this->language_path = $path;

    // Re-select the current language, otherwise the new path has no effect
    $this->set_language($this->language);
}
508
509 /**
510 * Sets the type of header to be used.
511 *
512 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
513 * means more source code but more control over tab width and line-wrapping.
514 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
515 * control. Default is GESHI_HEADER_PRE.
516 *
517 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
518 * should be outputted.
519 *
520 * @param int The type of header to be used
521 * @since 1.0.0
522 */
function set_header_type ($type)
{
    $is_known_type = (GESHI_HEADER_DIV == $type
        || GESHI_HEADER_PRE == $type
        || GESHI_HEADER_NONE == $type);

    if ($is_known_type) {
        $this->header_type = $type;
    } else {
        // Unknown header type: record the error, keep the current type
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
531
532 /**
533 * Sets the styles for the code that will be outputted
534 * when this object is parsed. The style should be a
535 * string of valid stylesheet declarations
536 *
537 * @param string The overall style for the outputted code block
538 * @param boolean Whether to merge the styles with the current styles or not
539 * @since 1.0.0
540 */
function set_overall_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append the new declarations to the existing ones
        $this->overall_style .= $style;
        return;
    }
    // Replace the existing style outright
    $this->overall_style = $style;
}
549
550 /**
551 * Sets the overall classname for this block of code. This
552 * class can then be used in a stylesheet to style this object's
553 * output
554 *
555 * @param string The class name to use for this block of code
556 * @since 1.0.0
557 */
function set_overall_class ($class)
{
    // Stored as given
    $this->overall_class = $class;
}
562
563 /**
564 * Sets the overall id for this block of code. This id can then
565 * be used in a stylesheet to style this object's output
566 *
567 * @param string The ID to use for this block of code
568 * @since 1.0.0
569 */
function set_overall_id ($id)
{
    // Stored as given
    $this->overall_id = $id;
}
574
575 /**
576 * Sets whether CSS classes should be used to highlight the source. Default
577 * is off, calling this method with no arguments will turn it on
578 *
579 * @param boolean Whether to turn classes on or not
580 * @since 1.0.0
581 */
function enable_classes ($flag = true)
{
    // Normalise whatever was passed to a strict boolean
    $this->use_classes = (bool) $flag;
}
586
587 /**
588 * Sets the style for the actual code. This should be a string
589 * containing valid stylesheet declarations. If $preserve_defaults is
590 * true, then styles are merged with the default styles, with the
591 * user defined styles having priority
592 *
593 * Note: Use this method to override any style changes you made to
594 * the line numbers if you are using line numbers, else the line of
595 * code will have the same style as the line number! Consult the
596 * GeSHi documentation for more information about this.
597 *
598 * @param string The style to use for actual code
599 * @param boolean Whether to merge the current styles with the new styles
600 */
function set_code_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append the new declarations to the existing ones
        $this->code_style .= $style;
        return;
    }
    // Replace the existing style outright
    $this->code_style = $style;
}
609
610 /**
611 * Sets the styles for the line numbers.
612 *
613 * @param string The style for the line numbers that are "normal"
614 * @param string|boolean If a string, this is the style of the line
615 * numbers that are "fancy", otherwise if boolean then this
616 * defines whether the normal styles should be merged with the
617 * new normal styles or not
618 * @param boolean If set, is the flag for whether to merge the "fancy"
619 * styles with the current styles or not
620 * @since 1.0.2
621 */
function set_line_style ($style1, $style2 = '', $preserve_defaults = false)
{
    // Two-argument form set_line_style($style, $flag): a boolean in the
    // second position is the merge flag, not a "fancy" style
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        // Merge: append to both the normal and the fancy style
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    // Replace both styles outright
    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
636
637 /**
638 * Sets whether line numbers should be displayed.
639 *
640 * Valid values for the first parameter are:
641 *
642 * <ul>
643 * <li><b>GESHI_NO_LINE_NUMBERS</b>: Line numbers will not be displayed</li>
644 * <li><b>GESHI_NORMAL_LINE_NUMBERS</b>: Line numbers will be displayed</li>
645 * <li><b>GESHI_FANCY_LINE_NUMBERS</b>: Fancy line numbers will be displayed</li>
646 * </ul>
647 *
648 * For fancy line numbers, the second parameter is used to signal which lines
649 * are to be fancy. For example, if the value of this parameter is 5 then every
650 * 5th line will be fancy.
651 *
652 * @param int How line numbers should be displayed
653 * @param int Defines which lines are fancy
654 * @since 1.0.0
655 */
function enable_line_numbers ($flag, $nth_row = 5)
{
    $is_known_flag = (GESHI_NO_LINE_NUMBERS == $flag
        || GESHI_NORMAL_LINE_NUMBERS == $flag
        || GESHI_FANCY_LINE_NUMBERS == $flag);

    if (!$is_known_flag) {
        // Record the error and leave the current settings alone, the
        // same way set_header_type() treats an invalid value. Previously
        // the invalid flag was stored even though an error was raised.
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        return;
    }

    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
665
666 /**
667 * Sets the style for a keyword group. If $preserve_defaults is
668 * true, then styles are merged with the default styles, with the
669 * user defined styles having priority
670 *
671 * @param int The key of the keyword group to change the styles of
672 * @param string The style to make the keywords
673 * @param boolean Whether to merge the new styles with the old or just
674 * to overwrite them
675 * @since 1.0.0
676 */
function set_keyword_group_style ($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the style already held for this group
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
        return;
    }
    // Replace the style for this group outright
    $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
}
685
686 /**
687 * Turns highlighting on/off for a keyword group
688 *
689 * @param int The key of the keyword group to turn on or off
690 * @param boolean Whether to turn highlighting for that group on or off
691 * @since 1.0.0
692 */
function set_keyword_group_highlighting ($key, $flag = true)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
697
698 /**
699 * Sets the styles for comment groups. If $preserve_defaults is
700 * true, then styles are merged with the default styles, with the
701 * user defined styles having priority
702 *
703 * @param int The key of the comment group to change the styles of
704 * @param string The style to make the comments
705 * @param boolean Whether to merge the new styles with the old or just
706 * to overwrite them
707 * @since 1.0.0
708 */
function set_comments_style ($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the style already held for this comment group
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        return;
    }
    // Replace the style for this comment group outright
    $this->language_data['STYLES']['COMMENTS'][$key] = $style;
}
717
718 /**
719 * Turns highlighting on/off for comment groups
720 *
721 * @param int The key of the comment group to turn on or off
722 * @param boolean Whether to turn highlighting for that group on or off
723 * @since 1.0.0
724 */
function set_comments_highlighting ($key, $flag = true)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
729
730 /**
731 * Sets the styles for escaped characters. If $preserve_defaults is
732 * true, then styles are merged with the default styles, with the
733 * user defined styles having priority
734 *
735 * @param string The style to make the escape characters
736 * @param boolean Whether to merge the new styles with the old or just
737 * to overwrite them
738 * @since 1.0.0
739 */
function set_escape_characters_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the existing escape character style
        $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
        return;
    }
    // Replace the escape character style outright
    $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
}
748
749 /**
750 * Turns highlighting on/off for escaped characters
751 *
752 * @param boolean Whether to turn highlighting for escape characters on or off
753 * @since 1.0.0
754 */
function set_escape_characters_highlighting ($flag = true)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
759
760 /**
761 * Sets the styles for brackets. If $preserve_defaults is
762 * true, then styles are merged with the default styles, with the
763 * user defined styles having priority
764 *
765 * This method is DEPRECATED: use set_symbols_style instead.
766 * This method will be removed in 1.2.X
767 *
768 * @param string The style to make the brackets
769 * @param boolean Whether to merge the new styles with the old or just
770 * to overwrite them
771 * @since 1.0.0
772 * @deprecated In favour of set_symbols_style
773 */
function set_brackets_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the existing bracket style
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }
    // Replace the bracket style outright
    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
782
783 /**
784 * Turns highlighting on/off for brackets
785 *
786 * This method is DEPRECATED: use set_symbols_highlighting instead.
787 * This method will be removed in 1.2.X
788 *
789 * @param boolean Whether to turn highlighting for brackets on or off
790 * @since 1.0.0
791 * @deprecated In favour of set_symbols_highlighting
792 */
function set_brackets_highlighting ($flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
797
798 /**
799 * Sets the styles for symbols. If $preserve_defaults is
800 * true, then styles are merged with the default styles, with the
801 * user defined styles having priority
802 *
803 * @param string The style to make the symbols
804 * @param boolean Whether to merge the new styles with the old or just
805 * to overwrite them
806 * @since 1.0.1
807 */
function set_symbols_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the existing symbol style
        $this->language_data['STYLES']['SYMBOLS'][0] .= $style;
    } else {
        // Replace the symbol style outright
        $this->language_data['STYLES']['SYMBOLS'][0] = $style;
    }

    // For backward compatibility, apply the same change to the bracket style
    $this->set_brackets_style($style, $preserve_defaults);
}
818
819 /**
820 * Turns highlighting on/off for symbols
821 *
822 * @param boolean Whether to turn highlighting for symbols on or off
823 * @since 1.0.0
824 */
function set_symbols_highlighting ($flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility, apply the same setting to brackets
    $this->set_brackets_highlighting($flag);
}
831
832 /**
833 * Sets the styles for strings. If $preserve_defaults is
834 * true, then styles are merged with the default styles, with the
835 * user defined styles having priority
836 *
837 * @param string The style to make the strings
838 * @param boolean Whether to merge the new styles with the old or just
839 * to overwrite them
840 * @since 1.0.0
841 */
function set_strings_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the existing string style
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
        return;
    }
    // Replace the string style outright
    $this->language_data['STYLES']['STRINGS'][0] = $style;
}
850
851 /**
852 * Turns highlighting on/off for strings
853 *
854 * @param boolean Whether to turn highlighting for strings on or off
855 * @since 1.0.0
856 */
function set_strings_highlighting ($flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
861
862 /**
863 * Sets the styles for numbers. If $preserve_defaults is
864 * true, then styles are merged with the default styles, with the
865 * user defined styles having priority
866 *
867 * @param string The style to make the numbers
868 * @param boolean Whether to merge the new styles with the old or just
869 * to overwrite them
870 * @since 1.0.0
871 */
function set_numbers_style ($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the existing number style
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
        return;
    }
    // Replace the number style outright
    $this->language_data['STYLES']['NUMBERS'][0] = $style;
}
880
881 /**
882 * Turns highlighting on/off for numbers
883 *
884 * @param boolean Whether to turn highlighting for numbers on or off
885 * @since 1.0.0
886 */
function set_numbers_highlighting ($flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
891
892 /**
893 * Sets the styles for methods. $key is a number that references the
894 * appropriate "object splitter" - see the language file for the language
895 * you are highlighting to get this number. If $preserve_defaults is
896 * true, then styles are merged with the default styles, with the
897 * user defined styles having priority
898 *
899 * @param int The key of the object splitter to change the styles of
900 * @param string The style to make the methods
901 * @param boolean Whether to merge the new styles with the old or just
902 * to overwrite them
903 * @since 1.0.0
904 */
function set_methods_style ($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the style already held for this object splitter
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }
    // Replace the style for this object splitter outright
    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
913
914 /**
915 * Turns highlighting on/off for methods
916 *
917 * @param boolean Whether to turn highlighting for methods on or off
918 * @since 1.0.0
919 */
function set_methods_highlighting ($flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
924
925 /**
926 * Sets the styles for regexps. If $preserve_defaults is
927 * true, then styles are merged with the default styles, with the
928 * user defined styles having priority
929 *
930 * @param string The style to make the regular expression matches
931 * @param boolean Whether to merge the new styles with the old or just
932 * to overwrite them
933 * @since 1.0.0
934 */
function set_regexps_style ($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        // Merge: append to the style already held for this regexp group
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }
    // Replace the style for this regexp group outright
    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
943
944 /**
945 * Turns highlighting on/off for regexps
946 *
947 * @param int The key of the regular expression group to turn on or off
948 * @param boolean Whether to turn highlighting for the regular expression group on or off
949 * @since 1.0.0
950 */
function set_regexps_highlighting ($key, $flag)
{
    // Normalise the flag to a strict boolean before storing it
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
955
956 /**
957 * Sets whether a set of keywords are checked for in a case sensitive manner
958 *
959 * @param int The key of the keyword group to change the case sensitivity of
960 * @param boolean Whether to check in a case sensitive manner or not
961 * @since 1.0.0
962 */
function set_case_sensitivity ($key, $case)
{
    // Normalise the flag to a strict boolean before storing it
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
967
968 /**
969 * Sets the case that keywords should use when found. Use the constants:
970 *
971 * <ul>
972 * <li><b>GESHI_CAPS_NO_CHANGE</b>: leave keywords as-is</li>
973 * <li><b>GESHI_CAPS_UPPER</b>: convert all keywords to uppercase where found</li>
974 * <li><b>GESHI_CAPS_LOWER</b>: convert all keywords to lowercase where found</li>
975 * </ul>
976 *
977 * @param int A constant specifying what to do with matched keywords
978 * @since 1.0.1
979 * @todo Error check the passed value
980 */
function set_case_keywords ($case)
{
    // The value is stored without validation
    $this->language_data['CASE_KEYWORDS'] = $case;
}
985
986 /**
987 * Sets how many spaces a tab is substituted for
988 *
989 * Widths below zero are ignored
990 *
991 * @param int The tab width
992 * @since 1.0.0
993 */
function set_tab_width ($width)
{
    $width = intval($width);

    // Widths below zero are ignored, as this method's documentation
    // states; previously a negative width was stored anyway.
    if ($width < 0) {
        return;
    }

    $this->tab_width = $width;
}
998
999 /**
1000 * Enables/disables strict highlighting. Default is off, calling this
1001 * method without parameters will turn it on. See documentation
1002 * for more details on strict mode and where to use it.
1003 *
1004 * @param boolean Whether to enable strict mode or not
1005 * @since 1.0.0
1006 */
function enable_strict_mode ($mode = true)
{
    // The setting is only changed when the language data marks strict
    // mode as GESHI_MAYBE; otherwise the call does nothing
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }
    $this->strict_mode = (bool) $mode;
}
1013
1014 /**
1015 * Disables all highlighting
1016 *
1017 * @since 1.0.0
1018 * @todo Rewrite with an array traversal
1019 */
function disable_highlighting ()
{
    foreach (array_keys($this->lexic_permissions) as $lexic) {
        // Scalar permission: a single on/off switch
        if (!is_array($this->lexic_permissions[$lexic])) {
            $this->lexic_permissions[$lexic] = false;
            continue;
        }
        // Grouped permission: switch off every group currently listed
        foreach (array_keys($this->lexic_permissions[$lexic]) as $group) {
            $this->lexic_permissions[$lexic][$group] = false;
        }
    }

    // Context blocks
    $this->enable_important_blocks = false;
}
1034
1035 /**
1036 * Enables all highlighting
1037 *
1038 * @since 1.0.0
1039 * @todo Rewrite with array traversal
1040 */
function enable_highlighting ()
{
    foreach (array_keys($this->lexic_permissions) as $lexic) {
        // Scalar permission: a single on/off switch
        if (!is_array($this->lexic_permissions[$lexic])) {
            $this->lexic_permissions[$lexic] = true;
            continue;
        }
        // Grouped permission: switch on every group currently listed
        foreach (array_keys($this->lexic_permissions[$lexic]) as $group) {
            $this->lexic_permissions[$lexic][$group] = true;
        }
    }

    // Context blocks
    $this->enable_important_blocks = true;
}
1055
/**
 * Given a file extension, this method returns either a valid GeSHi language
 * name, or the empty string if it couldn't be found
 *
 * The lookup table maps a language name to the list of extensions that
 * select it. An exact (case-sensitive) match always wins; if there is none,
 * the first entry that matches when case is ignored is used, so that
 * "PHP" or "Html" resolve as well. When an extension is listed for several
 * languages, the language that appears first in the table is returned.
 *
 * @param string The extension to get a language name for (without the dot)
 * @param array A lookup array to use instead of the default
 * @return string The language name, or '' if the extension is unknown
 * @since 1.0.5
 * @todo Re-think about how this method works (maybe make it private and/or make it
 * a extension->lang lookup?)
 * @todo static?
 */
function get_language_name_from_extension ( $extension, $lookup = array() )
{
    if ( !$lookup ) {
        $lookup = array(
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'c' => array('c'),
            'c_mac' => array('c'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cpp' => array('cpp'),
            'csharp' => array(),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr'),
            'html4strict' => array('html', 'htm'),
            'java' => array('java'),
            'javascript' => array('js'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'mpasm' => array(),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'smarty' => array(),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'xml' => array('xml')
        );
    }

    // Compare as strings with ===, so that numeric-looking extensions
    // such as '0' and '00' are not treated as equal
    $extension = (string) $extension;
    $folded = strtolower($extension);

    $have_fallback = false;
    $fallback = '';
    foreach ($lookup as $lang => $extensions) {
        foreach ((array) $extensions as $ext) {
            $ext = (string) $ext;
            if ($ext === $extension) {
                // Exact match: nothing can beat this
                return $lang;
            }
            if (!$have_fallback && strtolower($ext) === $folded) {
                // Remember the first case-insensitive match only
                $have_fallback = true;
                $fallback = $lang;
            }
        }
    }
    return $fallback;
}
1118
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language table is in the form
 * <pre>array(
 *     'lang_name' => array('extension', 'extension', ...),
 *     'lang_name' ...
 * );</pre>
 *
 * The extension is the text after the last dot in $file_name; a name
 * without a dot yields the empty extension. If the path is not a
 * readable regular file, or reading it fails, the error property is set
 * to GESHI_ERROR_FILE_NOT_READABLE and neither the source nor the
 * language is changed.
 *
 * @param string The path of the file to load
 * @param array A lookup array to use instead of the default
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file ($file_name, $lookup = array())
{
    // is_readable() alone also accepts directories, for which file()
    // fails, so insist on a regular file as well
    if (!is_file($file_name) || !is_readable($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $lines = file($file_name);
    if (false === $lines) {
        // The file vanished or could not be read after the checks above
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }
    $this->set_source(implode('', $lines));

    // strrchr() returns false when the name contains no dot at all
    $dot = strrchr($file_name, '.');
    $extension = (false === $dot) ? '' : substr($dot, 1);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
1143
/**
 * Appends a keyword to one of the keyword groups used for highlighting
 *
 * The word is added to the end of the group's list; no check is made
 * for whether it is already present.
 *
 * @param int The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword ($key, $word)
{
    $group =& $this->language_data['KEYWORDS'][$key];
    $group[] = $word;
}
1155
/**
 * Removes a keyword from a keyword group
 *
 * Every occurrence of the word is removed and the remaining words keep
 * their existing array keys. If the group does not exist (or is not an
 * array) nothing happens.
 *
 * @param int The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @since 1.0.0
 */
function remove_keyword ($key, $word)
{
    // Without this guard array_diff() would be handed null for an unknown
    // group and the group would end up overwritten with a non-array value
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key] =
        array_diff($this->language_data['KEYWORDS'][$key], array($word));
}
1168
/**
 * Creates a new keyword group, replacing any existing group with the
 * same key
 *
 * The group's words, styles and case sensitivity are stored in the
 * language data, and highlighting for the group is switched on.
 *
 * @param int The key of the keyword group to create
 * @param string The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array The words to use for the keyword group (a single word
 *              is wrapped in an array)
 * @since 1.0.0
 */
function add_keyword_group ( $key, $styles, $case_sensitive = true, $words = array() )
{
    // What the group contains and how it is matched...
    $this->language_data['KEYWORDS'][$key] = (array) $words;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    // ...how it looks...
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
    // ...and that it should actually be highlighted
    $this->lexic_permissions['KEYWORDS'][$key] = true;
}
1186
/**
 * Removes a keyword group
 *
 * The group's words, its highlighting permission, its case sensitivity
 * flag and its styles are all discarded.
 *
 * @param int The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key)
{
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key]
    );
}
1200
/**
 * Sets the content of the header block
 *
 * The value is stored as given in the header_content property.
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content ($content)
{
    $this->header_content = $content;
}
1211
/**
 * Sets the content of the footer block
 *
 * The value is stored as given in the footer_content property.
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content ($content)
{
    $this->footer_content = $content;
}
1222
/**
 * Sets the style for the header content
 *
 * The value is stored as given in the header_content_style property.
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style ($style)
{
    $this->header_content_style = $style;
}
1233
/**
 * Sets the style for the footer content
 *
 * The value is stored as given in the footer_content_style property.
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style ($style)
{
    $this->footer_content_style = $style;
}
1244
/**
 * Sets the base URL to be used for the keywords of one group
 *
 * @param int The key of the keyword group to set the URL for
 * @param string The URL to set for the group. Any {FNAME} in the URL
 *               is replaced by the keyword the link is being made for.
 *               An empty string means the group gets no links.
 * @since 1.0.2
 */
function set_url_for_keyword_group ($group, $url)
{
    $this->language_data['URLS'][$group] = $url;
}
1258
/**
 * Sets the styles for links in the code, one link state at a time
 *
 * @param int A constant that specifies what state the style is being
 *            set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles ($type, $styles)
{
    $this->link_styles[$type] = $styles;
}
1271
/**
 * Sets the target for links in code
 *
 * A non-empty target is stored as a ready-made target="..." attribute
 * with a space on either side; an empty target clears the attribute.
 * The target is inserted as given, without escaping.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target ( $target )
{
    $this->link_target = ($target) ? ' target="' . $target . '" ' : '';
}
1286
/**
 * Sets the styles for important parts of the code
 *
 * parse_code() uses these styles for "important" blocks when classes
 * are not in use.
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles ($styles)
{
    $this->important_styles = $styles;
}
1297
/**
 * Sets whether context-important blocks are highlighted
 *
 * @param boolean Whether important blocks should be highlighted; any
 *                value is reduced to true or false
 * @todo Remove this feature from GeSHi
 * @deprecated
 */
function enable_important_blocks ( $flag )
{
    $this->enable_important_blocks = (bool) $flag;
}
1308
/**
 * Sets whether CSS IDs should be added to each line
 *
 * @param boolean If true, IDs will be added to each line. Any value is
 *                reduced to true or false.
 * @since 1.0.2
 */
function enable_ids ($flag = true)
{
    $this->add_ids = (bool) $flag;
}
1319
/**
 * Specifies which lines to highlight extra
 *
 * Each line number is converted with intval() and recorded under its
 * own value as the key, so registering a line twice has no further
 * effect. Lines registered by earlier calls are kept.
 *
 * @param mixed An array of line numbers to highlight, or just a line
 *              number on its own.
 * @since 1.0.2
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra ($lines)
{
    // Treat a lone line number as a one-element list
    $requested = is_array($lines) ? $lines : array($lines);
    foreach ($requested as $line) {
        $number = intval($line);
        $this->highlight_extra_lines[$number] = $number;
    }
}
1338
/**
 * Sets the style for extra-highlighted lines
 *
 * The value is stored as given in the highlight_extra_lines_style
 * property.
 *
 * @param string The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style ($styles)
{
    $this->highlight_extra_lines_style = $styles;
}
1349
/**
 * Sets the number that line numbering starts at. The value should be a
 * positive integer; it is converted with intval() and its sign is
 * dropped with abs().
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at ($number)
{
    $first_line = intval($number);
    $this->line_numbers_start = abs($first_line);
}
1369
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support.
 *
 * An empty value is ignored and the current encoding is kept.
 *
 * @param string The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding ($encoding)
{
    if (!$encoding) {
        return;
    }
    $this->encoding = $encoding;
}
1383
/**
 * Returns the code in $this->source, highlighted and surrounded by the
 * necessary HTML.
 *
 * This should only be called ONCE, because it is SLOW! If you want to
 * highlight the same source multiple times, you're better off doing a whole
 * lot of str_replaces to replace the &lt;span&gt;s
 *
 * If an error has been recorded the source is only HTML-escaped, not
 * highlighted. In strict mode the source is first split at the language's
 * script delimiters and only the delimited blocks are highlighted.
 *
 * @return mixed Whatever finalise() returns for the highlighted source
 *               (presumably the finished HTML string)
 * @since 1.0.0
 */
function parse_code ()
{
    // Start the timer
    $start_time = microtime();

    // Firstly, if there is an error, we won't highlight
    if ($this->error) {
        $result = @htmlspecialchars($this->source, ENT_COMPAT, $this->encoding);
        // Timing is irrelevant
        $this->set_time($start_time, $start_time);
        return $this->finalise($result);
    }

    // Add spaces for regular expression matching and line numbers
    $code = ' ' . $this->source . ' ';
    // Replace all newlines to a common form.
    $code = str_replace("\r\n", "\n", $code);
    $code = str_replace("\r", "\n", $code);

    // Initialise various stuff
    $length = strlen($code);
    $STRING_OPEN = '';
    $CLOSE_STRING = false;
    $ESCAPE_CHAR_OPEN = false;
    $COMMENT_MATCHED = false;
    // Turn highlighting on if strict mode doesn't apply to this language
    $HIGHLIGHTING_ON = ( !$this->strict_mode ) ? true : '';
    // Whether to highlight inside a block of code
    $HIGHLIGHT_INSIDE_STRICT = false;
    $HARDQUOTE_OPEN = false;
    $stuff_to_parse = '';
    $result = '';

    // "Important" selections are handled like multiline comments
    // @todo Remove the "important" feature
    if ($this->enable_important_blocks) {
        $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
    }

    if ($this->strict_mode) {
        // Break the source into bits. Each bit will be a portion of the code
        // within script delimiters - for example, HTML between < and >
        $parts = array(0 => array(0 => ''));
        $k = 0;
        for ($i = 0; $i < $length; $i++) {
            $char = substr($code, $i, 1);
            if (!$HIGHLIGHTING_ON) {
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
                    foreach ($delimiters as $open => $close) {
                        // Get the next little bit for this opening string
                        $check = substr($code, $i, strlen($open));
                        // If it matches...
                        if ($check == $open) {
                            // We start a new block with the highlightable
                            // code in it
                            $HIGHLIGHTING_ON = $open;
                            $i += strlen($open) - 1;
                            $char = $open;
                            $parts[++$k][0] = $char;

                            // No point going around again...
                            break 2;
                        }
                    }
                }
            } else {
                // Locate the delimiter pair that opened this block; after
                // the loops $close holds the matching closing delimiter
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
                    foreach ($delimiters as $open => $close) {
                        if ($open == $HIGHLIGHTING_ON) {
                            // Found the closing tag
                            break 2;
                        }
                    }
                }
                // We check code from our current position BACKWARDS. This is so
                // the ending string for highlighting can be included in the block
                $check = substr($code, $i - strlen($close) + 1, strlen($close));
                if ($check == $close) {
                    $HIGHLIGHTING_ON = '';
                    // Add the string to the rest of the string for this part
                    $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
                    $parts[++$k][0] = '';
                    $char = '';
                }
            }
            $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
        }
        $HIGHLIGHTING_ON = '';
    } else {
        // Not strict mode - simply dump the source into
        // the array at index 1 (the first highlightable block)
        $parts = array(
            1 => array(
                0 => '',
                1 => $code
            )
        );
    }

    // Now we go through each part. We know that even-indexed parts are
    // code that shouldn't be highlighted, and odd-indexed parts should
    // be highlighted
    foreach ($parts as $key => $data) {
        $part = $data[1];
        // If this block should be highlighted...
        if ($key % 2) {
            if ($this->strict_mode) {
                // Find the class key for this block of code
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key => $script_data) {
                    foreach ($script_data as $open => $close) {
                        if ($data[0] == $open) {
                            break 2;
                        }
                    }
                }

                if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
                    $this->lexic_permissions['SCRIPT']) {
                    // Add a span element around the source to
                    // highlight the overall source block
                    if (!$this->use_classes &&
                        $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
                        $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
                    } else {
                        $attributes = ' class="sc' . $script_key . '"';
                    }
                    $result .= "<span$attributes>";
                }
            }

            if (!$this->strict_mode || $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]) {
                // Now, highlight the code in this block. This code
                // is really the engine of GeSHi (along with the method
                // parse_non_string_part).
                $length = strlen($part);
                for ($i = 0; $i < $length; $i++) {
                    // Get the next char
                    $char = substr($part, $i, 1);
                    $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
                    // Is this char the newline and line numbers being used?
                    if (($this->line_numbers != GESHI_NO_LINE_NUMBERS
                        || count($this->highlight_extra_lines) > 0)
                        && $char == "\n") {
                        // If so, is there a string open? If there is, we should end it before
                        // the newline and begin it again (so when <li>s are put in the source
                        // remains XHTML compliant)
                        // note to self: This opens up possibility of config files specifying
                        // that languages can/cannot have multiline strings???
                        if ($STRING_OPEN) {
                            if (!$this->use_classes) {
                                $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
                            } else {
                                $attributes = ' class="st0"';
                            }
                            $char = '</span>' . $char . "<span$attributes>";
                        }
                    } elseif ($char == $STRING_OPEN) {
                        // A match of a string delimiter
                        if (($this->lexic_permissions['ESCAPE_CHAR'] && $ESCAPE_CHAR_OPEN) ||
                            ($this->lexic_permissions['STRINGS'] && !$ESCAPE_CHAR_OPEN)) {
                            $char .= '</span>';
                        }
                        $escape_me = false;
                        if ($HARDQUOTE_OPEN) {
                            if ($ESCAPE_CHAR_OPEN) {
                                $escape_me = true;
                            } else {
                                foreach ($this->language_data['HARDESCAPE'] as $hardesc) {
                                    if (substr($part, $i, strlen($hardesc)) == $hardesc) {
                                        $escape_me = true;
                                        break;
                                    }
                                }
                            }
                        }
                        if (!$ESCAPE_CHAR_OPEN) {
                            $STRING_OPEN = '';
                            $CLOSE_STRING = true;
                        }
                        if (!$escape_me) {
                            $HARDQUOTE_OPEN = false;
                        }
                        $ESCAPE_CHAR_OPEN = false;
                    } elseif (in_array($char, $this->language_data['QUOTEMARKS']) &&
                        ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
                        // The start of a new string
                        $STRING_OPEN = $char;
                        if (!$this->use_classes) {
                            $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
                        } else {
                            $attributes = ' class="st0"';
                        }
                        $char = "<span$attributes>" . $char;

                        $result .= $this->parse_non_string_part( $stuff_to_parse );
                        $stuff_to_parse = '';
                    } elseif (
                        $hq &&
                        substr($part, $i, strlen($hq)) == $hq &&
                        ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']
                    ) {
                        // The start of a hard quoted string
                        $STRING_OPEN = $this->language_data['HARDQUOTE'][1];
                        if (!$this->use_classes) {
                            $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
                        } else {
                            $attributes = ' class="st0"';
                        }
                        $char = "<span$attributes>" . $hq;
                        $i += strlen($hq) - 1;
                        $HARDQUOTE_OPEN = true;
                        $result .= $this->parse_non_string_part( $stuff_to_parse );
                        $stuff_to_parse = '';
                    } elseif ($char == $this->language_data['ESCAPE_CHAR'] && $STRING_OPEN != '') {
                        // An escape character
                        if (!$ESCAPE_CHAR_OPEN) {
                            $ESCAPE_CHAR_OPEN = !$HARDQUOTE_OPEN; // true unless $HARDQUOTE_OPEN
                            if ($HARDQUOTE_OPEN) {
                                // Inside a hard quote only the listed hard
                                // escapes count as escapes
                                foreach ($this->language_data['HARDESCAPE'] as $hard) {
                                    if (substr($part, $i, strlen($hard)) == $hard) {
                                        $ESCAPE_CHAR_OPEN = true;
                                        break;
                                    }
                                }
                            }
                            if ($ESCAPE_CHAR_OPEN && $this->lexic_permissions['ESCAPE_CHAR']) {
                                if (!$this->use_classes) {
                                    $attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
                                } else {
                                    $attributes = ' class="es0"';
                                }
                                $char = "<span$attributes>" . $char;
                                // Look ahead within $part: $i is an offset into the
                                // block being highlighted, which is only the same as
                                // an offset into $code when strict mode is off
                                if (substr($part, $i + 1, 1) == "\n") {
                                    // escaping a newline, what's the point in putting the span around
                                    // the newline? It only causes hassles when inserting line numbers
                                    $char .= '</span>';
                                    $ESCAPE_CHAR_OPEN = false;
                                }
                            }
                        } else {
                            $ESCAPE_CHAR_OPEN = false;
                            if ($this->lexic_permissions['ESCAPE_CHAR']) {
                                $char .= '</span>';
                            }
                        }
                    } elseif ($ESCAPE_CHAR_OPEN) {
                        if ($this->lexic_permissions['ESCAPE_CHAR']) {
                            $char .= '</span>';
                        }
                        $ESCAPE_CHAR_OPEN = false;
                        $test_str = $char;
                    } elseif ($STRING_OPEN == '') {
                        // Is this a multiline comment?
                        foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
                            $com_len = strlen($open);
                            $test_str = substr( $part, $i, $com_len );
                            $test_str_match = $test_str;
                            if ($open == $test_str) {
                                $COMMENT_MATCHED = true;
                                //@todo If remove important do remove here
                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str == GESHI_START_IMPORTANT) {
                                    if ($test_str != GESHI_START_IMPORTANT) {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
                                        } else {
                                            $attributes = ' class="coMULTI"';
                                        }
                                        $test_str = "<span$attributes>" . @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                    } else {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->important_styles . '"';
                                        } else {
                                            $attributes = ' class="imp"';
                                        }
                                        // We don't include the start of the comment if it's an
                                        // "important" part
                                        $test_str = "<span$attributes>";
                                    }
                                } else {
                                    $test_str = @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                }

                                // Search for the closing delimiter after the opening
                                // one. The offset never exceeds strlen($part) because
                                // the opening delimiter was just matched at $i.
                                $close_len = strlen($close);
                                $close_pos = strpos( $part, $close, $i + $com_len );

                                if ($close_pos === false) {
                                    $close_pos = strlen($part);
                                }

                                // Short-cut through all the multiline code. The rest of
                                // the comment runs from just after the opening delimiter
                                // up to and including the closing one, so the lengths of
                                // both delimiters matter when they differ.
                                $rest_of_comment = @htmlspecialchars(substr($part, $i + $com_len, $close_pos - $i - $com_len + $close_len), ENT_COMPAT, $this->encoding);
                                if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str_match == GESHI_START_IMPORTANT) &&
                                    ($this->line_numbers != GESHI_NO_LINE_NUMBERS ||
                                    count($this->highlight_extra_lines) > 0)) {
                                    // strreplace to put close span and open span around multiline newlines
                                    $test_str .= str_replace("\n", "</span>\n<span$attributes>", $rest_of_comment);
                                } else {
                                    $test_str .= $rest_of_comment;
                                }

                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str_match == GESHI_START_IMPORTANT) {
                                    $test_str .= '</span>';
                                }
                                // Continue with the character after the closing delimiter
                                $i = $close_pos + $close_len - 1;
                                // parse the rest
                                $result .= $this->parse_non_string_part($stuff_to_parse);
                                $stuff_to_parse = '';
                                break;
                            }
                        }
                        // If we haven't matched a multiline comment, try single-line comments
                        if (!$COMMENT_MATCHED) {
                            foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
                                $com_len = strlen($comment_mark);
                                $test_str = substr($part, $i, $com_len);
                                if ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS]) {
                                    $match = ($comment_mark == $test_str);
                                } else {
                                    $match = (strtolower($comment_mark) == strtolower($test_str));
                                }
                                if ($match) {
                                    $COMMENT_MATCHED = true;
                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
                                        } else {
                                            $attributes = ' class="co' . $comment_key . '"';
                                        }
                                        $test_str = "<span$attributes>" . @htmlspecialchars($this->change_case($test_str), ENT_COMPAT, $this->encoding);
                                    } else {
                                        $test_str = @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                    }
                                    $close_pos = strpos($part, "\n", $i);
                                    $oops = false;
                                    if ($close_pos === false) {
                                        $close_pos = strlen($part);
                                        $oops = true;
                                    }
                                    $test_str .= @htmlspecialchars(substr($part, $i + $com_len, $close_pos - $i - $com_len), ENT_COMPAT, $this->encoding);
                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                        $test_str .= "</span>";
                                    }
                                    // Take into account that the comment might be the last in the source
                                    if (!$oops) {
                                        $test_str .= "\n";
                                    }
                                    $i = $close_pos;
                                    // parse the rest
                                    $result .= $this->parse_non_string_part($stuff_to_parse);
                                    $stuff_to_parse = '';
                                    break;
                                }
                            }
                        }
                    } elseif ($STRING_OPEN != '') {
                        // Otherwise, convert it to HTML form
                        if (strtolower($this->encoding) == 'utf-8') {
                            // only escape <128 (we don't want to break multibyte chars)
                            if (ord($char) < 128) {
                                $char = @htmlspecialchars($char, ENT_COMPAT, $this->encoding);
                            }
                        } else {
                            // encode everything
                            $char = @htmlspecialchars($char, ENT_COMPAT, $this->encoding);
                        }
                    }
                    // Where are we adding this char?
                    if (!$COMMENT_MATCHED) {
                        if (($STRING_OPEN == '') && !$CLOSE_STRING) {
                            $stuff_to_parse .= $char;
                        } else {
                            $result .= $char;
                            $CLOSE_STRING = false;
                        }
                    } else {
                        $result .= $test_str;
                        $COMMENT_MATCHED = false;
                    }
                }
                // Parse the last bit
                $result .= $this->parse_non_string_part($stuff_to_parse);
                $stuff_to_parse = '';
            } else {
                $result .= @htmlspecialchars($part, ENT_COMPAT, $this->encoding);
            }
            // Close the <span> that surrounds the block
            if ($this->strict_mode && $this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
                $this->lexic_permissions['SCRIPT']) {
                $result .= '</span>';
            }
        } else {
            // Else not a block to highlight
            $result .= @htmlspecialchars($part, ENT_COMPAT, $this->encoding);
        }
    }

    // Parse the last stuff (redundant?)
    $result .= $this->parse_non_string_part($stuff_to_parse);

    // Lop off the very first character (the padding space added above).
    // The length given covers the whole remainder, so nothing is removed
    // from the end of the result here.
    $result = substr($result, 1, strlen($result) - 1);

    // Are we still in a string?
    if ($STRING_OPEN) {
        $result .= '</span>';
    }

    // We're finished: stop timing
    $this->set_time($start_time, microtime());

    return $this->finalise($result);
}
1812
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Tabs outside of HTML tags are expanded to alternating '&nbsp;' and
 * ' ' up to the next tab stop. Afterwards pairs of spaces and spaces at
 * the start of a line are rewritten with '&nbsp;', and when line
 * numbers are off newlines are passed through nl2br().
 *
 * @param string The source to indent
 * @return string The source with HTML indenting applied
 * @since 1.0.0
 * @access private
 */
function indent ($result)
{
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        // The width is used as a modulus below, so it must be at least 1
        $tab_width = $this->tab_width;
        if ($tab_width < 1) {
            $tab_width = 1;
        }
        // Pieces a tab is built from: non-breaking and ordinary spaces
        // alternate, starting with a non-breaking one
        $strs = array(0 => '&nbsp;', 1 => ' ');

        $lines = explode("\n", $result);
        foreach ($lines as $key => $line) {
            if (false === strpos($line, "\t")) {
                // Nothing to expand on this line
                continue;
            }

            // $pos counts the characters of the line that belong to markup
            // rather than content, so that ($i - $pos) approximates the
            // visible column
            $pos = 0;
            $length = strlen($line);
            $result_line = '';

            $IN_TAG = false;
            for ($i = 0; $i < $length; $i++) {
                $char = substr($line, $i, 1);
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG && '>' == $char) {
                    $IN_TAG = false;
                    $result_line .= '>';
                    ++$pos;
                } elseif (!$IN_TAG && '<' == $char) {
                    $IN_TAG = true;
                    $result_line .= '<';
                    ++$pos;
                } elseif (!$IN_TAG && '&' == $char) {
                    // Possibly an entity: look for the terminating ';' a few
                    // characters on and discount the entity's extra length
                    $substr = substr($line, $i + 3, 4);
                    $posi = strpos($substr, ';');
                    if (false !== $posi) {
                        $pos += $posi + 3;
                    }
                    $result_line .= '&';
                } elseif (!$IN_TAG && "\t" == $char) {
                    // Fill up to the next tab stop
                    $str = '';
                    $fill = $tab_width - (($i - $pos) % $tab_width);
                    for ($k = 0; $k < $fill; $k++) {
                        $str .= $strs[$k % 2];
                    }
                    $result_line .= $str;
                    $pos++;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No more tabs: copy the remainder of the line as is
                        $result_line .= substr($line, $i + 1);
                        break;
                    }
                } elseif ( $IN_TAG ) {
                    ++$pos;
                    $result_line .= $char;
                } else {
                    $result_line .= $char;
                }
            }
            $lines[$key] = $result_line;
        }
        $result = implode("\n", $lines);
    }
    // Other whitespace. Only PAIRS of spaces are rewritten: replacing every
    // single space would also rewrite the spaces inside the generated tags
    // (e.g. between "<span" and its attributes) and corrupt the markup.
    $result = str_replace('  ', '&nbsp; ', $result);
    $result = str_replace('  ', ' &nbsp;', $result);
    // A space at the start of a line would otherwise be dropped by the browser
    $result = str_replace("\n ", "\n&nbsp;", $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        $result = nl2br($result);
    }
    return $result;
}
1911
/**
 * Changes the case of a keyword for those languages where a change is asked for
 *
 * The CASE_KEYWORDS setting of the language data decides what happens:
 * GESHI_CAPS_UPPER uppercases, GESHI_CAPS_LOWER lowercases and any other
 * value returns the keyword unchanged.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case ($instr)
{
    switch ($this->language_data['CASE_KEYWORDS']) {
        case GESHI_CAPS_UPPER:
            return strtoupper($instr);
        case GESHI_CAPS_LOWER:
            return strtolower($instr);
        default:
            return $instr;
    }
}
1929
/**
 * Adds a url to a keyword where needed.
 *
 * Nothing (null) is returned when the keyword's group has no URL set or
 * when the keyword starts with '&lt;/'. For 'BEGIN' the result is a
 * placeholder for the opening link ('<|UR1|"url">' with every '.' in the
 * URL written as '<DOT>'), or '' if the keyword is empty. For any other
 * value the closing '</a>' is returned, except for the '&gt;' keyword in
 * the html4strict language, where nothing is returned.
 *
 * @param string The keyword to add the URL HTML to
 * @param int What group the keyword is from
 * @param boolean Whether to get the HTML for the start or end
 *                (compared against the string 'BEGIN')
 * @return The HTML for either the start or end of the HTML &lt;a&gt; tag
 * @since 1.0.2
 * @access private
 * @todo Get rid of ender
 */
function add_url_to_keyword ($keyword, $group, $start_or_end)
{
    // No base URL for this group, or a closing tag keyword: no link at all
    if (!isset($this->language_data['URLS'][$group]) ||
        $this->language_data['URLS'][$group] == '' ||
        substr($keyword, 0, 5) == '&lt;/') {
        return;
    }

    if ($start_or_end != 'BEGIN') {
        // HTML fix. Again, dirty hackage...
        if (!($this->language == 'html4strict' && '&gt;' == $keyword)) {
            return '</a>';
        }
        return;
    }

    // HTML workaround... not good form (tm) but should work for 1.0.X
    if ($keyword == '') {
        return '';
    }

    // Take the keyword from the language data rather than from the source,
    // so that the link uses the spelling (case) of the language file. If
    // nothing matches, $word is left holding the last keyword of the group.
    foreach ($this->language_data['KEYWORDS'][$group] as $word) {
        if (strtolower($word) == strtolower($keyword)) {
            break;
        }
    }

    // Strip an escaped leading '<' and trailing '>' from the keyword
    if (substr($word, 0, 4) == '&lt;') {
        $word = substr($word, 4);
    }
    if (substr($word, -4) == '&gt;') {
        $word = substr($word, 0, strlen($word) - 4);
    }
    if (!$word) {
        return '';
    }

    $url = str_replace(
        array('{FNAME}', '.'),
        array(@htmlspecialchars($word, ENT_COMPAT, $this->encoding), '<DOT>'),
        $this->language_data['URLS'][$group]
    );
    return '<|UR1|"' . $url . '">';
}
1976
1977 /**
1978 * Takes a string that has no strings or comments in it, and highlights
1979 * stuff like keywords, numbers and methods.
1980 *
1981 * @param string The string to parse for keyword, numbers etc.
1982 * @since 1.0.0
1983 * @access private
1984 * @todo BUGGY! Why? Why not build string and return?
1985 */
1986 function parse_non_string_part (&$stuff_to_parse)
1987 {
// Overview: highlighting is done in two phases. First, every match
// (regexps, numbers, keywords, methods, brackets) is wrapped in the
// placeholder markers "<|" ... "|>" together with a placeholder for its
// attributes (/NUM!/, /<keyword group>/, !REG3XP<key>!). Only at the very
// end are the placeholders swapped for real style/class attributes and the
// markers turned into <span> tags, so that inserted markup is not itself
// picked up by a later highlighting pass.
//
// The argument is taken by reference and is overwritten with intermediate
// results as we go. Note that the final stripslashes()/substr() is applied
// to the RETURN VALUE only, so the caller's variable is left holding the
// still-escaped, space-prefixed string.
//
// A single space is prepended and the text is run through quotemeta(); the
// matching substr(..., 1) and stripslashes() in the return statement undo
// both. Presumably the space lets the "preceded by a non-word character"
// patterns below match at the very start of the text.
1988 $stuff_to_parse = ' ' . quotemeta(@htmlspecialchars($stuff_to_parse, ENT_COMPAT, $this->encoding));
1989 // These vars will disappear in the future
// They hold method-call expressions as plain strings, which are interpolated
// into the PHP code evaluated by the /e keyword replacements further down.
1990 $func = '$this->change_case';
1991 $func2 = '$this->add_url_to_keyword';
1992
1993 //
1994 // Regular expressions
1995 //
// A language regexp is either an array (search, modifiers, before, replace,
// after - indexed by the GESHI_* constants) or a plain pattern string, in
// which case the whole match is wrapped.
1996 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
1997 if ($this->lexic_permissions['REGEXPS'][$key]) {
1998 if (is_array($regexp)) {
1999 $stuff_to_parse = preg_replace(
2000 "#" .
2001 $regexp[GESHI_SEARCH] .
2002 "#{$regexp[GESHI_MODIFIERS]}",
2003 "{$regexp[GESHI_BEFORE]}<|!REG3XP$key!>{$regexp[GESHI_REPLACE]}|>{$regexp[GESHI_AFTER]}",
2004 $stuff_to_parse
2005 );
2006 } else {
2007 $stuff_to_parse = preg_replace( "#(" . $regexp . ")#", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
2008 }
2009 }
2010 }
2011
2012 //
2013 // Highlight numbers. This regexp sucks... anyone with a regexp that WORKS
2014 // here wins a cookie if they send it to me. At the moment there's two doing
2015 // almost exactly the same thing, except the second one prevents a number
2016 // being highlighted twice (eg <span...><span...>5</span></span>)
2017 // Put /NUM!/ in for the styles, which gets replaced at the end.
2018 //
// The cheap preg_match() for any digit skips both replacements when the
// text contains no digits at all.
2019 if ($this->lexic_permissions['NUMBERS'] && preg_match('#[0-9]#', $stuff_to_parse )) {
2020 $stuff_to_parse = preg_replace('#([^a-zA-Z0-9_\#])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
2021 $stuff_to_parse = preg_replace('#([^a-zA-Z0-9_\#>])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
2022 }
2023
2024 // Highlight keywords
2025 // if there is a couple of alpha symbols there *might* be a keyword
2026 if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
2027 foreach ($this->language_data['KEYWORDS'] as $k => $keywordset) {
2028 if ($this->lexic_permissions['KEYWORDS'][$k]) {
2029 foreach ($keywordset as $keyword) {
// First quotemeta(): the text itself was quotemeta()'d at the top of this
// function, so the keyword must be escaped the same way for the plain
// stristr() pre-check below to find it.
2030 $keyword = quotemeta($keyword);
2031 //
2032 // This replacement checks the word is on it's own (except if brackets etc
2033 // are next to it), then highlights it. We don't put the color=" for the span
2034 // in just yet - otherwise languages with the keywords "color" or "or" have
2035 // a fit.
2036 //
2037 if (false !== stristr($stuff_to_parse, $keyword )) {
// The pattern uses a lookahead that needs one character after the keyword,
// so a space is appended here and removed again after the replacement.
2038 $stuff_to_parse .= ' ';
2039 // Might make a more unique string for putting the number in soon
2040 // Basically, we don't put the styles in yet because then the styles themselves will
2041 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
2042 $styles = "/$k/";
// Second quotemeta(): applied on top of the first one before the keyword
// is embedded in the regex. NOTE(review): this looks deliberate - the
// pattern has to match the backslash-escaped form present in the text -
// so do not "deduplicate" it without testing keywords that contain
// metacharacters.
2043 $keyword = quotemeta($keyword);
// Both branches use the "e" pattern modifier, which evaluates the
// replacement string as PHP code (calling the methods named in $func and
// $func2). NOTE: the "e" modifier was deprecated in PHP 5.5 and removed
// in PHP 7.0; this code needs preg_replace_callback() to run there.
2044 if ($this->language_data['CASE_SENSITIVE'][$k]) {
2045 $stuff_to_parse = preg_replace(
2046 "#([^a-zA-Z0-9\$_\|\#;>])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])#e",
2047 "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
2048 $stuff_to_parse
2049 );
2050 } else {
2051 // Change the case of the word.
2052 $stuff_to_parse = preg_replace(
2053 "#([^a-zA-Z0-9\$_\|\#;>])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])#ie",
2054 "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
2055 $stuff_to_parse
2056 );
2057 }
// Drop the trailing space that was appended before the replacement.
2058 $stuff_to_parse = substr($stuff_to_parse, 0, strlen($stuff_to_parse) - 1);
2059 }
2060 }
2061 }
2062 }
2063 }
2064
2065 //
2066 // Now that's all done, replace /[number]/ with the correct styles
2067 //
// i.e. every "/<keyword group>/" placeholder becomes either an inline
// style attribute or a class="kw<group>" attribute.
2068 foreach ($this->language_data['KEYWORDS'] as $k => $kws) {
2069 if (!$this->use_classes) {
2070 $attributes = ' style="' . $this->language_data['STYLES']['KEYWORDS'][$k] . '"';
2071 } else {
2072 $attributes = ' class="kw' . $k . '"';
2073 }
2074 $stuff_to_parse = str_replace("/$k/", $attributes, $stuff_to_parse);
2075 }
2076
2077 // Put number styles in
2078 if (!$this->use_classes && $this->lexic_permissions['NUMBERS']) {
2079 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][0] . '"';
2080 } else {
2081 $attributes = ' class="nu0"';
2082 }
2083 $stuff_to_parse = str_replace('/NUM!/', $attributes, $stuff_to_parse);
2084
2085 //
2086 // Highlight methods and fields in objects
2087 //
// For each object splitter present in the text, the identifier following
// it (optionally after whitespace) is wrapped with the method attributes.
2088 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
2089 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
2090 if (false !== stristr($stuff_to_parse, $splitter)) {
2091 if (!$this->use_classes) {
2092 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
2093 } else {
2094 $attributes = ' class="me' . $key . '"';
2095 }
2096 $stuff_to_parse = preg_replace("#(" . quotemeta($this->language_data['OBJECT_SPLITTERS'][$key]) . "[\s]*)([a-zA-Z\*\(][a-zA-Z0-9_\*]*)#", "\\1<|$attributes>\\2|>", $stuff_to_parse);
2097 }
2098 }
2099 }
2100
2101 //
2102 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
2103 // You try it, and see what happens ;)
2104 // TODO: Fix lexic permissions not converting entities if shouldn't
2105 // be highlighting regardless
2106 //
// Each bracket is replaced by its numeric HTML entity wrapped in a
// placeholder span; $code_entities_match and $code_entities_replace are
// parallel arrays in the same order.
2107 if ($this->lexic_permissions['BRACKETS']) {
2108 $code_entities_match = array('[', ']', '(', ')', '{', '}');
2109 if (!$this->use_classes) {
2110 $code_entities_replace = array(
2111 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2112 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2113 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2114 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2115 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2116 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2117 );
2118 } else {
2119 $code_entities_replace = array(
2120 '<| class="br0">&#91;|>',
2121 '<| class="br0">&#93;|>',
2122 '<| class="br0">&#40;|>',
2123 '<| class="br0">&#41;|>',
2124 '<| class="br0">&#123;|>',
2125 '<| class="br0">&#125;|>',
2126 );
2127 }
2128 $stuff_to_parse = str_replace( $code_entities_match, $code_entities_replace, $stuff_to_parse );
2129 }
2130
2131 //
2132 // Add class/style for regexps
2133 //
// Resolves the "!REG3XP<key>!" placeholders inserted by the first loop.
2134 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
2135 if ($this->lexic_permissions['REGEXPS'][$key]) {
2136 if (!$this->use_classes) {
2137 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
2138 } else {
2139 $attributes = ' class="re' . $key . '"';
2140 }
2141 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
2142 }
2143 }
2144
2145 // Replace <DOT> with . for urls
2146 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
2147 // Replace <|UR1| with <a href= for urls also
// The inline link style is only emitted when a GESHI_LINK style is set
// and classes are not in use; all other cases produce a bare <a href=.
2148 if (isset($this->link_styles[GESHI_LINK])) {
2149 if ($this->use_classes) {
2150 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
2151 } else {
2152 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
2153 }
2154 } else {
2155 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
2156 }
2157
2158 //
2159 // NOW we add the span thingy ;)
2160 //
2161
2162 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
2163 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
2164
// stripslashes() undoes the quotemeta() escaping applied at the top, and
// substr(..., 1) removes the single space that was prepended there.
2165 return substr(stripslashes($stuff_to_parse), 1);
2166 }
2167
/**
 * Records how long parsing took.
 *
 * Each argument is split on a single space and its two parts are added
 * together to form a timestamp; the difference between the two
 * timestamps is stored in $this->time.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time ($start_time, $end_time)
{
    list($start_fraction, $start_seconds) = explode(' ', $start_time);
    list($end_fraction, $end_seconds) = explode(' ', $end_time);

    $this->time = $end_fraction + $end_seconds - $start_fraction - $start_seconds;
}
2182
/**
 * Returns the parse duration previously recorded in $this->time.
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time ()
{
    return $this->time;
}
2193
/**
 * Loads a language definition file and stores its data on this object.
 *
 * The file is pulled in with require inside this method's scope and is
 * expected to populate a local variable called $language_data, which is
 * then copied to $this->language_data. Afterwards every keyword group,
 * single-line comment group and regexp is marked as highlightable,
 * enable_highlighting() is called, and the overall CSS class is set to
 * the language name.
 *
 * @param string Path of the language file to load
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language ($file_name)
{
    // NOTE: the variable name $language_data is part of the contract with
    // the language files - the required file assigns to it in this scope.
    $language_data = array();
    require $file_name;

    // No validation of the loaded data is performed (yet)
    $this->language_data = $language_data;

    // Languages that always require strict mode switch it on here
    if (GESHI_ALWAYS == $this->language_data['STRICT_MODE_APPLIES']) {
        $this->strict_mode = true;
    }

    // Everything is highlighted by default: flag each known group as allowed
    foreach ($this->language_data['KEYWORDS'] as $group => $unused) {
        $this->lexic_permissions['KEYWORDS'][$group] = true;
    }
    foreach ($this->language_data['COMMENT_SINGLE'] as $group => $unused) {
        $this->lexic_permissions['COMMENTS'][$group] = true;
    }
    foreach ($this->language_data['REGEXPS'] as $group => $unused) {
        $this->lexic_permissions['REGEXPS'][$group] = true;
    }
    $this->enable_highlighting();

    // The default CSS class is simply the language name
    $this->overall_class = $this->language;
}
2226
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * Steps, in order:
 *  - strips stray "end important" markers when no start marker is present
 *  - runs the code through indent() unless the <pre> header is in use
 *  - wraps each line in <li><div>..</div></li> (line numbers on) or, with
 *    line numbers off, wraps only the extra-highlighted lines in a <div>
 *  - removes spans/divs that contain nothing but whitespace
 *  - surrounds the result with header() and footer()
 *
 * Fixes over the previous implementation:
 *  - a line consisting solely of "0" is no longer mistaken for an empty
 *    line (and replaced by &nbsp;); only truly empty lines are padded
 *  - extra-highlighted lines used to receive a SECOND class="" / style=""
 *    attribute on the same <li>, which is invalid HTML and ignored by
 *    browsers; the extra class/style is now merged into the one attribute
 *
 * @param string The code already parsed
 * @return string The code nicely finalised
 * @since 1.0.0
 * @access private
 */
function finalise ($parsed_code)
{
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strstr($parsed_code, @htmlspecialchars(GESHI_START_IMPORTANT, ENT_COMPAT, $this->encoding)) === false)) {
        $parsed_code = str_replace(@htmlspecialchars(GESHI_END_IMPORTANT, ENT_COMPAT, $this->encoding), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE) {
        $parsed_code = $this->indent($parsed_code);
    }

    // Both output modes work line by line
    $code = explode("\n", $parsed_code);
    $parsed_code = '';
    $i = 0;

    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE) ? "\n" : '';

        foreach ($code as $line) {
            // Empty lines need some content; compare strictly so that a
            // line containing just "0" is kept as it is
            if ($line === '') {
                $line = '&nbsp;';
            }

            // Is this a "special" (every nth) line? Uses the zero-based index.
            $is_special = ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1));

            // From here on $i is the one-based line number
            ++$i;
            $is_extra = in_array($i, $this->highlight_extra_lines);

            if ($this->use_classes) {
                $li_class = $is_special ? 'li2' : 'li1';
                if ($is_extra) {
                    // One class attribute holding both class names
                    $li_class .= ' ln-xtra';
                }
                $attr = ' class="' . $li_class . '"';
                $def_attr = ' class="' . ($is_special ? 'de2' : 'de1') . '"';
            } else {
                $li_style = (string) ($is_special ? $this->line_style2 : $this->line_style1);
                if ($is_extra) {
                    // One style attribute: terminate the line style with a
                    // semicolon if needed, then append the extra style
                    $li_style = rtrim($li_style);
                    if ($li_style !== '' && substr($li_style, -1) != ';') {
                        $li_style .= ';';
                    }
                    $li_style .= $this->highlight_extra_lines_style;
                }
                $attr = ' style="' . $li_style . '"';
                // This style "covers up" the styles set on the <li> so that
                // they don't apply to the actual code on that line
                $def_attr = ' style="' . $this->code_style . '"';
            }

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attr .= " id=\"{$this->overall_id}-{$i}\"";
            }

            // Add in the line surrounded by appropriate list HTML
            $parsed_code .= "<li$attr><div$def_attr>$line</div></li>$ls";
        }
    } else {
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        foreach ($code as $line) {
            // Make lines have at least one space in them if they're empty
            if ($line === '') {
                $line = '&nbsp;';
            }
            if (in_array(++$i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    $parsed_code .= '<div class="ln-xtra">';
                } else {
                    $parsed_code .= "<div style=\"{$this->highlight_extra_lines_style}\">";
                }
                $parsed_code .= $line . "</div>\n";
            } else {
                $parsed_code .= $line . "\n";
            }
        }
    }

    // purge some unnecessary stuff
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
    $parsed_code = preg_replace('#<div[^>]+>(\s*)</div>#', '\\1', $parsed_code);

    if ($this->header_type == GESHI_HEADER_PRE) {
        // enforce line numbers when using pre
        $parsed_code = str_replace('<li></li>', '<li>&nbsp;</li>', $parsed_code);
    }

    return $this->header() . chop($parsed_code) . $this->footer();
}
2343
/**
 * Builds the opening HTML of the code block: the container tag (<pre> or
 * <div>, or none at all), the formatted header content, and the opening
 * <ol> when line numbers are enabled.
 *
 * Nothing is returned when the header type is none of GESHI_HEADER_NONE,
 * GESHI_HEADER_PRE or GESHI_HEADER_DIV.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header ()
{
    // Attributes for the container element
    $attributes = $this->get_attributes();

    // The list only needs a start attribute when numbering doesn't begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Get the header HTML
    $header = $this->format_header_content();

    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // No container at all: just the header content (and the list, if any)
    if (GESHI_HEADER_NONE == $this->header_type) {
        return $numbered ? "$header<ol$ol_attributes>" : $header;
    }

    $list_open = $numbered ? "<ol$ol_attributes>" : '';

    if ($this->header_type == GESHI_HEADER_PRE) {
        return "<pre$attributes>$header$list_open";
    }
    if ($this->header_type == GESHI_HEADER_DIV) {
        return "<div$attributes>$header$list_open";
    }
}
2387
/**
 * Produces the HTML for the user-supplied header content.
 *
 * Newlines are stripped when the <pre> header is in use, the special
 * keywords are substituted via replace_keywords(), and the result is
 * wrapped in a <div> carrying either the "head" class or the inline
 * header style. Nothing is returned when no header content is set.
 *
 * @return string The header content, formatted for output
 * @since 1.0.2
 * @access private
 */
function format_header_content ()
{
    $header = $this->header_content;
    if (!$header) {
        // No header content configured
        return;
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $header = str_replace("\n", '', $header);
    }
    $header = $this->replace_keywords($header);

    $attr = $this->use_classes
        ? ' class="head"'
        : " style=\"{$this->header_content_style}\"";

    return "<div$attr>$header</div>";
}
2412
/**
 * Builds the closing HTML of the code block: the closing </ol> when line
 * numbers are enabled, the formatted footer content, and the closing tag
 * of the container (</div> for the div header, </pre> otherwise; nothing
 * for GESHI_HEADER_NONE).
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer ()
{
    $footer_content = $this->format_footer_content();
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // No container to close: only the list (if any) and the footer content
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($numbered) {
            return '</ol>' . $footer_content;
        }
        return $footer_content;
    }

    $list_close = $numbered ? '</ol>' : '';
    $container_close = ($this->header_type == GESHI_HEADER_DIV) ? '</div>' : '</pre>';

    return "$list_close$footer_content$container_close";
}
2441
/**
 * Produces the HTML for the user-supplied footer content.
 *
 * Newlines are stripped when the <pre> header is in use, the special
 * keywords are substituted via replace_keywords(), and the result is
 * wrapped in a <div> carrying either the "foot" class or the inline
 * footer style. Nothing is returned when no footer content is set.
 *
 * @return string The footer content, formatted for output
 * @since 1.0.2
 * @access private
 */
function format_footer_content ()
{
    $footer = $this->footer_content;
    if (!$footer) {
        // No footer content configured
        return;
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $footer = str_replace("\n", '', $footer);
    }
    $footer = $this->replace_keywords($footer);

    $attr = $this->use_classes
        ? ' class="foot"'
        : " style=\"{$this->footer_content_style}\"";

    return "<div$attr>$footer</div>";
}
2466
/**
 * Substitutes the special placeholders allowed in header and footer
 * content with their current values:
 *
 *  <TIME>     - the parse time from get_time(), to 3 decimal places
 *  <LANGUAGE> - the current language
 *  <VERSION>  - the GeSHi version
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords ($instr)
{
    // placeholder => replacement, applied in this order
    $substitutions = array(
        '<TIME>'     => number_format($this->get_time(), 3),
        '<LANGUAGE>' => $this->language,
        '<VERSION>'  => GESHI_VERSION
    );

    return str_replace(array_keys($substitutions), array_values($substitutions), $instr);
}
2491
/**
 * Assembles the HTML attribute string for the outer container element.
 *
 * Each of class, id and style is included (in that order, each preceded
 * by a space) only when the corresponding overall_* setting is non-empty.
 *
 * @return The CSS attributes for this code
 * @since 1.0.0
 * @access private
 * @todo Document behaviour change - class is outputted regardless of whether we're using classes or not.
 * Same with style
 */
function get_attributes ()
{
    $parts = array();

    if ($this->overall_class != '') {
        $parts[] = " class=\"{$this->overall_class}\"";
    }
    if ($this->overall_id != '') {
        $parts[] = " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '') {
        $parts[] = ' style="' . $this->overall_style . '"';
    }

    return implode('', $parts);
}
2516
/**
 * Returns a stylesheet for the highlighted code. If $economy mode
 * is true, we only return the stylesheet declarations that matter for
 * this code block instead of the whole thing
 *
 * In economy mode a rule is emitted only when its style is non-empty and
 * (where applicable) the corresponding lexic is enabled; otherwise every
 * rule is emitted.
 *
 * Fix over the previous implementation: in non-economy mode each entry of
 * $this->link_styles used to emit ALL FOUR a:link / a:hover / a:active /
 * a:visited rules with that entry's style, so the rules were duplicated
 * and the last entry's style overrode the others. Each link style is now
 * only ever written to its own pseudo-class.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 * @since 1.0.0
 */
function get_stylesheet ($economy_mode = true)
{
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class. Otherwise, a selector
    // of '' means that these styles will be applied anywhere
    $selector = ($this->overall_id != '') ? "#{$this->overall_id} " : '';
    $selector = ($selector == '' && $this->overall_class != '') ? ".{$this->overall_class} " : $selector;

    // Header of the stylesheet
    if (!$economy_mode) {
        $stylesheet = "/**\n * GeSHi Dynamically Generated Stylesheet\n * --------------------------------------\n * Dynamically generated stylesheet for {$this->language}\n * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n * GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter)\n */\n";
    } else {
        $stylesheet = '/* GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter) */' . "\n";
    }

    // Default styles for the code on each line (only relevant when there
    // are line numbers, unless the full stylesheet was requested)
    if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
    }

    // Add overall styles
    if (!$economy_mode || $this->overall_style != '') {
        $stylesheet .= "$selector {{$this->overall_style}}\n";
    }

    // Add styles for links: one rule per configured link state
    foreach ($this->link_styles as $key => $style) {
        if ($economy_mode && $style == '') {
            continue;
        }
        if ($key == GESHI_LINK) {
            $stylesheet .= "{$selector}a:link {{$style}}\n";
        }
        if ($key == GESHI_HOVER) {
            $stylesheet .= "{$selector}a:hover {{$style}}\n";
        }
        if ($key == GESHI_ACTIVE) {
            $stylesheet .= "{$selector}a:active {{$style}}\n";
        }
        if ($key == GESHI_VISITED) {
            $stylesheet .= "{$selector}a:visited {{$style}}\n";
        }
    }

    // Header and footer
    if (!$economy_mode || $this->header_content_style != '') {
        $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
    }
    if (!$economy_mode || $this->footer_content_style != '') {
        $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
    }

    // Styles for important stuff
    if (!$economy_mode || $this->important_styles != '') {
        $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || count($this->highlight_extra_lines)) {
        $stylesheet .= "$selector.ln-xtra {{$this->highlight_extra_lines_style}}\n";
    }

    // Simple line number styles
    if (!$economy_mode || ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->line_style1 != '')) {
        $stylesheet .= "{$selector}li {{$this->line_style1}}\n";
    }

    // If there is a style set for fancy line numbers, echo it out
    if (!$economy_mode || ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && $this->line_style2 != '')) {
        $stylesheet .= "{$selector}li.li2 {{$this->line_style2}}\n";
    }

    // Per-lexic styles. In economy mode a group is skipped when it has no
    // style or when highlighting for it is switched off.
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if (!$economy_mode || ($this->lexic_permissions['KEYWORDS'][$group] && $styles != '')) {
            $stylesheet .= "$selector.kw$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['COMMENTS'][$group])) {
            $stylesheet .= "$selector.co$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['ESCAPE_CHAR'])) {
            $stylesheet .= "$selector.es$group {{$styles}}\n";
        }
    }
    // NOTE(review): the .br rules are built from STYLES['SYMBOLS'] but
    // guarded by the BRACKETS permission - confirm this pairing is intended.
    foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['BRACKETS'])) {
            $stylesheet .= "$selector.br$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['STRINGS'])) {
            $stylesheet .= "$selector.st$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['NUMBERS'])) {
            $stylesheet .= "$selector.nu$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['METHODS'])) {
            $stylesheet .= "$selector.me$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if (!$economy_mode || $styles != '') {
            $stylesheet .= "$selector.sc$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['REGEXPS'][$group])) {
            $stylesheet .= "$selector.re$group {{$styles}}\n";
        }
    }

    return $stylesheet;
}
2660
2661 } // End Class GeSHi
2662
2663
if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * The code is highlighted without any header (GESHI_HEADER_NONE) and
     * wrapped in <code>...</code>.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. May be omitted or left
     *               blank; it is passed on to the GeSHi constructor as is
     *               (as from version 1.0.7 the path should be automatically
     *               detected)
     * @param boolean Whether to return the result or to echo
     * @return mixed  If $return is true, the highlighted code as a string.
     *                Otherwise the code is echoed and the return value is
     *                false if GeSHi reported an error, true if not.
     * @since 1.0.2
     */
    function geshi_highlight ($string, $language, $path = '', $return = false)
    {
        $geshi = new GeSHi($string, $language, $path);
        $geshi->set_header_type(GESHI_HEADER_NONE);

        $highlighted = '<code>' . $geshi->parse_code() . '</code>';
        if ($return) {
            return $highlighted;
        }

        echo $highlighted;
        if ($geshi->error()) {
            return false;
        }
        return true;
    }
}
2690
2691 ?>