Copying geshi 1.0.7.5 to viewsvn/includes/geshi
[viewsvn.git] / includes / geshi / geshi.php
1 <?php
2 /**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the documentation
6 * at http://qbnz.com/highlighter/documentation.php for more information about how to
7 * use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/ directory
10 *
11 * This file is part of GeSHi.
12 *
13 * GeSHi is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * GeSHi is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with GeSHi; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 *
27 * @package core
28 * @author Nigel McNie <nigel@geshi.org>
29 * @copyright Copyright &copy; 2004, 2005, Nigel McNie
30 * @license http://gnu.org/copyleft/gpl.html GNU GPL
31 * @version $Id: geshi.php,v 1.23 2005/11/19 02:23:37 oracleshinoda Exp $
32 *
33 */
34
35 //
36 // GeSHi Constants
37 // You should use these constant names in your programs instead of
38 // their values - you never know when a value may change in a future
39 // version
40 //
41
42 /** The version of this GeSHi file */
43 define('GESHI_VERSION', '1.0.7.5');
44
45 /** For the future (though this may never be realised) */
46 define('GESHI_OUTPUT_HTML', 0);
47
48 /** Set the correct directory separator */
49 define('GESHI_DIR_SEPARATOR', ('WIN' != substr(PHP_OS, 0, 3)) ? '/' : '\\');
50
51 // Define the root directory for the GeSHi code tree
52 if (!defined('GESHI_ROOT')) {
53 /** The root directory for GeSHi */
54 define('GESHI_ROOT', dirname(__FILE__) . GESHI_DIR_SEPARATOR);
55 }
56 /** The language file directory for GeSHi
57 @access private */
58 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . GESHI_DIR_SEPARATOR);
59
60
61 // Line numbers - use with enable_line_numbers()
62 /** Use no line numbers when building the result */
63 define('GESHI_NO_LINE_NUMBERS', 0);
64 /** Use normal line numbers when building the result */
65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
66 /** Use fancy line numbers when building the result */
67 define('GESHI_FANCY_LINE_NUMBERS', 2);
68
69 // Container HTML type
70 /** Use nothing to surround the source */
71 define('GESHI_HEADER_NONE', 0);
72 /** Use a "div" to surround the source */
73 define('GESHI_HEADER_DIV', 1);
74 /** Use a "pre" to surround the source */
75 define('GESHI_HEADER_PRE', 2);
76
77 // Capitalisation constants
78 /** Leave keywords found as the case that they are */
79 define('GESHI_CAPS_NO_CHANGE', 0);
80 /** Uppercase keywords found */
81 define('GESHI_CAPS_UPPER', 1);
82 /** Lowercase keywords found */
83 define('GESHI_CAPS_LOWER', 2);
84
85 // Link style constants
86 /** Links in the source in the :link state */
87 define('GESHI_LINK', 0);
88 /** Links in the source in the :hover state */
89 define('GESHI_HOVER', 1);
90 /** Links in the source in the :active state */
91 define('GESHI_ACTIVE', 2);
92 /** Links in the source in the :visited state */
93 define('GESHI_VISITED', 3);
94
95 // Important string starter/finisher
96 // Note that if you change these, they should be as-is: i.e., don't
97 // write them as if they had been run through htmlentities()
98 /** The starter for important parts of the source */
99 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
100 /** The ender for important parts of the source */
101 define('GESHI_END_IMPORTANT', '<END GeSHi>');
102
103 /**#@+
104 * @access private
105 */
106 // When strict mode applies for a language
107 /** Strict mode never applies (this is the most common) */
108 define('GESHI_NEVER', 0);
109 /** Strict mode *might* apply, and can be enabled or
110 disabled by {@link GeSHi::enable_strict_mode()} */
111 define('GESHI_MAYBE', 1);
112 /** Strict mode always applies */
113 define('GESHI_ALWAYS', 2);
114
115 // Advanced regexp handling constants, used in language files
116 /** The key of the regex array defining what to search for */
117 define('GESHI_SEARCH', 0);
118 /** The key of the regex array defining what bracket group in a
119 matched search to use as a replacement */
120 define('GESHI_REPLACE', 1);
121 /** The key of the regex array defining any modifiers to the regular expression */
122 define('GESHI_MODIFIERS', 2);
123 /** The key of the regex array defining what bracket group in a
124 matched search to put before the replacement */
125 define('GESHI_BEFORE', 3);
126 /** The key of the regex array defining what bracket group in a
127 matched search to put after the replacement */
128 define('GESHI_AFTER', 4);
129
130 /** Used in language files to mark comments */
131 define('GESHI_COMMENTS', 0);
132
133 // Error detection - use these to analyse faults
134 /** No sourcecode to highlight was specified */
135 define('GESHI_ERROR_NO_INPUT', 1);
136 /** The language specified does not exist */
137 define('GESHI_ERROR_NO_SUCH_LANG', 2);
138 /** GeSHi could not open a file for reading (generally a language file) */
139 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
140 /** The header type passed to {@link GeSHi::set_header_type()} was invalid */
141 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
142 /** The line number type passed to {@link GeSHi::enable_line_numbers()} was invalid */
143 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
144 /**#@-*/
145
146
147 /**
148 * The GeSHi Class.
149 *
150 * Please refer to the documentation for GeSHi 1.0.X that is available
151 * at http://qbnz.com/highlighter/documentation.php for more information
152 * about how to use this class.
153 *
154 * @package core
155 * @author Nigel McNie <nigel@geshi.org>
156 * @copyright Copyright &copy; 2004, 2005 Nigel McNie
157 */
158 class GeSHi
159 {
160 /**#@+
161 * @access private
162 */
163 /**
164 * The source code to highlight
165 * @var string
166 */
167 var $source = '';
168
169 /**
170 * The language to use when highlighting
171 * @var string
172 */
173 var $language = '';
174
175 /**
176 * The data for the language used
177 * @var array
178 */
179 var $language_data = array();
180
181 /**
182 * The path to the language files
183 * @var string
184 */
185 var $language_path = GESHI_LANG_ROOT;
186
187 /**
188 * The code of the last error (one of the GESHI_ERROR_* constants), or false if there is none
189 * @var int|false
190 * @todo check err reporting works
191 */
192 var $error = false;
193
194 /**
195 * Possible error messages
196 * @var array
197 */
198 var $error_messages = array(
199 GESHI_ERROR_NO_INPUT => 'No source code inputted',
200 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
201 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
202 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
203 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
204 );
205
206 /**
207 * Whether highlighting is strict or not
208 * @var boolean
209 */
210 var $strict_mode = false;
211
212 /**
213 * Whether to use CSS classes in output
214 * @var boolean
215 */
216 var $use_classes = false;
217
218 /**
219 * The type of header to use. Can be one of the following
220 * values:
221 *
222 * <ul>
223 * <li><b>GESHI_HEADER_PRE</b>: Source is outputted in
224 * a &lt;pre&gt; HTML element.</li>
225 * <li><b>GESHI_HEADER_DIV</b>: Source is outputted in
226 * a &lt;div&gt; HTML element.</li>
227 * </ul>
228 *
229 * @var int
230 */
231 var $header_type = GESHI_HEADER_PRE;
232
233 /**
234 * Array of permissions for which lexics should be highlighted
235 * @var array
236 */
237 var $lexic_permissions = array(
238 'KEYWORDS' => array(),
239 'COMMENTS' => array('MULTI' => true),
240 'REGEXPS' => array(),
241 'ESCAPE_CHAR' => true,
242 'BRACKETS' => true,
243 'SYMBOLS' => true,
244 'STRINGS' => true,
245 'NUMBERS' => true,
246 'METHODS' => true,
247 'SCRIPT' => true
248 );
249
250 /**
251 * The time it took to parse the code
252 * @var double
253 */
254 var $time = 0;
255
256 /**
257 * The content of the header block
258 * @var string
259 */
260 var $header_content = '';
261
262 /**
263 * The content of the footer block
264 * @var string
265 */
266 var $footer_content = '';
267
268 /**
269 * The style of the header block
270 * @var string
271 */
272 var $header_content_style = '';
273
274 /**
275 * The style of the footer block
276 * @var string
277 */
278 var $footer_content_style = '';
279
280 /**
281 * The styles for hyperlinks in the code
282 * @var array
283 */
284 var $link_styles = array();
285
286 /**
287 * Whether important blocks should be recognised or not
288 * @var boolean
289 * @deprecated
290 * @todo REMOVE THIS FUNCTIONALITY!
291 */
292 var $enable_important_blocks = false;
293
294 /**
295 * Styles for important parts of the code
296 * @var string
297 * @deprecated
298 * @todo As above - rethink the whole idea of important blocks as it is buggy and
299 * will be hard to implement in 1.2
300 */
301 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
302
303 /**
304 * Whether CSS IDs should be added to the code
305 * @var boolean
306 */
307 var $add_ids = false;
308
309 /**
310 * Lines that should be highlighted extra
311 * @var array
312 */
313 var $highlight_extra_lines = array();
314
315 /**
316 * Styles of extra-highlighted lines
317 * @var string
318 */
319 var $highlight_extra_lines_style = 'color: #cc0; background-color: #ffc;';
320
321 /**
322 * Number at which line numbers should start at
323 * @var int
324 * @todo Warning documentation about XHTML compliance
325 */
326 var $line_numbers_start = 1;
327
328 /**
329 * The overall style for this code block
330 * @var string
331 */
332 var $overall_style = '';
333
334 /**
335 * The style for the actual code
336 * @var string
337 */
338 var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
339
340 /**
341 * The overall class for this code block
342 * @var string
343 */
344 var $overall_class = '';
345
346 /**
347 * The overall ID for this code block
348 * @var string
349 */
350 var $overall_id = '';
351
352 /**
353 * Line number styles
354 * @var string
355 */
356 var $line_style1 = 'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
357
358 /**
359 * Line number styles for fancy lines
360 * @var string
361 */
362 var $line_style2 = 'font-weight: bold;';
363
364 /**
365 * Flag for how line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
366 * @var int
367 */
368 var $line_numbers = GESHI_NO_LINE_NUMBERS;
369
370 /**
371 * The "nth" value for fancy line highlighting
372 * @var int
373 */
374 var $line_nth_row = 0;
375
376 /**
377 * The size of tab stops
378 * @var int
379 */
380 var $tab_width = 8;
381
382 /**
383 * Default target for keyword links
384 * @var string
385 */
386 var $link_target = '';
387
388 /**
389 * The encoding to use for entity encoding
390 * @var string
391 */
392 var $encoding = 'ISO-8859-1';
393
394 /**
395 * Unused (planned for future)
396 * @var int
397 */
398 var $output_format = GESHI_OUTPUT_HTML;
399
400 /**#@-*/
401
/**
 * Creates a new GeSHi object, with source and language.
 *
 * This is declared as __construct() because PHP 8 no longer treats a
 * method named after its class as a constructor; without it, "new GeSHi()"
 * would silently skip all of the initialisation below.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> The path is detected automatically;
 *               it only needs to be passed (or set through
 *               {@link GeSHi::set_language_path()}) if the language
 *               directory has been renamed.
 * @since 1.0.0
 */
function __construct($source, $language, $path = '')
{
    $this->set_source($source);
    // The path has to be in place before the language is selected, because
    // set_language() builds the language file name from it.
    $this->set_language_path($path);
    $this->set_language($language);
}

/**
 * PHP 4 style constructor, kept so that PHP 4 still initialises the object
 * and so that existing explicit calls such as parent::GeSHi() keep working.
 * It simply forwards to {@link GeSHi::__construct()}.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source, $language, $path = '')
{
    $this->__construct($source, $language, $path);
}
422
/**
 * Returns an HTML-formatted error message for the last GeSHi operation,
 * or false if no error has occurred.
 *
 * The {LANGUAGE} and {PATH} placeholders of the message template are
 * filled in with the current language name and language path.
 *
 * @return string|false An error message if there has been an error, else false
 * @since 1.0.0
 */
function error()
{
    if (!$this->error) {
        return false;
    }

    $placeholders = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    // str_replace() applies the pairs one after the other, in array order
    $msg = str_replace(
        array_keys($placeholders),
        array_values($placeholders),
        $this->error_messages[$this->error]
    );

    return "<br /><strong>GeSHi Error:</strong> $msg (code $this->error)<br />";
}
445
/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * If the requested language could not be found, the name taken from the
 * currently loaded language data is suffixed with " (Unknown Language)".
 *
 * @return string The name for the current language
 * @since 1.0.2
 */
function get_language_name()
{
    // The error code is stored in $this->error; the previously used
    // $this->_error does not exist, so this branch could never be taken.
    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
    }
    return $this->language_data['LANG_NAME'];
}
460
/**
 * Sets the source code for this object.
 *
 * The source is stored as given. If it is empty once surrounding
 * whitespace is trimmed, GESHI_ERROR_NO_INPUT is recorded as well.
 *
 * @param string The source code to highlight
 * @since 1.0.0
 */
function set_source($source)
{
    $this->source = $source;

    if (trim($source) == '') {
        $this->error = GESHI_ERROR_NO_INPUT;
    }
}
474
/**
 * Sets the language for this object and loads its language file.
 *
 * The name is stripped of every character other than letters, digits,
 * "-" and "_" and then lowercased. If "<language path><name>.php" is not
 * readable, GESHI_ERROR_NO_SUCH_LANG is recorded and nothing is loaded.
 *
 * @param string The name of the language to use
 * @since 1.0.0
 */
function set_language($language)
{
    // Start from a clean slate: no error, strict mode off
    $this->error = false;
    $this->strict_mode = GESHI_NEVER;

    $clean_name = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
    $this->language = strtolower($clean_name);

    $language_file = $this->language_path . $this->language . '.php';
    if (is_readable($language_file)) {
        // Load the language for parsing
        $this->load_language($language_file);
    } else {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
    }
}
497
/**
 * Sets the path to the directory containing the language files.
 *
 * An empty (falsy) path is ignored and leaves the current path untouched.
 * A trailing "/" is appended when the given path does not end in one.
 * According to the original documentation, the path is relative to the
 * directory of the script that included geshi.php, NOT geshi.php itself.
 *
 * @param string The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path)
{
    if (!$path) {
        return;
    }

    $this->language_path = $path;
    if ('/' != substr($path, -1)) {
        $this->language_path .= '/';
    }
}
516
/**
 * Sets the type of header to be used.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted.
 *
 * Any other value records GESHI_ERROR_INVALID_HEADER_TYPE and leaves the
 * current header type unchanged.
 *
 * @param int The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type)
{
    $valid_types = array(GESHI_HEADER_DIV, GESHI_HEADER_PRE, GESHI_HEADER_NONE);

    // Deliberately a loose (==) membership test
    if (in_array($type, $valid_types)) {
        $this->header_type = $type;
    } else {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
539
/**
 * Sets the overall style for the outputted code block. The style should
 * be a string of valid stylesheet declarations.
 *
 * @param string The overall style for the outputted code block
 * @param boolean If true, the style is appended to the current overall
 *                style instead of replacing it
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->overall_style .= $style;
        return;
    }
    $this->overall_style = $style;
}
557
/**
 * Stores the overall CSS class name for this block of code, so that a
 * stylesheet can target this object's output.
 *
 * @param string The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class)
{
    $this->overall_class = $class;
}
570
/**
 * Stores the overall CSS id for this block of code, so that a
 * stylesheet can target this object's output.
 *
 * @param string The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id)
{
    $this->overall_id = $id;
}
582
/**
 * Sets whether CSS classes should be used to highlight the source.
 * Classes are off by default; calling this method without arguments
 * turns them on.
 *
 * @param boolean Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true)
{
    // Normalise any truthy/falsy value to a real boolean
    $this->use_classes = (bool) $flag;
}
594
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations.
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string The style to use for actual code
 * @param boolean If true, the style is appended to the current code
 *                style instead of replacing it
 */
function set_code_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }
    $this->code_style = $style;
}
617
/**
 * Sets the styles for the line numbers.
 *
 * @param string The style for the line numbers that are "normal"
 * @param string|boolean If a string, this is the style of the line
 *                       numbers that are "fancy". If a boolean, it is
 *                       taken as the merge flag instead and the fancy
 *                       style given is the empty string
 * @param boolean If true, both styles are appended to the current
 *                styles instead of replacing them
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false)
{
    // Two-argument call form: set_line_style($style, $merge_flag)
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
644
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 * <ul>
 *   <li><b>GESHI_NO_LINE_NUMBERS</b>: Line numbers will not be displayed</li>
 *   <li><b>GESHI_NORMAL_LINE_NUMBERS</b>: Line numbers will be displayed</li>
 *   <li><b>GESHI_FANCY_LINE_NUMBERS</b>: Fancy line numbers will be displayed</li>
 * </ul>
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * Any other value for the first parameter records
 * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves the current line number
 * settings unchanged, in the same way set_header_type() rejects an
 * invalid header type.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5)
{
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // Do not store a value that has just been reported as invalid
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
673
/**
 * Sets the style for a keyword group.
 *
 * @param int The key of the keyword group to change the styles of
 * @param string The style to make the keywords
 * @param boolean If true, the style is appended to the group's existing
 *                style (so the new declarations come last); otherwise
 *                the existing style is overwritten
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
}
693
/**
 * Turns highlighting on/off for a keyword group by recording the
 * choice in the lexic permissions.
 *
 * @param int The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true)
{
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
705
/**
 * Sets the style for a comment group.
 *
 * @param int The key of the comment group to change the styles of
 * @param string The style to make the comments
 * @param boolean If true, the style is appended to the group's existing
 *                style (so the new declarations come last); otherwise
 *                the existing style is overwritten
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['COMMENTS'][$key] = $style;
}
725
/**
 * Turns highlighting on/off for a comment group by recording the
 * choice in the lexic permissions.
 *
 * @param int The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true)
{
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
737
/**
 * Sets the style for escaped characters.
 *
 * @param string The style to make the escape characters
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.0
 */
function set_escape_characters_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
}
756
/**
 * Turns highlighting on/off for escaped characters. Calling this
 * method without arguments turns it on.
 *
 * @param boolean Whether to turn highlighting for escape characters on or off
 * @since 1.0.0
 */
function set_escape_characters_highlighting($flag = true)
{
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
767
/**
 * Sets the style for brackets.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string The style to make the brackets
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.0
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
790
/**
 * Turns highlighting on/off for brackets.
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @since 1.0.0
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag)
{
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
805
/**
 * Sets the style for symbols. The same style is also applied to
 * brackets through set_brackets_style(), for backward compatibility.
 *
 * @param string The style to make the symbols
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.1
 */
function set_symbols_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][0] = $style;
    }

    // For backward compatibility
    $this->set_brackets_style($style, $preserve_defaults);
}
826
/**
 * Turns highlighting on/off for symbols. The same setting is also
 * applied to brackets through set_brackets_highlighting(), for
 * backward compatibility.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 * @since 1.0.0
 */
function set_symbols_highlighting($flag)
{
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility
    $this->set_brackets_highlighting($flag);
}
839
/**
 * Sets the style for strings.
 *
 * @param string The style to make the strings
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.0
 */
function set_strings_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['STRINGS'][0] = $style;
}
858
/**
 * Turns highlighting on/off for strings by recording the choice in
 * the lexic permissions.
 *
 * @param boolean Whether to turn highlighting for strings on or off
 * @since 1.0.0
 */
function set_strings_highlighting($flag)
{
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
869
/**
 * Sets the style for numbers.
 *
 * @param string The style to make the numbers
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.0
 */
function set_numbers_style($style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['NUMBERS'][0] = $style;
}
888
/**
 * Turns highlighting on/off for numbers by recording the choice in
 * the lexic permissions.
 *
 * @param boolean Whether to turn highlighting for numbers on or off
 * @since 1.0.0
 */
function set_numbers_highlighting($flag)
{
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
899
/**
 * Sets the style for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * you are highlighting to get this number.
 *
 * @param int The key of the object splitter to change the styles of
 * @param string The style to make the methods
 * @param boolean If true, the style is appended to the existing style
 *                (so the new declarations come last); otherwise the
 *                existing style is overwritten
 * @since 1.0.0
 */
function set_methods_style($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
921
/**
 * Turns highlighting on/off for methods by recording the choice in
 * the lexic permissions.
 *
 * @param boolean Whether to turn highlighting for methods on or off
 * @since 1.0.0
 */
function set_methods_highlighting($flag)
{
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
932
/**
 * Sets the style for a regular expression group.
 *
 * @param int The key of the regular expression group to change the styles of
 * @param string The style to make the regular expression matches
 * @param boolean If true, the style is appended to the group's existing
 *                style (so the new declarations come last); otherwise
 *                the existing style is overwritten
 * @since 1.0.0
 */
function set_regexps_style($key, $style, $preserve_defaults = false)
{
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
951
/**
 * Turns highlighting on/off for a regular expression group by
 * recording the choice in the lexic permissions.
 *
 * @param int The key of the regular expression group to turn on or off
 * @param boolean Whether to turn highlighting for the regular expression group on or off
 * @since 1.0.0
 */
function set_regexps_highlighting($key, $flag)
{
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
963
/**
 * Sets whether a group of keywords is checked for in a case sensitive
 * manner. The value is stored as a real boolean in the language data.
 *
 * @param int The key of the keyword group to change the case sensitivity of
 * @param boolean Whether to check in a case sensitive manner or not
 * @since 1.0.0
 */
function set_case_sensitivity($key, $case)
{
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
975
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 * <ul>
 *   <li><b>GESHI_CAPS_NO_CHANGE</b>: leave keywords as-is</li>
 *   <li><b>GESHI_CAPS_UPPER</b>: convert all keywords to uppercase where found</li>
 *   <li><b>GESHI_CAPS_LOWER</b>: convert all keywords to lowercase where found</li>
 * </ul>
 *
 * The value is stored as given; it is not validated here.
 *
 * @param int A constant specifying what to do with matched keywords
 * @since 1.0.1
 * @todo Error check the passed value
 */
function set_case_keywords($case)
{
    $this->language_data['CASE_KEYWORDS'] = $case;
}
993
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted with intval(). Widths below zero are ignored:
 * the call returns without changing the current tab width.
 *
 * @param int The tab width
 * @since 1.0.0
 */
function set_tab_width($width)
{
    $width = intval($width);

    // Honour the documented contract: a negative width is not stored
    if ($width < 0) {
        return;
    }
    $this->tab_width = $width;
}
1006
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The setting is only changed when the loaded language declares
 * STRICT_MODE_APPLIES as GESHI_MAYBE; otherwise the call does nothing.
 *
 * @param boolean Whether to enable strict mode or not
 * @since 1.0.0
 */
function enable_strict_mode($mode = true)
{
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }
    $this->strict_mode = (bool) $mode;
}
1021
/**
 * Disables all highlighting.
 *
 * Every entry of the lexic permissions is set to false; where an entry
 * is itself an array of groups, each group in it is set to false.
 * Important blocks are switched off as well.
 *
 * @since 1.0.0
 */
function disable_highlighting()
{
    foreach (array_keys($this->lexic_permissions) as $lexic) {
        if (!is_array($this->lexic_permissions[$lexic])) {
            $this->lexic_permissions[$lexic] = false;
            continue;
        }
        // Per-group permissions (e.g. keyword or comment groups)
        foreach (array_keys($this->lexic_permissions[$lexic]) as $group) {
            $this->lexic_permissions[$lexic][$group] = false;
        }
    }

    // Context blocks
    $this->enable_important_blocks = false;
}
1042
/**
 * Enables all highlighting.
 *
 * Every entry of the lexic permissions is set to true; where an entry
 * is itself an array of groups, each group in it is set to true.
 * Important blocks are switched on as well.
 *
 * @since 1.0.0
 */
function enable_highlighting()
{
    foreach (array_keys($this->lexic_permissions) as $lexic) {
        if (!is_array($this->lexic_permissions[$lexic])) {
            $this->lexic_permissions[$lexic] = true;
            continue;
        }
        // Per-group permissions (e.g. keyword or comment groups)
        foreach (array_keys($this->lexic_permissions[$lexic]) as $group) {
            $this->lexic_permissions[$lexic][$group] = true;
        }
    }

    // Context blocks
    $this->enable_important_blocks = true;
}
1063
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found
 *
 * Matching is done in two passes. An exact (case-sensitive) match always
 * wins, so lookup tables that distinguish e.g. 'c' from 'C' keep working.
 * Only if no exact match exists is the extension compared ignoring case,
 * so that 'PHP' or 'Html' are still recognised.
 *
 * The first language in table order that lists the extension is returned.
 * In the default table 'c' is listed under both 'c' and 'c_mac', so 'c_mac'
 * is never returned for it.
 *
 * @param string The extension to get a language name for
 * @param array  A lookup array to use instead of the default, in the form
 *               array('lang_name' => array('extension', ...), ...). An
 *               empty or non-array value selects the default table.
 * @return string The language name, or '' if the extension is unknown
 * @since 1.0.5
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 * @todo static?
 */
function get_language_name_from_extension ( $extension, $lookup = array() )
{
    if ( !is_array($lookup) || !$lookup )
    {
        $lookup = array(
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'c' => array('c'),
            'c_mac' => array('c'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cpp' => array('cpp'),
            'csharp' => array(),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr'),
            'html4strict' => array('html', 'htm'),
            'java' => array('java'),
            'javascript' => array('js'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'mpasm' => array(),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'smarty' => array(),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'xml' => array('xml')
        );
    }

    $extension = (string) $extension;

    // Pass 1: exact match. Compared as strings so that numeric-looking
    // extensions such as '10' and '1e1' are not treated as equal.
    foreach ($lookup as $lang => $extensions) {
        foreach ((array) $extensions as $ext) {
            if ((string) $ext === $extension) {
                return $lang;
            }
        }
    }

    // Pass 2: nothing matched exactly, retry ignoring case
    $folded = strtolower($extension);
    foreach ($lookup as $lang => $extensions) {
        foreach ((array) $extensions as $ext) {
            if (strtolower((string) $ext) === $folded) {
                return $lang;
            }
        }
    }

    return '';
}
1126
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * If the name refers to a directory, is not readable, or cannot be read,
 * $this->error is set to GESHI_ERROR_FILE_NOT_READABLE and neither the
 * source nor the language is changed.
 *
 * @param string The name of the file to load
 * @param array  A lookup array passed on to get_language_name_from_extension()
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file ($file_name, $lookup = array())
{
    $lines = false;
    // is_readable() is also true for directories, which file() cannot read
    if (is_readable($file_name) && !is_dir($file_name)) {
        $lines = file($file_name);
    }
    if (false === $lines) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }
    $this->set_source(implode('', $lines));

    // pathinfo() only looks at the last path component, so a dot in a
    // directory name ("conf.d/README") is not mistaken for an extension.
    $info = pathinfo($file_name);
    $extension = isset($info['extension']) ? $info['extension'] : '';
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
1151
/**
 * Adds a keyword to a keyword group for highlighting
 *
 * A word that is already in the group is not added a second time. Every
 * entry of a group is matched separately when the source is parsed, so a
 * duplicate would only cost time.
 *
 * @param int    The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword ($key, $word)
{
    if (isset($this->language_data['KEYWORDS'][$key]) &&
        is_array($this->language_data['KEYWORDS'][$key]) &&
        in_array($word, $this->language_data['KEYWORDS'][$key], true)) {
        // Already present
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;
}
1163
/**
 * Removes a keyword from a keyword group
 *
 * Every occurrence of the word is removed; the keys of the remaining words
 * are preserved. Asking to remove a word from a group that does not exist
 * does nothing.
 *
 * @param int    The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @since 1.0.0
 */
function remove_keyword ($key, $word)
{
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        // No such group: array_diff() would fail and overwrite the slot
        return;
    }
    $this->language_data['KEYWORDS'][$key] =
        array_diff($this->language_data['KEYWORDS'][$key], array($word));
}
1176
/**
 * Registers a new keyword group (or replaces an existing one with the
 * same key) and switches highlighting for it on.
 *
 * @param int     The key of the keyword group to create
 * @param string  The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array   The words to use for the keyword group; a single word
 *                is converted to a one-element array
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array())
{
    // Language description: words, case handling and styling
    $this->language_data['KEYWORDS'][$key] = (array) $words;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    // A freshly added group is highlighted straight away
    $this->lexic_permissions['KEYWORDS'][$key] = true;
}
1194
/**
 * Drops a keyword group together with everything stored for it: its
 * words, its highlighting permission, its case-sensitivity flag and its
 * styles.
 *
 * @param int The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group($key)
{
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key]
    );
}
1208
/**
 * Stores the content to use for the header block. The value is kept
 * as given; nothing is validated or escaped here.
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content($content)
{
    $this->header_content = $content;
}
1219
/**
 * Stores the content to use for the footer block. The value is kept
 * as given; nothing is validated or escaped here.
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content)
{
    $this->footer_content = $content;
}
1230
/**
 * Stores the style to apply to the header content. The value is kept
 * as given.
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style)
{
    $this->header_content_style = $style;
}
1241
/**
 * Stores the style to apply to the footer content. The value is kept
 * as given.
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style)
{
    $this->footer_content_style = $style;
}
1252
/**
 * Sets the base URL that keywords of one group link to.
 *
 * The URL is stored under the group's key in the language data, replacing
 * any URL set for that group before.
 *
 * @param int    The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *               the url somewhere, it is replaced by the keyword
 *               that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url)
{
    $this->language_data['URLS'][$group] = $url;
}
1266
/**
 * Sets the styles used for links in code, one link state at a time.
 *
 * @param int    A constant that specifies what state the style is being
 *               set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles)
{
    $this->link_styles[$type] = $styles;
}
1279
/**
 * Sets the target for links in code.
 *
 * What is stored is the ready-made attribute text (including the
 * surrounding spaces), or the empty string when the target is empty.
 * The target is inserted as given, without escaping.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target)
{
    $this->link_target = ($target) ? ' target="' . $target . '" ' : '';
}
1294
/**
 * Stores the styles used for the parts of the code marked as important.
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles)
{
    $this->important_styles = $styles;
}
1305
/**
 * Switches highlighting of context-important blocks on or off.
 *
 * @param boolean Whether important blocks are highlighted; the value is
 *                converted to a strict boolean
 * @todo REMOVE THIS SHIZ FROM GESHI!
 * @deprecated
 */
function enable_important_blocks($flag)
{
    $this->enable_important_blocks = (bool) $flag;
}
1316
/**
 * Switches the adding of CSS IDs to each line on or off.
 *
 * @param boolean If true (the default), IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true)
{
    $this->add_ids = (bool) $flag;
}
1327
/**
 * Marks one or more lines for extra highlighting.
 *
 * Each value is converted with intval() and remembered under its own
 * number, so asking for the same line twice has no further effect. Lines
 * requested by earlier calls stay marked.
 *
 * @param mixed An array of line numbers to highlight, or just a line
 *              number on its own.
 * @since 1.0.2
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines)
{
    // Treat a lone line number as a one-element list
    $requested = is_array($lines) ? $lines : array($lines);

    foreach ($requested as $line) {
        $number = intval($line);
        $this->highlight_extra_lines[$number] = $number;
    }
}
1346
/**
 * Stores the style applied to extra-highlighted lines.
 *
 * @param string The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles)
{
    $this->highlight_extra_lines_style = $styles;
}
1357
/**
 * Sets what number line numbers should start at.
 *
 * The argument is converted to an integer and its sign is dropped, so
 * the stored value is never negative.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number)
{
    $first_line = intval($number);
    $this->line_numbers_start = abs($first_line);
}
1377
/**
 * Sets the encoding passed to htmlspecialchars(), for international
 * support. An empty value is ignored and the current encoding is kept.
 *
 * @param string The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding)
{
    if (!$encoding) {
        return;
    }
    $this->encoding = $encoding;
}
1391
/**
 * Returns the code in $this->source, highlighted and surrounded by the
 * necessary HTML.
 *
 * If an error is set, the source is not highlighted: it is only escaped
 * and wrapped in the header and footer. Otherwise the source is split into
 * parts (script-delimited blocks in strict mode, one single part
 * otherwise), every highlightable part is walked character by character to
 * pick out strings, escape sequences and comments, and everything in
 * between is handed to parse_non_string_part(). The result is passed
 * through finalise().
 *
 * This should only be called ONCE, cos it's SLOW! If you want to highlight
 * the same source multiple times, you're better off doing a whole lot of
 * str_replaces to replace the &lt;span&gt;s
 *
 * @return string The highlighted source
 * @since 1.0.0
 */
function parse_code ()
{
    // Start the timer
    $start_time = microtime();

    // Firstly, if there is an error, we won't highlight
    if ($this->error) {
        $result = $this->header();
        if ($this->header_type != GESHI_HEADER_PRE) {
            $result .= $this->indent(@htmlspecialchars($this->source, ENT_COMPAT, $this->encoding));
        } else {
            $result .= @htmlspecialchars($this->source, ENT_COMPAT, $this->encoding);
        }
        // Stop Timing
        $this->set_time($start_time, microtime());
        return $result . $this->footer();
    }

    // Add spaces for regular expression matching and line numbers
    $code = ' ' . $this->source . ' ';
    // Replace all newlines to a common form.
    $code = str_replace("\r\n", "\n", $code);
    $code = str_replace("\r", "\n", $code);

    // Initialise various stuff
    $length = strlen($code);
    $STRING_OPEN = '';
    $CLOSE_STRING = false;
    $ESCAPE_CHAR_OPEN = false;
    $COMMENT_MATCHED = false;
    // Turn highlighting on if strict mode doesn't apply to this language
    $HIGHLIGHTING_ON = ( !$this->strict_mode ) ? true : '';
    $stuff_to_parse = '';
    $result = '';

    // "Important" selections are handled like multiline comments
    // @todo GET RID OF THIS SHIZ
    if ($this->enable_important_blocks) {
        $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
    }

    if ($this->strict_mode) {
        // Break the source into bits. Each bit will be a portion of the code
        // within script delimiters - for example, HTML between < and >.
        // $parts[n][0] holds the opening delimiter of the bit (empty for
        // non-highlighted bits), $parts[n][1] its text.
        $parts = array(0 => array(0 => ''));
        $k = 0;
        for ($i = 0; $i < $length; $i++) {
            $char = substr($code, $i, 1);
            if (!$HIGHLIGHTING_ON) {
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
                    foreach ($delimiters as $open => $close) {
                        // Get the next little bit for this opening string
                        $check = substr($code, $i, strlen($open));
                        // If it matches...
                        if ($check == $open) {
                            // We start a new block with the highlightable
                            // code in it
                            $HIGHLIGHTING_ON = $open;
                            $i += strlen($open) - 1;
                            $char = $open;
                            $parts[++$k][0] = $char;

                            // No point going around again...
                            break 2;
                        }
                    }
                }
            } else {
                // Look up the delimiter pair that opened this block; the
                // loop is left with $close holding its closing string
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
                    foreach ($delimiters as $open => $close) {
                        if ($open == $HIGHLIGHTING_ON) {
                            // Found the closing tag
                            break 2;
                        }
                    }
                }
                // We check code from our current position BACKWARDS. This is so
                // the ending string for highlighting can be included in the block
                $check = substr($code, $i - strlen($close) + 1, strlen($close));
                if ($check == $close) {
                    $HIGHLIGHTING_ON = '';
                    // Add the string to the rest of the string for this part
                    $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
                    $parts[++$k][0] = '';
                    $char = '';
                }
            }
            $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
        }
        $HIGHLIGHTING_ON = '';
    } else {
        // Not strict mode - simply dump the source into
        // the array at index 1 (the first highlightable block)
        $parts = array(
            1 => array(
                0 => '',
                1 => $code
            )
        );
    }

    // Now we go through each part. We know that even-indexed parts are
    // code that shouldn't be highlighted, and odd-indexed parts should
    // be highlighted
    foreach ($parts as $key => $data) {
        $part = $data[1];
        // If this block should be highlighted...
        if ($key % 2) {
            if ($this->strict_mode) {
                // Find the class key for this block of code
                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key => $script_data) {
                    foreach ($script_data as $open => $close) {
                        if ($data[0] == $open) {
                            break 2;
                        }
                    }
                }

                if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
                    $this->lexic_permissions['SCRIPT']) {
                    // Add a span element around the source to
                    // highlight the overall source block
                    if (!$this->use_classes &&
                        $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
                        $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
                    } else {
                        $attributes = ' class="sc' . $script_key . '"';
                    }
                    $result .= "<span$attributes>";
                }
            }

            if (!$this->strict_mode || $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]) {
                // Now, highlight the code in this block. This code
                // is really the engine of GeSHi (along with the method
                // parse_non_string_part).
                $length = strlen($part);
                for ($i = 0; $i < $length; $i++) {
                    // Get the next char
                    $char = substr($part, $i, 1);
                    // Is this char the newline and line numbers being used?
                    if (($this->line_numbers != GESHI_NO_LINE_NUMBERS
                        || count($this->highlight_extra_lines) > 0)
                        && $char == "\n") {
                        // If so, is there a string open? If there is, we should end it before
                        // the newline and begin it again (so when <li>s are put in the source
                        // remains XHTML compliant)
                        // note to self: This opens up possibility of config files specifying
                        // that languages can/cannot have multiline strings???
                        if ($STRING_OPEN) {
                            if (!$this->use_classes) {
                                $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
                            } else {
                                $attributes = ' class="st0"';
                            }
                            $char = '</span>' . $char . "<span$attributes>";
                        }
                    } elseif ($char == $STRING_OPEN) {
                        // A match of a string delimiter
                        if (($this->lexic_permissions['ESCAPE_CHAR'] && $ESCAPE_CHAR_OPEN) ||
                            ($this->lexic_permissions['STRINGS'] && !$ESCAPE_CHAR_OPEN)) {
                            $char .= '</span>';
                        }
                        if (!$ESCAPE_CHAR_OPEN) {
                            $STRING_OPEN = '';
                            $CLOSE_STRING = true;
                        }
                        $ESCAPE_CHAR_OPEN = false;
                    } elseif (in_array($char, $this->language_data['QUOTEMARKS']) &&
                        ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
                        // The start of a new string
                        $STRING_OPEN = $char;
                        if (!$this->use_classes) {
                            $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
                        } else {
                            $attributes = ' class="st0"';
                        }
                        $char = "<span$attributes>" . $char;

                        $result .= $this->parse_non_string_part( $stuff_to_parse );
                        $stuff_to_parse = '';
                    } elseif (($char == $this->language_data['ESCAPE_CHAR']) && ($STRING_OPEN != '')) {
                        // An escape character
                        if (!$ESCAPE_CHAR_OPEN) {
                            $ESCAPE_CHAR_OPEN = true;
                            if ($this->lexic_permissions['ESCAPE_CHAR']) {
                                if (!$this->use_classes) {
                                    $attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
                                } else {
                                    $attributes = ' class="es0"';
                                }
                                $char = "<span$attributes>" . $char;
                                // $i indexes $part, so the lookahead has to read
                                // $part as well. In strict mode $part is only a
                                // slice of $code and the offsets differ.
                                if (substr($part, $i + 1, 1) == "\n") {
                                    // escaping a newline, what's the point in putting the span around
                                    // the newline? It only causes hassles when inserting line numbers
                                    $char .= '</span>';
                                    $ESCAPE_CHAR_OPEN = false;
                                }
                            }
                        } else {
                            $ESCAPE_CHAR_OPEN = false;
                            if ($this->lexic_permissions['ESCAPE_CHAR']) {
                                $char .= '</span>';
                            }
                        }
                    } elseif ($ESCAPE_CHAR_OPEN) {
                        if ($this->lexic_permissions['ESCAPE_CHAR']) {
                            $char .= '</span>';
                        }
                        $ESCAPE_CHAR_OPEN = false;
                        $test_str = $char;
                    } elseif ($STRING_OPEN == '') {
                        // Is this a multiline comment?
                        foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
                            $com_len = strlen($open);
                            $test_str = substr( $part, $i, $com_len );
                            $test_str_match = $test_str;
                            if ($open == $test_str) {
                                $COMMENT_MATCHED = true;
                                //@todo If remove important do remove here
                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str == GESHI_START_IMPORTANT) {
                                    if ($test_str != GESHI_START_IMPORTANT) {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
                                        } else {
                                            $attributes = ' class="coMULTI"';
                                        }
                                        $test_str = "<span$attributes>" . @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                    } else {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->important_styles . '"';
                                        } else {
                                            $attributes = ' class="imp"';
                                        }
                                        // We don't include the start of the comment if it's an
                                        // "important" part
                                        $test_str = "<span$attributes>";
                                    }
                                } else {
                                    $test_str = @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                }

                                // The closing delimiter can only begin after the
                                // opening one has ended. $comment_end is the offset
                                // just past the closing delimiter, or the end of the
                                // part when the comment is never closed. Both are
                                // computed from the real delimiter lengths, so pairs
                                // of different length (e.g. "<!--" and "-->") do not
                                // drag following characters into the comment.
                                $close_pos = strpos($part, $close, $i + $com_len);
                                if ($close_pos === false) {
                                    $comment_end = strlen($part);
                                } else {
                                    $comment_end = $close_pos + strlen($close);
                                }

                                // Short-cut through all the multiline code
                                $rest_of_comment = @htmlspecialchars(substr($part, $i + $com_len, $comment_end - $i - $com_len), ENT_COMPAT, $this->encoding);
                                if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str_match == GESHI_START_IMPORTANT) &&
                                    ($this->line_numbers != GESHI_NO_LINE_NUMBERS ||
                                    count($this->highlight_extra_lines) > 0)) {
                                    // strreplace to put close span and open span around multiline newlines
                                    $test_str .= str_replace("\n", "</span>\n<span$attributes>", $rest_of_comment);
                                } else {
                                    $test_str .= $rest_of_comment;
                                }

                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                    $test_str_match == GESHI_START_IMPORTANT) {
                                    $test_str .= '</span>';
                                }
                                // Continue with the character after the comment
                                $i = $comment_end - 1;
                                // parse the rest
                                $result .= $this->parse_non_string_part($stuff_to_parse);
                                $stuff_to_parse = '';
                                break;
                            }
                        }
                        // If we haven't matched a multiline comment, try single-line comments
                        if (!$COMMENT_MATCHED) {
                            foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
                                $com_len = strlen($comment_mark);
                                $test_str = substr($part, $i, $com_len);
                                if ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS]) {
                                    $match = ($comment_mark == $test_str);
                                } else {
                                    $match = (strtolower($comment_mark) == strtolower($test_str));
                                }
                                if ($match) {
                                    $COMMENT_MATCHED = true;
                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                        if (!$this->use_classes) {
                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
                                        } else {
                                            $attributes = ' class="co' . $comment_key . '"';
                                        }
                                        $test_str = "<span$attributes>" . @htmlspecialchars($this->change_case($test_str), ENT_COMPAT, $this->encoding);
                                    } else {
                                        $test_str = @htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
                                    }
                                    $close_pos = strpos($part, "\n", $i);
                                    $oops = false;
                                    if ($close_pos === false) {
                                        $close_pos = strlen($part);
                                        $oops = true;
                                    }
                                    $test_str .= @htmlspecialchars(substr($part, $i + $com_len, $close_pos - $i - $com_len), ENT_COMPAT, $this->encoding);
                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                        $test_str .= "</span>";
                                    }
                                    // Take into account that the comment might be the last in the source
                                    if (!$oops) {
                                        $test_str .= "\n";
                                    }
                                    $i = $close_pos;
                                    // parse the rest
                                    $result .= $this->parse_non_string_part($stuff_to_parse);
                                    $stuff_to_parse = '';
                                    break;
                                }
                            }
                        }
                    } elseif ($STRING_OPEN != '') {
                        // Otherwise, convert it to HTML form
                        if (strtolower($this->encoding) == 'utf-8') {
                            //only escape <128 (we don't want to break multibyte chars)
                            if (ord($char) < 128) {
                                $char = @htmlspecialchars($char, ENT_COMPAT, $this->encoding);
                            }
                        } else {
                            //encode everything
                            $char = @htmlspecialchars($char, ENT_COMPAT, $this->encoding);
                        }
                    }
                    // Where are we adding this char?
                    if (!$COMMENT_MATCHED) {
                        if (($STRING_OPEN == '') && !$CLOSE_STRING) {
                            $stuff_to_parse .= $char;
                        } else {
                            $result .= $char;
                            $CLOSE_STRING = false;
                        }
                    } else {
                        $result .= $test_str;
                        $COMMENT_MATCHED = false;
                    }
                }
                // Parse the last bit
                $result .= $this->parse_non_string_part($stuff_to_parse);
                $stuff_to_parse = '';
            } else {
                $result .= @htmlspecialchars($part, ENT_COMPAT, $this->encoding);
            }
            // Close the <span> that surrounds the block
            if ($this->strict_mode && $this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
                $this->lexic_permissions['SCRIPT']) {
                $result .= '</span>';
            }
        } else {
            // Else not a block to highlight
            $result .= @htmlspecialchars($part, ENT_COMPAT, $this->encoding);
        }
    }

    // Parse the last stuff (redundant?)
    $result .= $this->parse_non_string_part($stuff_to_parse);

    // Lop off the padding added at the start of $code.
    // NOTE(review): the length argument keeps everything after the first
    // character, so the trailing padding space is not removed here -
    // presumably that is dealt with later on; confirm before changing.
    $result = substr($result, 1, strlen($result) - 1);

    // Are we still in a string?
    if ($STRING_OPEN) {
        $result .= '</span>';
    }

    // We're finished: stop timing
    $this->set_time($start_time, microtime());

    return $this->finalise($result);
}
1778
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Tabs are expanded first, line by line, to an alternating run of
 * '&nbsp;' and ' ' up to the next tab stop. After that every pair of
 * spaces and every space at the start of a line is turned into a
 * non-breaking form. When line numbers are off, newlines additionally
 * get a line break tag through nl2br().
 *
 * @param string The source to indent
 * @return string The source with HTML indenting applied
 * @since 1.0.0
 * @access private
 */
function indent ($result)
{
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        foreach ($lines as $key => $line) {
            if (false === strpos($line, "\t")) {
                // Nothing to expand on this line
                continue;
            }

            // $pos is the correction subtracted from the string offset $i
            // to get the column used for the tab stop calculation
            $pos = 0;
            $tab_width = $this->tab_width;
            $length = strlen($line);
            $result_line = '';

            $IN_TAG = false;
            for ($i = 0; $i < $length; $i++) {
                $char = substr($line, $i, 1);
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG && '>' == $char) {
                    $IN_TAG = false;
                    $result_line .= '>';
                    ++$pos;
                } elseif (!$IN_TAG && '<' == $char) {
                    $IN_TAG = true;
                    $result_line .= '<';
                    ++$pos;
                } elseif (!$IN_TAG && '&' == $char) {
                    // Possibly an entity: look for its closing ';' a few
                    // characters ahead and, if found, add to the correction
                    $substr = substr($line, $i + 3, 4);
                    $posi = strpos($substr, ';');
                    if (false !== $posi) {
                        $pos += $posi + 3;
                    }
                    $result_line .= '&';
                } elseif (!$IN_TAG && "\t" == $char) {
                    $str = '';
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time
                    $strs = array(0 => '&nbsp;', 1 => ' ');
                    for ($k = 0; $k < ($tab_width - (($i - $pos) % $tab_width)); $k++) $str .= $strs[$k % 2];
                    $result_line .= $str;
                    $pos++;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No more tabs: the rest of the line is copied as is
                        $result_line .= substr($line, $i + 1);
                        break;
                    }
                } elseif ( $IN_TAG ) {
                    ++$pos;
                    $result_line .= $char;
                } else {
                    $result_line .= $char;
                }
            }
            $lines[$key] = $result_line;
        }
        $result = implode("\n", $lines);
    }
    // Other whitespace. The search strings are PAIRS of spaces: each pair
    // becomes one non-breaking and one ordinary space. Searching for single
    // spaces would expand every space twice, including the ones inside the
    // <span ...> tags.
    $result = str_replace('  ', '&nbsp; ', $result);
    $result = str_replace('  ', ' &nbsp;', $result);
    $result = str_replace("\n ", "\n&nbsp;", $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        $result = nl2br($result);
    }
    return $result;
}
1877
/**
 * Applies the language's CASE_KEYWORDS setting to a keyword.
 *
 * GESHI_CAPS_UPPER converts the keyword to upper case and
 * GESHI_CAPS_LOWER to lower case; for any other setting the keyword
 * comes back unchanged.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case($instr)
{
    switch ($this->language_data['CASE_KEYWORDS']) {
        case GESHI_CAPS_UPPER:
            return strtoupper($instr);
        case GESHI_CAPS_LOWER:
            return strtolower($instr);
        default:
            return $instr;
    }
}
1895
/**
 * Adds a url to a keyword where needed.
 *
 * Returns the opening or the closing part of the link that is put around
 * a keyword. The opening part is not finished HTML yet: it is the
 * placeholder '<|UR1|"..."> with every '.' of the URL written as '<DOT>'.
 *
 * The empty string is returned when nothing is to be added: the group has
 * no URL, the keyword starts with '&lt;/', the keyword is empty once a
 * leading '&lt;' and trailing '&gt;' are stripped, or (for the end part)
 * the language is html4strict and the keyword is just '&gt;'.
 *
 * @param string  The keyword to add the URL HTML to
 * @param int     What group the keyword is from
 * @param boolean Whether to get the HTML for the start or end; the string
 *                'BEGIN' selects the start, anything else the end
 * @return string The HTML for either the start or end of the HTML &lt;a&gt; tag
 * @since 1.0.2
 * @access private
 * @todo Get rid of ender
 */
function add_url_to_keyword ($keyword, $group, $start_or_end)
{
    if (!isset($this->language_data['URLS'][$group]) ||
        $this->language_data['URLS'][$group] == '' ||
        substr($keyword, 0, 5) == '&lt;/') {
        // No base URL for this group, or a closing tag: no link at all
        return '';
    }

    if ($start_or_end == 'BEGIN') {
        // HTML workaround... not good form (tm) but should work for 1.0.X
        if (substr($keyword, 0, 4) == '&lt;') {
            $keyword = substr($keyword, 4);
        }
        if (substr($keyword, -4) == '&gt;') {
            $keyword = substr($keyword, 0, strlen($keyword) - 4);
        }
        if ($keyword == '') {
            return '';
        }
        if (!$this->language_data['CASE_SENSITIVE'][$group]) {
            $keyword = strtolower($keyword);
        }
        // The replacements are applied in order, so dots that arrive with
        // the keyword are turned into '<DOT>' too
        return '<|UR1|"' .
            str_replace(
                array('{FNAME}', '.'),
                array(@htmlspecialchars($keyword, ENT_COMPAT, $this->encoding), '<DOT>'),
                $this->language_data['URLS'][$group]
            ) . '">';
    }

    // HTML fix. Again, dirty hackage...
    if ($this->language == 'html4strict' && '&gt;' == $keyword) {
        return '';
    }
    return '</a>';
}
1933
1934 /**
1935 * Takes a string that has no strings or comments in it, and highlights
1936 * stuff like keywords, numbers and methods.
1937 *
1938 * @param string The string to parse for keyword, numbers etc.
1939 * @since 1.0.0
1940 * @access private
1941 * @todo BUGGY! Why? Why not build string and return?
1942 */
1943 function parse_non_string_part (&$stuff_to_parse)
1944 {
1945 $stuff_to_parse = ' ' . quotemeta(@htmlspecialchars($stuff_to_parse, ENT_COMPAT, $this->encoding));
1946 // These vars will disappear in the future
1947 $func = '$this->change_case';
1948 $func2 = '$this->add_url_to_keyword';
1949
1950 //
1951 // Regular expressions
1952 //
1953 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
1954 if ($this->lexic_permissions['REGEXPS'][$key]) {
1955 if (is_array($regexp)) {
1956 $stuff_to_parse = preg_replace(
1957 "#" .
1958 $regexp[GESHI_SEARCH] .
1959 "#{$regexp[GESHI_MODIFIERS]}",
1960 "{$regexp[GESHI_BEFORE]}<|!REG3XP$key!>{$regexp[GESHI_REPLACE]}|>{$regexp[GESHI_AFTER]}",
1961 $stuff_to_parse
1962 );
1963 } else {
1964 $stuff_to_parse = preg_replace( "#(" . $regexp . ")#", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
1965 }
1966 }
1967 }
1968
1969 //
1970 // Highlight numbers. This regexp sucks... anyone with a regexp that WORKS
1971 // here wins a cookie if they send it to me. At the moment there's two doing
1972 // almost exactly the same thing, except the second one prevents a number
1973 // being highlighted twice (eg <span...><span...>5</span></span>)
1974 // Put /NUM!/ in for the styles, which gets replaced at the end.
1975 //
1976 if ($this->lexic_permissions['NUMBERS'] && preg_match('#[0-9]#', $stuff_to_parse )) {
1977 $stuff_to_parse = preg_replace('#([^a-zA-Z0-9\#])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
1978 $stuff_to_parse = preg_replace('#([^a-zA-Z0-9\#>])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
1979 }
1980
1981 // Highlight keywords
1982 // if there is a couple of alpha symbols there *might* be a keyword
1983 if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
1984 foreach ($this->language_data['KEYWORDS'] as $k => $keywordset) {
1985 if ($this->lexic_permissions['KEYWORDS'][$k]) {
1986 foreach ($keywordset as $keyword) {
1987 $keyword = quotemeta($keyword);
1988 //
1989 // This replacement checks the word is on it's own (except if brackets etc
1990 // are next to it), then highlights it. We don't put the color=" for the span
1991 // in just yet - otherwise languages with the keywords "color" or "or" have
1992 // a fit.
1993 //
1994 if (false !== stristr($stuff_to_parse, $keyword )) {
1995 $stuff_to_parse .= ' ';
1996 // Might make a more unique string for putting the number in soon
1997 // Basically, we don't put the styles in yet because then the styles themselves will
1998 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
1999 $styles = "/$k/";
2000 $keyword = quotemeta($keyword);
2001 if ($this->language_data['CASE_SENSITIVE'][$k]) {
2002 $stuff_to_parse = preg_replace(
2003 "#([^a-zA-Z0-9\$_\|\#;>])($keyword)([^a-zA-Z0-9_<\|%\-&])#e",
2004 "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END') . '\\3'",
2005 $stuff_to_parse
2006 );
2007 } else {
2008 // Change the case of the word.
2009 $stuff_to_parse = preg_replace(
2010 "#([^a-zA-Z0-9\$_\|\#;>])($keyword)([^a-zA-Z0-9_<\|%\-&])#ie",
2011 "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END') . '\\3'",
2012 $stuff_to_parse
2013 );
2014 }
2015 $stuff_to_parse = substr($stuff_to_parse, 0, strlen($stuff_to_parse) - 1);
2016 }
2017 }
2018 }
2019 }
2020 }
2021
2022 //
2023 // Now that's all done, replace /[number]/ with the correct styles
2024 //
2025 foreach ($this->language_data['KEYWORDS'] as $k => $kws) {
2026 if (!$this->use_classes) {
2027 $attributes = ' style="' . $this->language_data['STYLES']['KEYWORDS'][$k] . '"';
2028 } else {
2029 $attributes = ' class="kw' . $k . '"';
2030 }
2031 $stuff_to_parse = str_replace("/$k/", $attributes, $stuff_to_parse);
2032 }
2033
2034 // Put number styles in
2035 if (!$this->use_classes && $this->lexic_permissions['NUMBERS']) {
2036 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][0] . '"';
2037 } else {
2038 $attributes = ' class="nu0"';
2039 }
2040 $stuff_to_parse = str_replace('/NUM!/', $attributes, $stuff_to_parse);
2041
2042 //
2043 // Highlight methods and fields in objects
2044 //
2045 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
2046 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
2047 if (false !== stristr($stuff_to_parse, $splitter)) {
2048 if (!$this->use_classes) {
2049 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
2050 } else {
2051 $attributes = ' class="me' . $key . '"';
2052 }
2053 $stuff_to_parse = preg_replace("#(" . quotemeta($this->language_data['OBJECT_SPLITTERS'][$key]) . "[\s]*)([a-zA-Z\*\(][a-zA-Z0-9_\*]*)#", "\\1<|$attributes>\\2|>", $stuff_to_parse);
2054 }
2055 }
2056 }
2057
2058 //
2059 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
2060 // You try it, and see what happens ;)
2061 // TODO: Fix lexic permissions not converting entities if shouldn't
2062 // be highlighting regardless
2063 //
2064 if ($this->lexic_permissions['BRACKETS']) {
2065 $code_entities_match = array('[', ']', '(', ')', '{', '}');
2066 if (!$this->use_classes) {
2067 $code_entities_replace = array(
2068 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2069 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2070 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2071 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2072 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2073 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2074 );
2075 } else {
2076 $code_entities_replace = array(
2077 '<| class="br0">&#91;|>',
2078 '<| class="br0">&#93;|>',
2079 '<| class="br0">&#40;|>',
2080 '<| class="br0">&#41;|>',
2081 '<| class="br0">&#123;|>',
2082 '<| class="br0">&#125;|>',
2083 );
2084 }
2085 $stuff_to_parse = str_replace( $code_entities_match, $code_entities_replace, $stuff_to_parse );
2086 }
2087
2088 //
2089 // Add class/style for regexps
2090 //
2091 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
2092 if ($this->lexic_permissions['REGEXPS'][$key]) {
2093 if (!$this->use_classes) {
2094 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
2095 } else {
2096 $attributes = ' class="re' . $key . '"';
2097 }
2098 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
2099 }
2100 }
2101
2102 // Replace <DOT> with . for urls
2103 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
2104 // Replace <|UR1| with <a href= for urls also
2105 if (isset($this->link_styles[GESHI_LINK])) {
2106 if ($this->use_classes) {
2107 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
2108 } else {
2109 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
2110 }
2111 } else {
2112 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
2113 }
2114
2115 //
2116 // NOW we add the span thingy ;)
2117 //
2118
2119 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
2120 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
2121
2122 return substr(stripslashes($stuff_to_parse), 1);
2123 }
2124
/**
 * Records how long parsing took
 *
 * Each argument is split on a single space and its first two pieces are
 * combined arithmetically (presumably the "msec sec" string form produced
 * by microtime() - confirm against the caller).
 *
 * @param string The timestamp taken when parsing started
 * @param string The timestamp taken when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time ($start_time, $end_time)
{
    list($start_first, $start_second) = explode(' ', $start_time);
    list($end_first, $end_second) = explode(' ', $end_time);

    // Evaluated strictly left to right: (end parts summed) minus each start part
    $this->time = $end_first + $end_second - $start_first - $start_second;
}
2139
/**
 * Returns the parse duration previously stored by set_time()
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time ()
{
    $elapsed = $this->time;
    return $elapsed;
}
2150
/**
 * Loads a language file and stores the data it defines on this object
 *
 * After loading, strict mode is switched on when the language demands it,
 * every keyword group, single-line comment group and regexp is marked as
 * highlightable, highlighting is enabled and the overall CSS class is set
 * to the language name.
 *
 * @param string The language file to require
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language ($file_name)
{
    // The required file is expected to fill in $language_data within this
    // scope, so this local variable must keep exactly this name
    $language_data = array();
    require $file_name;

    // No validation of the loaded data is done (yet)
    $this->language_data = $language_data;

    // Languages that always apply strict mode turn it on here
    if (GESHI_ALWAYS == $this->language_data['STRICT_MODE_APPLIES']) {
        $this->strict_mode = true;
    }

    // Language data section => lexic permission section. Every group found
    // in these sections is highlighted by default.
    $default_on = array(
        'KEYWORDS'       => 'KEYWORDS',
        'COMMENT_SINGLE' => 'COMMENTS',
        'REGEXPS'        => 'REGEXPS'
    );
    foreach ($default_on as $data_section => $permission_section) {
        foreach ($this->language_data[$data_section] as $group => $unused) {
            $this->lexic_permissions[$permission_section][$group] = true;
        }
    }

    $this->enable_highlighting();

    // The default CSS class is the language name
    $this->overall_class = $this->language;
}
2183
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The code is split into lines; with line numbers enabled each line becomes
 * an <li> wrapping a <div>, otherwise lines are emitted as-is except for
 * "extra highlighted" lines, which are wrapped in a <div>. Empty spans and
 * divs are purged afterwards and the result is placed between header() and
 * footer().
 *
 * Each <li> carries at most one class attribute and one style attribute:
 * the extra-highlight class/style is merged into the regular one instead of
 * being appended as a duplicate attribute (which browsers ignore). Only
 * truly empty lines are padded with &nbsp; - a line consisting of "0" is
 * kept.
 *
 * @param string The code already parsed
 * @return string The code nicely finalised
 * @since 1.0.0
 * @access private
 */
function finalise ($parsed_code)
{
    // Remove end parts of important declarations when no start marker is
    // present. Known to be buggy upstream; kept for compatibility.
    if ($this->enable_important_blocks &&
        (strstr($parsed_code, @htmlspecialchars(GESHI_START_IMPORTANT, ENT_COMPAT, $this->encoding)) === false)) {
        $parsed_code = str_replace(@htmlspecialchars(GESHI_END_IMPORTANT, ENT_COMPAT, $this->encoding), '', $parsed_code);
    }

    // Add HTML whitespace stuff unless we're using the <pre> header
    if ($this->header_type != GESHI_HEADER_PRE) {
        $parsed_code = $this->indent($parsed_code);
    }

    // Both output modes work line by line
    $code = explode("\n", $parsed_code);
    $parsed_code = '';
    $i = 0;

    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        // With the <pre> header we must not add newlines: the <pre> would
        // render them, and the <li>s already break the lines for us
        $ls = ($this->header_type != GESHI_HEADER_PRE) ? "\n" : '';

        foreach ($code as $line) {
            // Strict comparison: a plain truth test would also discard "0"
            if ($line === '') {
                $line = '&nbsp;';
            }

            // Every nth row is a "special line" when fancy numbering is on
            $is_special = ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1));

            ++$i;
            $is_extra = in_array($i, $this->highlight_extra_lines);

            if ($this->use_classes) {
                $li_class = $is_special ? 'li2' : 'li1';
                if ($is_extra) {
                    // One class attribute holding both class names
                    $li_class .= ' ln-xtra';
                }
                $attr = ' class="' . $li_class . '"';
                $def_attr = ' class="' . ($is_special ? 'de2' : 'de1') . '"';
            } else {
                $li_style = $is_special ? $this->line_style2 : $this->line_style1;
                if ($is_extra) {
                    // One style attribute holding both declaration lists;
                    // make sure they are separated by a semicolon
                    $li_style = rtrim($li_style);
                    if ($li_style != '' && substr($li_style, -1) != ';') {
                        $li_style .= ';';
                    }
                    $li_style .= $this->highlight_extra_lines_style;
                }
                $attr = ' style="' . $li_style . '"';
                // This style "covers up" the line-number styles so they do
                // not apply to the actual code on the line
                $def_attr = ' style="' . $this->code_style . '"';
            }

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attr .= " id=\"{$this->overall_id}-{$i}\"";
            }

            // Add in the line surrounded by appropriate list HTML
            $parsed_code .= "<li$attr><div$def_attr>$line</div></li>$ls";
        }
    } else {
        // No line numbers, but lines highlighted extra still need handling.
        // Divs are used so the full width of the code is highlighted.
        foreach ($code as $line) {
            // Strict comparison: a plain truth test would also discard "0"
            if ($line === '') {
                $line = '&nbsp;';
            }

            if (in_array(++$i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    $parsed_code .= '<div class="ln-xtra">';
                } else {
                    $parsed_code .= "<div style=\"{$this->highlight_extra_lines_style}\">";
                }
                $parsed_code .= $line . "</div>\n";
            } else {
                $parsed_code .= $line . "\n";
            }
        }
    }

    // Purge spans and divs that contain nothing but whitespace
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
    $parsed_code = preg_replace('#<div[^>]+>(\s*)</div>#', '\\1', $parsed_code);

    if ($this->header_type == GESHI_HEADER_PRE) {
        // Enforce line numbers when using pre
        $parsed_code = str_replace('<li></li>', '<li>&nbsp;</li>', $parsed_code);
    }

    return $this->header() . rtrim($parsed_code) . $this->footer();
}
2300
/**
 * Builds the opening HTML for the code block (with correct attributes)
 *
 * Depending on the header type this is nothing, a <pre> or a <div>,
 * followed by the formatted header content and, when line numbers are in
 * use, the opening <ol>. Nothing is returned for a header type that is
 * none of GESHI_HEADER_NONE, GESHI_HEADER_PRE or GESHI_HEADER_DIV.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header ()
{
    // Get attributes needed
    $attributes = $this->get_attributes();

    // A start attribute is only needed when numbering does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Get the header HTML
    $header = $this->format_header_content();

    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // No container at all: just the header content and maybe the list
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($numbered) {
            return "$header<ol$ol_attributes>";
        }
        return $header;
    }

    // Work out which container element to open
    if ($this->header_type == GESHI_HEADER_PRE) {
        $tag = 'pre';
    } elseif ($this->header_type == GESHI_HEADER_DIV) {
        $tag = 'div';
    } else {
        // Unknown header type: no value is returned
        return;
    }

    $html = '<' . $tag . $attributes . '>' . $header;
    if ($numbered) {
        $html .= '<ol' . $ol_attributes . '>';
    }
    return $html;
}
2344
/**
 * Returns the header content, formatted for output
 *
 * Newlines are stripped when the <pre> header is in use, placeholder
 * keywords are substituted and the result is wrapped in a <div> carrying
 * either the "head" class or the header content style. Nothing is returned
 * when there is no header content.
 *
 * @return string The header content, formatted for output
 * @since 1.0.2
 * @access private
 */
function format_header_content ()
{
    $header = $this->header_content;
    if (!$header) {
        return;
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $header = str_replace("\n", '', $header);
    }
    $header = $this->replace_keywords($header);

    $attr = $this->use_classes
        ? ' class="head"'
        : " style=\"{$this->header_content_style}\"";

    return "<div$attr>$header</div>";
}
2369
/**
 * Builds the closing HTML for the code block
 *
 * Closes the <ol> when line numbers are in use, appends the formatted
 * footer content and closes the container: none for GESHI_HEADER_NONE,
 * </div> for GESHI_HEADER_DIV and </pre> for every other header type.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer ()
{
    $footer_content = $this->format_footer_content();
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // No container to close: only the list (if any) and the footer content
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($numbered) {
            return '</ol>' . $footer_content;
        }
        return $footer_content;
    }

    $closing_tag = ($this->header_type == GESHI_HEADER_DIV) ? '</div>' : '</pre>';
    $list_close = $numbered ? '</ol>' : '';

    return $list_close . $footer_content . $closing_tag;
}
2398
/**
 * Returns the footer content, formatted for output
 *
 * Newlines are stripped when the <pre> header is in use, placeholder
 * keywords are substituted and the result is wrapped in a <div> carrying
 * either the "foot" class or the footer content style. Nothing is returned
 * when there is no footer content.
 *
 * @return string The footer content, formatted for output
 * @since 1.0.2
 * @access private
 */
function format_footer_content ()
{
    $footer = $this->footer_content;
    if (!$footer) {
        return;
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $footer = str_replace("\n", '', $footer);
    }
    $footer = $this->replace_keywords($footer);

    $attr = $this->use_classes
        ? ' class="foot"'
        : " style=\"{$this->footer_content_style}\"";

    return "<div$attr>$footer</div>";
}
2423
/**
 * Substitutes the placeholders <TIME>, <LANGUAGE> and <VERSION> in header
 * or footer content with the parse time (three decimals), the language
 * name and the GeSHi version respectively
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords ($instr)
{
    // Placeholder => value, replaced in this order
    $substitutions = array(
        '<TIME>'     => number_format($this->get_time(), 3),
        '<LANGUAGE>' => $this->language,
        '<VERSION>'  => GESHI_VERSION
    );

    return str_replace(array_keys($substitutions), array_values($substitutions), $instr);
}
2448
/**
 * Builds the HTML attribute string (class, id, style - in that order) for
 * the element containing the code. Attributes whose value is empty are
 * left out.
 *
 * @return The CSS attributes for this code
 * @since 1.0.0
 * @access private
 * @todo Document behaviour change - class is outputted regardless of whether we're using classes or not.
 *       Same with style
 */
function get_attributes ()
{
    $candidates = array(
        'class' => $this->overall_class,
        'id'    => $this->overall_id,
        'style' => $this->overall_style
    );

    $attributes = '';
    foreach ($candidates as $name => $value) {
        if ($value != '') {
            $attributes .= ' ' . $name . '="' . $value . '"';
        }
    }
    return $attributes;
}
2473
/**
 * Returns a stylesheet for the highlighted code. If $economy mode
 * is true, we only return the stylesheet declarations that matter for
 * this code block instead of the whole thing
 *
 * In economy mode a rule is skipped when its style is empty or when the
 * corresponding lexic is not being highlighted; otherwise every rule is
 * written out.
 *
 * Each entry of the link styles produces exactly one rule, for its own
 * pseudo-class (a:link, a:hover, a:active or a:visited). The .brN rules are
 * built from the BRACKETS styles - the same data used for inline bracket
 * styling - falling back to the SYMBOLS styles when a language defines no
 * BRACKETS styles.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 * @since 1.0.0
 */
function get_stylesheet ($economy_mode = true)
{
    // If there's an error, chances are that the language file
    // won't have populated the language data, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class. Otherwise, a selector
    // of '' means that these styles will be applied anywhere
    $selector = ($this->overall_id != '') ? "#{$this->overall_id} " : '';
    $selector = ($selector == '' && $this->overall_class != '') ? ".{$this->overall_class} " : $selector;

    // Header of the stylesheet
    if (!$economy_mode) {
        $stylesheet = "/**\n * GeSHi Dynamically Generated Stylesheet\n * --------------------------------------\n * Dynamically generated stylesheet for {$this->language}\n * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n * GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter)\n */\n";
    } else {
        $stylesheet = '/* GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter) */' . "\n";
    }

    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // Default styles for the code inside each line
    if (!$economy_mode || $numbered) {
        $stylesheet .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Add overall styles
    if (!$economy_mode || $this->overall_style != '') {
        $stylesheet .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Add styles for links: one rule per entry, for that entry's own
    // pseudo-class only
    foreach ($this->link_styles as $key => $style) {
        if ($economy_mode && $style == '') {
            continue;
        }
        switch ($key) {
            case GESHI_LINK:
                $pseudo_class = 'link';
                break;
            case GESHI_HOVER:
                $pseudo_class = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo_class = 'active';
                break;
            case GESHI_VISITED:
                $pseudo_class = 'visited';
                break;
            default:
                $pseudo_class = '';
                break;
        }
        if ($pseudo_class != '') {
            $stylesheet .= $selector . 'a:' . $pseudo_class . ' {' . $style . "}\n";
        }
    }

    // Header and footer
    if (!$economy_mode || $this->header_content_style != '') {
        $stylesheet .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if (!$economy_mode || $this->footer_content_style != '') {
        $stylesheet .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }

    // Styles for important stuff
    if (!$economy_mode || $this->important_styles != '') {
        $stylesheet .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || count($this->highlight_extra_lines)) {
        $stylesheet .= $selector . '.ln-xtra {' . $this->highlight_extra_lines_style . "}\n";
    }

    // Simple line number styles
    if (!$economy_mode || ($numbered && $this->line_style1 != '')) {
        $stylesheet .= $selector . 'li {' . $this->line_style1 . "}\n";
    }

    // If there is a style set for fancy line numbers, echo it out
    if (!$economy_mode || ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && $this->line_style2 != '')) {
        $stylesheet .= $selector . 'li.li2 {' . $this->line_style2 . "}\n";
    }

    // Bracket spans carry the brN class, so their rules must come from the
    // bracket styles; fall back to the symbol styles if there are none
    $bracket_section = isset($this->language_data['STYLES']['BRACKETS']) ? 'BRACKETS' : 'SYMBOLS';

    // One row per lexic, in output order:
    //   STYLES section, class prefix, lexic permission section (null when
    //   there is none), whether the permission is tracked per group
    $lexics = array(
        array('KEYWORDS',       'kw', 'KEYWORDS',    true),
        array('COMMENTS',       'co', 'COMMENTS',    true),
        array('ESCAPE_CHAR',    'es', 'ESCAPE_CHAR', false),
        array($bracket_section, 'br', 'BRACKETS',    false),
        array('STRINGS',        'st', 'STRINGS',     false),
        array('NUMBERS',        'nu', 'NUMBERS',     false),
        array('METHODS',        'me', 'METHODS',     false),
        array('SCRIPT',         'sc', null,          false),
        array('REGEXPS',        're', 'REGEXPS',     true)
    );

    foreach ($lexics as $lexic) {
        list($style_section, $prefix, $permission_section, $per_group) = $lexic;

        foreach ($this->language_data['STYLES'][$style_section] as $group => $styles) {
            if ($economy_mode) {
                // Economy mode: skip empty styles...
                if ($styles == '') {
                    continue;
                }
                // ...and lexics that are not being highlighted
                if ($permission_section !== null) {
                    $allowed = $per_group
                        ? $this->lexic_permissions[$permission_section][$group]
                        : $this->lexic_permissions[$permission_section];
                    if (!$allowed) {
                        continue;
                    }
                }
            }
            $stylesheet .= $selector . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    return $stylesheet;
}
2617
2618 } // End Class GeSHi
2619
2620
if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * The highlighted code is produced without a header container and
     * wrapped in <code>...</code>.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. Optional: it may be left
     *               blank, as from version 1.0.7 the path should be
     *               automatically detected.
     *               NOTE(review): confirm the GeSHi constructor treats an
     *               empty string as "auto-detect".
     * @param boolean Whether to return the result or to echo
     * @return mixed The code highlighted if $return is true; otherwise the
     *               code is echoed and false is returned when GeSHi reports
     *               an error, true when it does not
     * @since 1.0.2
     */
    function geshi_highlight ($string, $language, $path = '', $return = false)
    {
        $geshi = new GeSHi($string, $language, $path);
        $geshi->set_header_type(GESHI_HEADER_NONE);

        $highlighted = '<code>' . $geshi->parse_code() . '</code>';
        if ($return) {
            return $highlighted;
        }

        echo $highlighted;
        return $geshi->error() ? false : true;
    }
}
2647
2648 ?>