<?php
/*======================================================================*\
|| #################################################################### ||
|| # vBulletin 4.0.5
|| # ---------------------------------------------------------------- # ||
|| # Copyright ©2000-2010 vBulletin Solutions Inc. All Rights Reserved. ||
|| # This file may not be redistributed in whole or significant part. # ||
|| # ---------------- VBULLETIN IS NOT FREE SOFTWARE ---------------- # ||
|| # http://www.vbulletin.com | http://www.vbulletin.com/license.html # ||
|| #################################################################### ||
\*======================================================================*/
// Stop right away unless $GLOBALS['vbulletin']->db is set
// (presumably this means the file was not loaded through the vBulletin bootstrap -- confirm).
if (isset($GLOBALS['vbulletin']->db) === false)
{
	exit;
}
/**
* Class to parse the HTML generated by the WYSIWYG editor to BB code.
* Can be extended to parse additional tags or change the parsing behavior.
*
* This class can be used for generic HTML to BB code conversions, but it is
* not always ideally suited to this.
*
* @package vBulletin
*/
class vB_WysiwygHtmlParser
{
	/**
	* Primary vBulletin registry object
	*
	* @var	vB_Registry
	*/
	protected $registry;

	/**
	* Whether HTML is allowed. If false, non parsed HTML will be stripped.
	*
	* @var	boolean
	*/
	protected $allow_html = false;

	/**
	* The number of linebreaks a <p> tag generates. This is usually 1 when
	* parsing from the WYSIWYG editors and 2 in other cases.
	*
	* @var	int
	*/
	protected $p_linebreaks = 1;

	/**
	* The rules for the "normal" HTML tags that should be parsed. Only tags
	* that are matched (ie, <x>...</x>) and that are parsed without additional
	* context belong here. See load_tag_rules for a format specification.
	*
	* @var	array
	*/
	protected $tags = array();

	/**
	* Arbitrary array that can be used for tracking limited tag state when parsing.
	* Use the push/pop state methods to modify and the in_state method to check.
	* Useful if you want to parse a dependent tag differently if found in an
	* unexpected place (eg, <li> tag not in a list).
	*
	* Keys are state names, values are the number of times the state is currently pushed.
	*
	* @var	array
	*/
	protected $state = array();

	/**
	* Constructor. Automatically loads the tag rules.
	*
	* @param	vB_Registry	Registry object
	*/
	public function __construct(vB_Registry $registry)
	{
		$this->registry = $registry;
		$this->tags = $this->load_tag_rules();
	}

	/**
	* Returns the rule set for parsing matched tags. Array key is name of
	* HTML tag to match. Value is either a simple callback or an array with
	* keys 'callback' and 'param' (an optional extra value to pass in to the
	* parsing callback function). Callbacks may use the string '$this'
	* to refer to the current class instance.
	*
	* The order of the entries is the order in which the tags are parsed.
	*
	* @return	array
	*/
	public function load_tag_rules()
	{
		return array(
			'b' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'b'
			),
			'strong' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'b'
			),
			'i' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'i'
			),
			'em' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'i'
			),
			'u' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'u'
			),
			'blockquote' => array(
				'callback' => array('$this', 'parse_tag_basic'),
				'param' => 'indent'
			),
			'ol' => array('$this', 'parse_tag_list'),
			'ul' => array('$this', 'parse_tag_list'),
			'li' => array('$this', 'parse_tag_li'),
			'span' => array('$this', 'parse_tag_span'),
			'font' => array('$this', 'parse_tag_font'),
			'a' => array('$this', 'parse_tag_a'),
			'h1' => array('$this', 'parse_tag_heading'),
			'h2' => array('$this', 'parse_tag_heading'),
			'h3' => array('$this', 'parse_tag_heading'),
			'h4' => array('$this', 'parse_tag_heading'),
			'h5' => array('$this', 'parse_tag_heading'),
			'h6' => array('$this', 'parse_tag_heading'),
			'div' => array('$this', 'parse_tag_div'),
			'p' => array('$this', 'parse_tag_p'),
		);
	}

	/**
	* Sets the number of line breaks a <p> tag inserts.
	* Negative values are treated as 0.
	*
	* @param	int	Number of line breaks
	*/
	public function set_p_linebreaks($linebreaks)
	{
		$this->p_linebreaks = max(0, intval($linebreaks));
	}

	/**
	* Determines whether the parser is in the named state.
	* Note that a parser can be in multiple states simultaneously.
	* The state is not tracked with a stack.
	*
	* @param	string	State
	*
	* @return	boolean
	*/
	public function in_state($state)
	{
		return !empty($this->state[$state]);
	}

	/**
	* Pushes a new state into the list (increments its nesting counter).
	*
	* @param	string	State
	*/
	protected function push_state($state)
	{
		if (isset($this->state[$state]))
		{
			$this->state[$state]++;
		}
		else
		{
			$this->state[$state] = 1;
		}
	}

	/**
	* Pops a state off the list (decrements its nesting counter and
	* forgets the state entirely once the counter reaches zero).
	*
	* @param	string	State
	*/
	protected function pop_state($state)
	{
		if (!isset($this->state[$state]))
		{
			return;
		}

		$this->state[$state]--;
		if ($this->state[$state] <= 0)
		{
			unset($this->state[$state]);
		}
	}

	/**
	* Parses the specified HTML into BB code
	*
	* @param	string	HTML to parse
	* @param	boolean	Whether to allow unparsable HTML to remain
	*
	* @return	string	Parsed version (BB code)
	*/
	public function parse($unparsed, $allow_html = false)
	{
		$this->allow_html = $allow_html;

		$parsed = $this->filter_before($unparsed);
		$parsed = $this->parse_html($parsed);
		$parsed = $this->cleanup_after($parsed);

		return $parsed;
	}

	/**
	* Template method for pre-filtering the HTML before it is parsed.
	* Filters things like BB code mixed into HTML, browser specific wrapping,
	* and HTML within BB codes that don't support nested tags.
	*
	* @param	string	Text pre-filter
	*
	* @return	string	Text post-filter
	*/
	public function filter_before($text)
	{
		$text = $this->filter_html_tags($text);
		$text = $this->filter_linebreaks_spaces($text);
		$text = $this->filter_bbcode($text);

		return $text;
	}

	/**
	* Reproduces the quoting that the removed PCRE /e modifier applied to captured
	* text once it had been embedded in a single-quoted PHP string: double quotes
	* arrive backslash-escaped. The legacy callbacks (escape_within_url,
	* strip_html_from_bbcode and the external handle_wysiwyg_img) were written
	* against that input format, so it is emulated before calling them.
	*
	* @param	string	Raw captured text
	*
	* @return	string	Text with every " replaced by \"
	*/
	protected function quote_for_legacy_callback($value)
	{
		return str_replace('"', '\\"', $value);
	}

	/**
	* Filters the HTML tags to fix common issues (HTML intertwined with BB code).
	*
	* @param	string	Text pre-filter
	*
	* @return	string	Text post-filter
	*/
	protected function filter_html_tags($text)
	{
		// check for the WYSIWYG editor being lame with URL tags followed by bbcode tags
		$text = preg_replace(
			'#<a href="([^"]*)\[([^"]+)"(.*)>(.*)\[\\2</a>#siU',
			'<a href="\1"\3>\4</a>[\2',
			$text
		);

		// make < and > safe inside URL/IMG tags so they don't get stripped by strip_tags
		return preg_replace_callback(
			'#(<[^<>]+ (src|href))=(\'|"|)??(.*)(\\3)#siU',
			array($this, 'escape_within_url_callback'),
			$text
		);
	}

	/**
	* preg_replace_callback adapter for escape_within_url.
	*
	* @param	array	Matches: [1] tag text up to and including src/href, [3] attribute delimiter, [4] URL
	*
	* @return	string	Replacement text
	*/
	protected function escape_within_url_callback($matches)
	{
		return $this->escape_within_url(
			$this->quote_for_legacy_callback($matches[1]),
			$this->quote_for_legacy_callback($matches[4]),
			$this->quote_for_legacy_callback($matches[3])
		);
	}

	/**
	* Escapes special HTML characters within src/href attributes
	* so they are not removed by strip_tags calls later.
	*
	* All arguments are expected with double quotes backslash-escaped (\");
	* that escaping is removed here.
	*
	* @param	string	Type of call (tag name and src/href)
	* @param	string	URL that will be escaped
	* @param	string	Delimiter for the attribute
	*
	* @return	string	Escaped output.
	*/
	protected function escape_within_url($type, $url, $delimiter = '\\"')
	{
		$delimiter = str_replace('\\"', '"', $delimiter);
		$url = str_replace(
			array('<', '>', '\\"'),
			array('&lt;', '&gt;', '"'),
			$url
		);

		return str_replace('\\"', '"', $type) . '=' . $delimiter . $url . $delimiter;
	}

	/**
	* Filters line breaks and spaces within the HTML. Also handles a browser-specific
	* behavior with soft wrapping.
	*
	* @param	string	Text pre-filter
	*
	* @return	string	Text post-filter
	*/
	protected function filter_linebreaks_spaces($text)
	{
		$text = str_replace('&nbsp;', ' ', $text);

		// deal with newline characters
		if (is_browser('mozilla'))
		{
			// a newline that does not follow a <br> becomes a space instead of vanishing
			$text = preg_replace('#(?<!<br>|<br />|\r)(\r\n|\n|\r)#', ' ', $text);
		}

		$text = preg_replace('#(\r\n|\n|\r)#', '', $text);

		return $text;
	}

	/**
	* Filters BB code behaviors before the HTML is parsed. Includes removing
	* HTML from BB codes that don't support it and removing linking HTML from
	* a manually entered BB code.
	*
	* @param	string	Text pre-filter
	*
	* @return	string	Text post-filter
	*/
	protected function filter_bbcode($text)
	{
		// strip html from html/php tags
		$text = preg_replace_callback(
			'#\[(html|php)\]((?>[^\[]+?|(?R)|.))*\[/\\1\]#siU',
			array($this, 'strip_html_from_bbcode_callback'),
			$text
		);

		// strip linked URLs from manually entered [url] tags (generic)
		return preg_replace(
			'#\[url=(\'|"|&quot;|)<A href="(.*)/??">\\2/??</A>#siU',
			'[URL=$1$2',
			$text
		);
	}

	/**
	* preg_replace_callback adapter for strip_html_from_bbcode.
	*
	* @param	array	Matches: [0] the complete BB code including its open/close tags
	*
	* @return	string	Replacement text
	*/
	protected function strip_html_from_bbcode_callback($matches)
	{
		return $this->strip_html_from_bbcode($this->quote_for_legacy_callback($matches[0]));
	}

	/**
	* Removes HTML from BB codes that don't support it.
	* Standard line break HTML (<p>, <br>) is maintained.
	*
	* The text is expected with double quotes backslash-escaped (\");
	* that escaping is removed here.
	*
	* @param	string	Text within the BB code (with HTML)
	*
	* @return	string	Text without the HTML
	*/
	protected function strip_html_from_bbcode($text)
	{
		$text = str_replace('\\"', '"', $text);
		return strip_tags($text, '<p><br>');
	}

	/**
	* Parses the HTML tags within a string.
	* Handles matched and special unmatched tags.
	*
	* @param	string	Text pre-parsed
	*
	* @return	string	Parsed text (BB code)
	*/
	public function parse_html($text)
	{
		$text = $this->parse_unmatched_tags($text);
		$text = $this->parse_matched_tags($text);

		return $text;
	}

	/**
	* Parses special unmatched HTML tags like <img> and <br>.
	*
	* @param	string	Text pre-parsed
	*
	* @return	string	Parsed text
	*/
	protected function parse_unmatched_tags($text)
	{
		// provides handle_wysiwyg_img()
		require_once(DIR . '/includes/functions_wysiwyg.php');

		// smilies
		$text = preg_replace_callback(
			'#<img[^>]+smilieid="(\d+)".*>#siU',
			array($this, 'parse_smilie_img_callback'),
			$text
		);

		// img tag
		$text = preg_replace_callback(
			'#<img[^>]+src=(\'|")(.*)(\\1).*>#siU',
			array($this, 'parse_img_callback'),
			$text
		);

		// <br> to newline
		return preg_replace('#<br.*>#siU', "\n", $text);
	}

	/**
	* preg_replace_callback adapter that turns a smilie <img> into its smilie text.
	*
	* @param	array	Matches: [1] the smilie ID (digits only)
	*
	* @return	string	Smilie text
	*/
	protected function parse_smilie_img_callback($matches)
	{
		return $this->translate_smilie_id_text(intval($matches[1]));
	}

	/**
	* preg_replace_callback adapter that hands an <img> tag to handle_wysiwyg_img().
	* The arguments keep the \" quoting that the function received from the
	* original /e based call.
	*
	* @param	array	Matches: [0] the complete <img> tag, [2] the src attribute value
	*
	* @return	string	Replacement text
	*/
	protected function parse_img_callback($matches)
	{
		return handle_wysiwyg_img(
			$this->quote_for_legacy_callback($matches[2]),
			$this->quote_for_legacy_callback($matches[0])
		);
	}

	/**
	* Translates the specified smilie ID to the text that represents that smilie.
	*
	* @param	int	Smilie ID
	*
	* @return	string	Smilie text; empty string if the ID is unknown
	*/
	protected function translate_smilie_id_text($smilieid)
	{
		static $smilies;

		// build the smilies array if we haven't already
		if (!is_array($smilies))
		{
			$smilies = array();

			if (is_array($this->registry->smiliecache))
			{
				// get smilies from the datastore smiliecache
				foreach ($this->registry->smiliecache AS $smilie)
				{
					$smilies["$smilie[smilieid]"] = $smilie['smilietext'];
				}
			}
			else
			{
				// query smilies from the database
				$getsmilies = $this->registry->db->query_read_slave("SELECT smilieid, smilietext FROM " . TABLE_PREFIX . "smilie");
				while ($smilie = $this->registry->db->fetch_array($getsmilies))
				{
					$smilies["$smilie[smilieid]"] = $smilie['smilietext'];
				}
				$this->registry->db->free_result($getsmilies);
			}
		}

		// return the smilietext for this smilie
		return (isset($smilies["$smilieid"]) ? $smilies["$smilieid"] : '');
	}

	/**
	* Parses an <img> tag. Can only parse the src attribute.
	*
	* The URL is expected with double quotes backslash-escaped (\");
	* that escaping is removed here.
	*
	* @param	string	The image's URL (src attribute)
	*
	* @return	string	An IMG BB code
	*/
	protected function parse_tag_img($img_url)
	{
		$img_url = str_replace('\\"', '"', $img_url);

		if (!preg_match('#^https?://#i', $img_url))
		{
			// relative URL, prefix it with the URL to this board
			$img_url = create_full_url($img_url);
		}

		return '[IMG]' . $img_url . '[/IMG]';
	}

	/**
	* Parses "normal" matched HTML tags. This function (and the individual
	* tag functions) are the primary places where the tag parsing rules are used.
	*
	* @param	string	Text pre-parsed
	*
	* @return	string	Text with matched HTML tags parsed (the ones specified in rules at least)
	*/
	protected function parse_matched_tags($text)
	{
		// kill named anchors
		$text = preg_replace('#<a name=[^>]*>(.*)</a>#siU', '\1', $text);

		foreach (array_keys($this->tags) AS $tag_name)
		{
			$text = $this->parse_tag_by_name($tag_name, $text);
		}

		return $text;
	}

	/**
	* Parses a matched HTML tag by the name of the tag. This is resolved to the tag
	* parsing rules array and handled from there.
	*
	* @param	string	Name of the HTML tag to parse
	* @param	string	Text before this tag has been parsed
	* @param	mixed	Extra param info to pass to the callback; overrides the param specified in the tag rules
	*
	* @return	string	Text with tag parsed (unchanged if there is no rule for the tag)
	*/
	public function parse_tag_by_name($tag_name, $text, $force_param = null)
	{
		$tag_name = strtolower($tag_name);

		if (!isset($this->tags[$tag_name]))
		{
			return $text;
		}

		$tag_info = $this->tags[$tag_name];

		if (isset($tag_info['callback']))
		{
			// extended rule format: callback plus optional param
			$callback = $tag_info['callback'];
			$extra_param = (isset($tag_info['param']) ? $tag_info['param'] : null);
		}
		else
		{
			// simple rule format: the rule is the callback itself
			$callback = $tag_info;
			$extra_param = null;
		}

		// resolve the '$this' placeholder to the actual instance
		if (is_array($callback) AND $callback[0] === '$this')
		{
			$callback[0] = $this;
		}

		if ($force_param !== null)
		{
			$extra_param = $force_param;
		}

		// '' is parse_tag's own default for "no extra argument"
		return $this->parse_tag($tag_name, $text, $callback, ($extra_param !== null ? $extra_param : ''));
	}

	/**
	* Post parsing clean up. Removes unparsed HTML and sanitizes some BB codes.
	*
	* @param	string	Text pre-cleanup
	*
	* @return	string	Text post-cleanup
	*/
	public function cleanup_after($text)
	{
		$text = $this->cleanup_html($text);
		$text = $this->cleanup_smilies_from_images($text);
		$text = $this->cleanup_bbcode($text);

		return $text;
	}

	/**
	* Cleans up HTML stragglers after the parsing.
	*
	* @param	string	Text pre-cleanup
	*
	* @return	string	Text post-cleanup
	*/
	protected function cleanup_html($text)
	{
		// regex find / replace #2
		$pregfind = array(
			'#<li>(.*)((?=<li>)|</li>)#iU', // fix some list issues
			'#<p></p>#i', // kill empty <p> tags
			'#<p.*>#iU', // kill any extra <p> tags
		);
		$pregreplace = array(
			"\\1\n", // fix some list issues
			'', // kill empty <p> tags
			"\n", // kill any extra <p> tags
		);
		$text = preg_replace($pregfind, $pregreplace, $text);

		// simple tag removals; mainly using PCRE for case insensitivity and /?
		$text = preg_replace('#</?(A|LI|FONT|IMG)>#siU', '', $text);

		if (!$this->allow_html)
		{
			$text = $this->cleanup_disallowed_html($text);
		}

		// basic string replacements #2; don't replace &quot; because browsers don't auto-encode quotes.
		// &amp; must come last so that "&amp;lt;" ends up as "&lt;" rather than "<"
		$strfind = array(
			'&lt;', // un-htmlspecialchars <
			'&gt;', // un-htmlspecialchars >
			'&amp;', // un-htmlspecialchars &
		);
		$strreplace = array(
			'<', // un-htmlspecialchars <
			'>', // un-htmlspecialchars >
			'&', // un-htmlspecialchars &
		);

		return str_replace($strfind, $strreplace, $text);
	}

	/**
	* Cleans up disallowed HTML. This generally removes all HTML. It is normally
	* called if HTML is not allowed.
	*
	* @param	string	Text pre-cleanup
	*
	* @return	string	Text post-cleanup
	*/
	protected function cleanup_disallowed_html($text)
	{
		// strip_tags would keep the contents of these elements, so remove them completely first
		$text = preg_replace('#<script[^>]*>(.*)</script>#siU', '', $text);
		$text = preg_replace('#<style[^>]*>(.*)</style>#siU', '', $text);

		return strip_tags($text);
	}

	/**
	* Translates image BB codes that represent smilies into the actual
	* smilie representation.
	*
	* @param	string	Text pre-cleanup
	*
	* @return	string	Text post-cleanup
	*/
	protected function cleanup_smilies_from_images($text)
	{
		if (!is_array($this->registry->smiliecache))
		{
			return $text;
		}

		$strfind = array();
		$strreplace = array();

		foreach ($this->registry->smiliecache AS $smilie)
		{
			// [IMG]images/smilies/frown.gif[/IMG]
			$strfind[] = '[IMG]' . $smilie['smiliepath'] . '[/IMG]';
			$strreplace[] = $smilie['smilietext'];

			// [IMG]http://domain.com/forum/images/smilies/frown.gif[/IMG]
			$strfind[] = '[IMG]' . create_full_url($smilie['smiliepath']) . '[/IMG]';
			$strreplace[] = $smilie['smilietext'];
		}

		return str_replace($strfind, $strreplace, $text);
	}

	/**
	* General BB code cleanup after HTML parsing.
	*
	* @param	string	Text pre-cleanup
	*
	* @return	string	Text post-cleanup
	*/
	protected function cleanup_bbcode($text)
	{
		if (is_browser('mozilla'))
		{
			// mozilla treats line breaks before/after lists a little differently from IE (see #5774)
			$text = preg_replace('#\[(list)#i', "\n[\\1", $text);
			$text = preg_replace('#\[(/list)\]#i', "[\\1]\n", $text);
		}

		// make sure list tags and list items start on a line of their own
		$text = preg_replace('#(?<!\r|\n|^)\[(/list|list|\*)\]#i', "\n[\\1]", $text);

		// replace advanced URL tags that should actually be basic ones
		$text = preg_replace('#\[URL=("|\'|)(.*)\\1\]\\2\[/URL\]#siU', '[URL]$2[/URL]', $text);

		return $text;
	}

	/**
	* Parses the style attribute from a list of attributes and determines
	* if tags need to be wrapped. This does not do the wrapping, but gives you
	* the text to prepend/append.
	*
	* Opening tags are added after the existing prepend text and closing tags
	* before the existing append text, so the BB codes stay properly nested.
	*
	* @param	string	Attribute string (multiple attributes within)
	* @param	string	(return) Text to prepend
	* @param	string	(return) Text to append
	*/
	protected function parse_style_attribute($tagoptions, &$prependtags, &$appendtags)
	{
		// 'option' => true means the BB code takes a value, found in capture group 'match'
		$searchlist = array(
			array('tag' => 'left', 'option' => false, 'regex' => '#text-align:\s*(left);?#i'),
			array('tag' => 'center', 'option' => false, 'regex' => '#text-align:\s*(center);?#i'),
			array('tag' => 'right', 'option' => false, 'regex' => '#text-align:\s*(right);?#i'),
			array('tag' => 'color', 'option' => true, 'regex' => '#(?<![a-z0-9-])color:\s*([^;]+);?#i', 'match' => 1),
			array('tag' => 'font', 'option' => true, 'regex' => '#font-family:\s*(\'|)([^;,\']+)\\1[^;]*;?#i', 'match' => 2),
			array('tag' => 'b', 'option' => false, 'regex' => '#font-weight:\s*(bold);?#i'),
			array('tag' => 'i', 'option' => false, 'regex' => '#font-style:\s*(italic);?#i'),
			array('tag' => 'u', 'option' => false, 'regex' => '#text-decoration:\s*(underline);?#i')
		);

		$style = $this->parse_wysiwyg_tag_attribute('style=', $tagoptions);

		// normalize "color: rgb(r, g, b)" to "color: #RRGGBB"
		$style = (string) preg_replace_callback(
			'#(?<![a-z0-9-])color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)(;?)#i',
			array($this, 'rgb_color_to_hex_callback'),
			$style
		);

		foreach ($searchlist AS $searchtag)
		{
			if (!$this->is_bbcode_tag_allowed($searchtag['tag']))
			{
				continue;
			}

			if (!preg_match($searchtag['regex'], $style, $matches))
			{
				continue;
			}

			$bbcode = strtoupper($searchtag['tag']);
			$option = ($searchtag['option'] ? '=' . $matches[$searchtag['match']] : '');

			$prependtags .= '[' . $bbcode . $option . ']';
			$appendtags = '[/' . $bbcode . ']' . $appendtags;
		}
	}

	/**
	* preg_replace_callback adapter that rewrites a CSS rgb() color as a hex color.
	* Each component is clamped to 255 so that the result always has 6 hex digits.
	*
	* @param	array	Matches: [1] red, [2] green, [3] blue, [4] optional trailing semicolon
	*
	* @return	string	"color: #RRGGBB" followed by the original trailing semicolon (if any)
	*/
	protected function rgb_color_to_hex_callback($matches)
	{
		return sprintf(
			'color: #%02X%02X%02X',
			min(255, intval($matches[1])),
			min(255, intval($matches[2])),
			min(255, intval($matches[3]))
		) . $matches[4];
	}

	/**
	* Reads the align attribute from a list of attributes. Only left, center
	* and right (in any letter case) are accepted.
	*
	* @param	string	String containing tag attributes
	*
	* @return	string	Upper case alignment (LEFT, CENTER, RIGHT) or an empty string
	*/
	protected function parse_align_attribute($tagoptions)
	{
		$align = strtolower($this->parse_wysiwyg_tag_attribute('align=', $tagoptions));

		switch ($align)
		{
			case 'left':
			case 'center':
			case 'right':
				return strtoupper($align);

			default:
				return '';
		}
	}

	/**
	* Parses an <a> tag. Matches URL and EMAIL BB code.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	BB code, or a plain text version if the BB code is disabled
	*/
	protected function parse_tag_a($aoptions, $text, $tag_name, $args)
	{
		$href = $this->parse_wysiwyg_tag_attribute('href=', $aoptions);

		if (trim($href) === '')
		{
			// nothing to link to; just parse any nested <a> tags
			return $this->parse_tag_by_name('a', $text);
		}

		if (strncasecmp($href, 'mailto:', 7) === 0)
		{
			$tag = 'EMAIL';
			$href = substr($href, 7);
		}
		else
		{
			$tag = 'URL';

			if (!preg_match('#^[a-z0-9]+:#i', $href))
			{
				// relative URL, prefix it with the URL to this board
				$href = create_full_url($href);
			}
		}

		$inner_text = $this->parse_tag_by_name('a', $text);

		if ($this->is_bbcode_tag_allowed($tag))
		{
			return "[$tag=\"$href\"]" . $inner_text . "[/$tag]";
		}

		// can't auto link, return a plaintext version
		if ($inner_text != $href)
		{
			return "$inner_text ($href)";
		}

		return $href;
	}

	/**
	* Parses <h1> through <h6> tags. Simply uses bold with line breaks.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	Heading text followed by two line breaks
	*/
	protected function parse_tag_heading($options, $text, $tag_name, $args)
	{
		if ($this->is_bbcode_tag_allowed('b'))
		{
			return '[B]' . $text . "[/B]\n\n";
		}

		return "$text\n\n";
	}

	/**
	* Parses a <p> tag. Supports alignments and style attributes. Gives a line break.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	Parsed paragraph
	*/
	protected function parse_tag_p($poptions, $text, $tag_name, $args)
	{
		// strict check: a paragraph that only contains "0" is not empty
		if ((string) $text === '')
		{
			return '';
		}

		$prepend = '';
		$append = '';
		$this->parse_style_attribute($poptions, $prepend, $append);

		$align = $this->parse_align_attribute($poptions);
		if ($align AND $this->is_bbcode_tag_allowed($align))
		{
			// innermost tag: opened last, closed first
			$prepend .= "[$align]";
			$append = "[/$align]" . $append;
		}

		// a paragraph starting with a table gets one line break less, but never a negative count
		$linebreaks = $this->p_linebreaks;
		if (preg_match("#^<table#si", $text))
		{
			$linebreaks--;
		}
		$append .= str_repeat("\n", max(0, $linebreaks));

		return $prepend . $this->parse_tag_by_name('p', $text) . $append;
	}

	/**
	* Parses a <span> tag. Supports style attributes.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	Parsed span
	*/
	protected function parse_tag_span($spanoptions, $text, $tag_name, $args)
	{
		$prependtags = '';
		$appendtags = '';
		$this->parse_style_attribute($spanoptions, $prependtags, $appendtags);

		return $prependtags . $this->parse_tag_by_name('span', $text) . $appendtags;
	}

	/**
	* Parses a <div> tag. Supports alignments and style attributes. Gives a line break.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	Parsed div
	*/
	protected function parse_tag_div($divoptions, $text, $tag_name, $args)
	{
		$prepend = '';
		$append = '';
		$this->parse_style_attribute($divoptions, $prepend, $append);

		$align = $this->parse_align_attribute($divoptions);
		if ($align AND $this->is_bbcode_tag_allowed($align))
		{
			// innermost tag: opened last, closed first
			$prepend .= "[$align]";
			$append = "[/$align]" . $append;
		}

		$append .= "\n";

		return $prepend . $this->parse_tag_by_name('div', $text) . $append;
	}

	/**
	* Parses an <li> tag. Outputs the list element BB code if within a list state.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	List item BB code, or the plain text plus a line break outside of a list
	*/
	protected function parse_tag_li($listoptions, $text, $tag_name, $args)
	{
		if ($this->is_bbcode_tag_allowed('list') AND $this->in_state('list'))
		{
			return '[*]' . rtrim($text);
		}

		return "$text\n";
	}

	/**
	* Parses <ol> and <ul> tags.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	LIST BB code, or just the parsed items if the LIST BB code is disabled
	*/
	protected function parse_tag_list($listoptions, $text, $tagname, $args)
	{
		$longtype = $this->parse_wysiwyg_tag_attribute('class=', $listoptions);
		$listtype = trim(preg_replace('#"?LIST-STYLE-TYPE:\s*([a-z0-9_-]+);?"?#si', '\\1', $longtype));
		if (empty($listtype) AND $tagname == 'ol')
		{
			$listtype = 'decimal';
		}

		$this->push_state('list');

		// close any <li> that was left open before the next item / list boundary
		$text = preg_replace('#<li>((?'.'>[^[<]+?|(?!</li).)*)(?=</?ol|</?ul|<li|\[list|\[/list)#siU', '<li>\\1</li>', $text);
		$text = $this->parse_tag_by_name('li', $text);

		if (!$this->is_bbcode_tag_allowed('list'))
		{
			// leave the list state again, otherwise it would leak into later parsing
			$this->pop_state('list');
			return $text;
		}

		$validtypes = array(
			'upper-alpha' => 'A',
			'lower-alpha' => 'a',
			'upper-roman' => 'I',
			'lower-roman' => 'i',
			'decimal' => '1'
		);

		if (isset($validtypes["$listtype"]))
		{
			$opentag = '[LIST=' . $validtypes["$listtype"] . ']';
		}
		else
		{
			$opentag = '[LIST]'; // default to bulleted
		}

		// nested lists of the same kind
		$text = $this->parse_tag_by_name($tagname, $text);

		$this->pop_state('list');

		return $opentag . $text . '[/LIST]';
	}

	/**
	* Parses a <font> tag. Supports font face, size, and color.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Extra arguments passed in to parsing call or tag rules
	*
	* @return	string	Parsed font tag
	*/
	protected function parse_tag_font($fontoptions, $text, $tag_name, $args)
	{
		// BB code name => HTML attribute it is read from
		$tags = array(
			'font' => 'face=',
			'size' => 'size=',
			'color' => 'color='
		);

		$prependtags = '';
		$appendtags = '';

		foreach ($tags AS $vbcode => $locate)
		{
			$optionvalue = $this->parse_wysiwyg_tag_attribute($locate, $fontoptions);
			if ($optionvalue)
			{
				$vbcode = strtoupper($vbcode);
				$prependtags .= "[$vbcode=$optionvalue]";
				$appendtags = "[/$vbcode]$appendtags";
			}
		}

		$this->parse_style_attribute($fontoptions, $prependtags, $appendtags);

		return $prependtags . $this->parse_tag_by_name('font', $text) . $appendtags;
	}

	/**
	* Parses and does a basic HTML replacement for the named tag. The
	* argument passed in is the BB code to parse to.
	*
	* @param	string	String containing tag attributes
	* @param	string	Text within tag
	* @param	string	Name of HTML tag. Used if one function parses multiple tags
	* @param	mixed	Name of the BB code to parse to
	*
	* @return	string	BB code; the unchanged text if the BB code is disabled; '' if the tag only holds whitespace
	*/
	protected function parse_tag_basic($options, $text, $tagname, $parseto)
	{
		if (trim($text) == '')
		{
			return '';
		}

		if (!$this->is_bbcode_tag_allowed($parseto))
		{
			return $text;
		}

		$parseto = strtoupper($parseto);
		$text = $this->parse_tag_by_name($tagname, $text);

		return "[$parseto]{$text}[/$parseto]";
	}

	/**
	* Finds the next occurrence of an opening tag whose name is exactly the one
	* searched for, ie "<b" followed by a space, "=" or ">". A plain prefix
	* match would mistake <blockquote> for <b> or <ul> for <u>.
	*
	* @param	string	Lower case text to search
	* @param	string	Lower case opening tag without attributes or closing bracket (eg "<b")
	* @param	int	Offset to start searching at
	*
	* @return	int|false	Position of the opening tag or false if there is none
	*/
	protected function find_open_tag_pos($textlower, $open_tag, $offset)
	{
		$length = strlen($textlower);
		$open_tag_len = strlen($open_tag);

		while ($offset <= $length)
		{
			$position = strpos($textlower, $open_tag, $offset);
			if ($position === false)
			{
				return false;
			}

			$after = $position + $open_tag_len;
			if ($after < $length AND strpos(' =>', $textlower[$after]) !== false)
			{
				return $position;
			}

			// only a longer tag name that starts the same way; keep looking
			$offset = $after;
		}

		return false;
	}

	/**
	* General matched tag HTML parser. Finds matched pairs of tags (outside pairs
	* first) and calls the specified call back.
	*
	* The callback receives (attribute string, inner text, tag name, extra arguments)
	* and returns the replacement for the complete tag.
	*
	* @param	string	Name of the HTML tag to search for
	* @param	string	Text to search
	* @param	callback	Callback to call when found
	* @param	mixed	Extra arguments to pass into the callback function
	*
	* @return	string	Text with named tag parsed
	*/
	protected function parse_tag($tagname, $text, $functionhandle, $extraargs = '')
	{
		$tagname = strtolower($tagname);
		$open_tag = "<$tagname";
		$open_tag_len = strlen($open_tag);
		$close_tag = "</$tagname>";
		$close_tag_len = strlen($close_tag);

		$beginsearchpos = 0;

		while (true)
		{
			// $text changes with every replacement, so these are refreshed each round
			$textlower = strtolower($text);
			$strlen = strlen($text);

			if ($beginsearchpos > $strlen)
			{
				break;
			}

			$tagbegin = strpos($textlower, $open_tag, $beginsearchpos);
			if ($tagbegin === false)
			{
				break;
			}

			// we've found the beginning of the tag, now extract the options
			$inquote = '';
			$found = false;
			$tagnameend = false;

			for ($optionend = $tagbegin; $optionend < $strlen; $optionend++)
			{
				$char = $text[$optionend];

				if (($char == '"' OR $char == "'") AND $inquote == '')
				{
					$inquote = $char; // wasn't in a quote, but now we are
				}
				else if (($char == '"' OR $char == "'") AND $inquote == $char)
				{
					$inquote = ''; // left the type of quote we were in
				}
				else if ($char == '>' AND !$inquote)
				{
					$found = true;
					break; // this is what we want
				}
				else if (($char == '=' OR $char == ' ') AND $tagnameend === false)
				{
					$tagnameend = $optionend;
				}
			}

			if (!$found)
			{
				// the tag is never closed with ">"
				break;
			}

			if ($tagnameend === false)
			{
				$tagnameend = $optionend;
			}

			$tagoptions = (string) substr($text, $tagbegin + $open_tag_len, $optionend - ($tagbegin + $open_tag_len));
			$acttagname = substr($textlower, $tagbegin + 1, $tagnameend - $tagbegin - 1);

			if ($acttagname !== $tagname)
			{
				// a different tag that merely starts with the same characters
				$beginsearchpos = $optionend;
				continue;
			}

			// now find the "end"
			$tagend = strpos($textlower, $close_tag, $optionend);
			if ($tagend === false)
			{
				break;
			}

			// if there are nested tags, this </$tagname> won't match our open tag, so we need to bump it back
			$nestedopenpos = $this->find_open_tag_pos($textlower, $open_tag, $optionend);
			while ($nestedopenpos !== false AND $tagend !== false)
			{
				if ($nestedopenpos > $tagend)
				{
					// the tag it found isn't actually nested -- it's past the </$tagname>
					break;
				}

				$tagend = strpos($textlower, $close_tag, $tagend + $close_tag_len);
				$nestedopenpos = $this->find_open_tag_pos($textlower, $open_tag, $nestedopenpos + $open_tag_len);
			}

			if ($tagend === false)
			{
				// not enough closing tags for the nesting; skip this opening tag
				$beginsearchpos = $optionend;
				continue;
			}

			$localbegin = $optionend + 1;
			$localtext = (string) call_user_func(
				$functionhandle,
				$tagoptions,
				(string) substr($text, $localbegin, $tagend - $localbegin),
				$tagname,
				$extraargs
			);

			$text = substr_replace($text, $localtext, $tagbegin, $tagend + $close_tag_len - $tagbegin);

			// this adjusts for $localtext having more/less characters than the amount of text it's replacing
			$beginsearchpos = $tagbegin + strlen($localtext);
		}

		return $text;
	}

	/**
	* General attribute parser. Parses the named attribute out of a string
	* of attributes. The attribute name is matched case insensitively; the
	* value is returned in its original case with surrounding whitespace removed.
	*
	* @param	string	Name of attribute to parse. Should be in form "attr="
	* @param	string	Text to search
	*
	* @return	string	Value of named attribute; empty string if the attribute is missing
	*/
	protected function parse_wysiwyg_tag_attribute($option, $text)
	{
		$original_text = (string) $text;
		$text = strtolower($original_text);
		$option = strtolower($option);

		if ($option === '')
		{
			return '';
		}

		$position = strpos($text, $option);
		if ($position === false)
		{
			return '';
		}

		$length = strlen($text);
		$value_start = $position + strlen($option);

		if ($value_start >= $length)
		{
			// the attribute name is the very last thing in the string; there is no value
			return '';
		}

		$first_char = $text[$value_start];
		$quoted = ($first_char == '"' OR $first_char == '\'');

		// a quoted value is read to the matching quote, an unquoted one to the next space
		$delimchar = ($quoted ? $first_char : ' ');

		$value_end = strpos($text, $delimchar, $value_start + 1);
		if ($value_end === false)
		{
			$value_end = $length;
		}

		if ($quoted)
		{
			// don't include the opening delimiter
			$value_start++;
		}

		return trim((string) substr($original_text, $value_start, $value_end - $value_start));
	}

	/**
	* Determines if the specified BB code tag is globally enabled.
	* Tags without a dedicated permission bit are always allowed.
	*
	* @param	string	Tag name
	*
	* @return	bool
	*/
	protected function is_bbcode_tag_allowed($tag)
	{
		switch (strtolower($tag))
		{
			case 'b':
			case 'i':
			case 'u':
				$flag_value = ALLOW_BBCODE_BASIC;
				break;

			case 'color':
				$flag_value = ALLOW_BBCODE_COLOR;
				break;

			case 'size':
				$flag_value = ALLOW_BBCODE_SIZE;
				break;

			case 'font':
				$flag_value = ALLOW_BBCODE_FONT;
				break;

			case 'left':
			case 'right':
			case 'center':
				$flag_value = ALLOW_BBCODE_ALIGN;
				break;

			case 'list':
				$flag_value = ALLOW_BBCODE_LIST;
				break;

			case 'indent':
				// allowed if either is enabled
				$flag_value = ALLOW_BBCODE_ALIGN | ALLOW_BBCODE_LIST;
				break;

			case 'email':
			case 'url':
			case 'thread':
			case 'post':
				$flag_value = ALLOW_BBCODE_URL;
				break;

			case 'php':
				$flag_value = ALLOW_BBCODE_PHP;
				break;

			case 'code':
				$flag_value = ALLOW_BBCODE_CODE;
				break;

			case 'html':
				$flag_value = ALLOW_BBCODE_HTML;
				break;

			default:
				return true;
		}

		return (($this->registry->options['allowedbbcodes'] & $flag_value) ? true : false);
	}
}
/*======================================================================*\
|| ####################################################################
|| # CVS: $RCSfile$ - $Revision: 29480 $
|| ####################################################################
\*======================================================================*/