View file application/libraries/Engine/Filter/Html.php

File size: 9.96Kb
<?php
/**
 * SocialEngine
 *
 * @category   Engine
 * @package    Engine_Filter
 * @copyright  Copyright 2006-2010 Webligo Developments
 * @license    http://www.socialengine.com/license/
 * @version    $Id: Html.php 9747 2012-07-26 02:08:08Z john $
 */

/**
 * @category   Engine
 * @package    Engine_Filter
 * @copyright  Copyright 2006-2010 Webligo Developments
 * @license    http://www.socialengine.com/license/
 */
class Engine_Filter_Html implements Zend_Filter_Interface
{
  protected $_useDefaultLists = true;
  protected $_allowedTags;
  protected $_forbiddenTags;
  protected $_allowedAttributes;
  protected $_forbiddenAttributes;
  protected $_forbiddenAttributeValues;
  protected $_decode = true;

  // Static
  public static function process($text, $options = array())
  {
    $instance = new InputFilter($options);
    return $instance->execute($text);
  }

  // Constructor
  function __construct($options = array())
  {
    foreach( $options as $key => $value )
    {
      $method = 'set'.ucfirst($key);
      if( method_exists($this, $method) )
      {
        $this->$method($value);
      }
    }

    if( $this->_useDefaultLists )
    {
      $this->loadDefaultLists();
    }
  }

  public function filter($value)
  {
    return $this->execute($value);
  }

  public function loadDefaultLists()
  {
    $this->_allowedAttributes = array_unique(array_merge((array) $this->_allowedAttributes,
      array('href', 'src', 'alt', 'border', 'align', 'width', 'height', 'vspace', 'hspace', 'target', 'style', 'name', 'value')
    ));

    $this->_forbiddenTags = array_unique(array_merge((array) $this->_forbiddenTags,
      array('applet', 'body', 'bgsound', 'base', 'basefont', 'frame', 'frameset', 'head', 'html', 'id', 'ilayer', 'layer', 'link', 'meta', 'name', 'script', 'style', 'title', 'xml')
    ));

    $this->_forbiddenAttributes = array_unique(array_merge((array) $this->_forbiddenAttributes,
      array('action', 'background', 'codebase', 'dynsrc', 'lowsrc', 'on*')
    ));

    $this->_forbiddenAttributeValues = array_unique(array_merge((array) $this->_forbiddenAttributeValues,
      array('*expression*', 'javascript:*', 'behaviour:*', 'vbscript:*', 'mocha:*', 'livescript:*')
    ));
  }



  // Options

  public function setDecode($flag = true)
  {
    $this->_decode = $flag;
  }

  public function setUseDefaultLists($flag = true)
  {
    $this->_useDefaultLists = (bool) $flag;
  }

  public function setAllowedTags($allowedTags = null)
  {
    if( is_string($allowedTags) )
    {
      $allowedTagsArray = explode(',', $allowedTags);
      foreach($allowedTagsArray as $index => $tag)
        $allowedTagsArray[$index] = trim($tag);

      $this->_allowedTags = $allowedTagsArray;
    }
    else if( is_array($allowedTags) )
    {
      $this->_allowedTags = $allowedTags;
    }
    else
    {
      $this->_allowedTags = null;
    }
  }

  public function setForbiddenTags($forbiddenTags = null)
  {
    if( is_string($forbiddenTags) )
    {
      $this->_forbiddenTags = explode(',', $forbiddenTags);
    }
    else if( is_array($forbiddenTags) )
    {
      $this->_forbiddenTags = $forbiddenTags;
    }
    else
    {
      $this->_forbiddenTags = null;
    }
  }

  public function setAllowedAttributes($allowedAttributes = null)
  {
    if( is_string($allowedAttributes) )
    {
      $this->_allowedAttributes = explode(',', $allowedAttributes);
    }
    else if( is_array($allowedAttributes) )
    {
      $this->_allowedAttributes = $allowedAttributes;
    }
    else
    {
      $this->_allowedAttributes = null;
    }
  }

  public function setForbiddenAttributes($forbiddenAttributes = null)
  {
    if( is_string($forbiddenAttributes) )
    {
      $this->_forbiddenAttributes = explode(',', $forbiddenAttributes);
    }
    else if( is_array($forbiddenAttributes) )
    {
      $this->_forbiddenAttributes = $forbiddenAttributes;
    }
    else
    {
      $this->_forbiddenAttributes = null;
    }
  }

  // Main
  public function execute($text)
  {
    // Process tags

    // Decode
    if( $this->_decode )
    {
      $text = htmlspecialchars_decode($text, ENT_QUOTES);
    }

    // Nothing was specified? Just strip everything
    if( !$this->_allowedTags && !$this->_forbiddenTags )
    {
      return strip_tags($text);
    }


    $text = preg_replace_callback('/<(pre)(?:(?!<\/?\1).)*?<\/\1>/s', array($this, '_handlePre'), $text);

    // Close any open-ended tags, escape all non-html lt and gt
    $text = str_replace(array('[TO]', '[TC]'), array('', ''), $text);
    $text = preg_replace('/<(\/?[a-zA-Z][^<>]*?)((<)|>)/', '<$1>$3', $text);
    $text = preg_replace('/<(\/?[a-zA-Z][^<>]*?)>/', '[TO]$1[TC]', $text);
    $text = str_replace(array('<', '>'), array('&lt;', '&gt;'), $text);
    $text = str_replace(array('[TO]', '[TC]'), array('<', '>'), $text);
    
    // Strip everything but allowable tags
    if( $this->_allowedTags )
    {
      $text = strip_tags($text, '<' . join('><', $this->_allowedTags) . '>');
    }

    // Strip all forbidden tags
    if( $this->_forbiddenTags )
    {
      $text = $this->stripTags($text, $this->_forbiddenTags);
    }

    // Strip all but allowed attributes
    if( $this->_allowedAttributes || $this->_forbiddenAttributes )
    {
      // </?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/? >
      preg_match_all('/(<\/?\w+)\s+([^<>]*?)>/i', $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
      /* preg_match_all('/<\/?(\w+)((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/ims', $text, $matches, PREG_SET_ORDER); */

      $delta = 0;
      foreach( $matches as $match ) // Each one of these is an element with attributes
      {
        $current = $match[1][0];
        $modified = false;
        preg_match_all('/(\w+(\s*=\s*(?:".*?(?<!\\\\)"|\'.*?(?<!\\\\)\'|[^\'">\s]+))?)/', $match[0][0], $m, PREG_SET_ORDER);

        for( $i = 1; $i < count($m); $i++ )
        {
          $attribString = $m[$i][0];
          @list($attribName, $attribValue) = explode('=', $attribString, 2);

          if( !$this->checkAttribute($attribName, $attribValue) )
          {
            $modified = true;
            continue; // Throw it away!
          }

          $current .= ' ' . $attribName;
          if( !empty($attribValue) )
          {
            $current .= '=' . $attribValue;
          }
        }
        $current .= '>';

        // Here we replace the original
        $offset = $match[0][1] + $delta;
        $length = strlen($match[0][0]);
        if( $modified == true )
        {
          $currentLength = strlen($current);
          $tmp = substr($text, 0, $offset + 0)
            . $current
            . substr($text, $offset + $length);
          $delta -= ($length - $currentLength);
          $text = $tmp;
        }
      }
    }

    if( $this->_pre ) {
      foreach( $this->_pre as $id => $data ) {
        $text = str_replace('[PRE="' . $id . '"]', $data, $text);
      }
    }

    return $text;
  }

  public function checkAttribute($name, $value)
  {
    $name = strtolower($name);
    $value = trim($value, "\x00..\x20\x22\x27");
    $softAllow = false;

    // This attribute has been specifically whitelisted
    if( is_array($this->_allowedAttributes) && in_array($name, $this->_allowedAttributes) )
    {
       $softAllow = true;
      //return true;
    }

    // This attribute has been specifically blacklisted
    if( is_array($this->_forbiddenAttributes) && in_array($name, $this->_forbiddenAttributes) )
    {
      return false;
    }

    // Now check for wildcards

    // Allowed Attributes
    if( is_array($this->_allowedAttributes) )
    {
      foreach( $this->_allowedAttributes as $allowedAttribute )
      {
        if( strpos($allowedAttribute, '*') === false )
        {
          continue;
        }

        // Make a regex for the wildcard
        $regex = '/^' . str_replace('*', '.+?', preg_quote($allowedAttribute)) . '$/i';
        if( preg_match($regex, $name) )
        {
          $softAllow = true;
          //return true;
        }
      }
    }


    // Forbidden Attributes
    if( is_array($this->_forbiddenAttributes) )
    {
      foreach( $this->_forbiddenAttributes as $forbiddenAttribute )
      {
        if( strpos($forbiddenAttribute, '*') === false )
        {
          continue;
        }

        // Make a regex for the wildcard
        $regex = '/^' . str_replace('*', '.+?', preg_quote($forbiddenAttribute)) . '$/i';
        if( preg_match($regex, $name) )
        {
          return false;
        }
      }
    }

    // Forbidden attribute values
    if( is_array($this->_forbiddenAttributeValues) )
    {
      foreach( $this->_forbiddenAttributeValues as $forbiddenAttributeValue )
      {
        if( strpos($forbiddenAttributeValue, '*') === false )
        {
          continue;
        }

        // Make a regex for the wildcard
        $regex = '/^' . str_replace('\*', '.+?', preg_quote($forbiddenAttributeValue)) . '$/i';
        if( preg_match($regex, $value) )
        {
          return false;
        }
      }
    }

    return $softAllow;
  }

  public function stripTags($str, $params)
  {
    for( $i = 0; $i < count($params); $i++ )
    {
      $str = preg_replace('/<' . $params[$i] . '\b[^>]*>/i', '', $str);
      $str = preg_replace('/<\/' . $params[$i] . '[^>]*>/i', '', $str);
    }
    return $str;
  }

  private $_preIteration = 0;
  private $_pre = array();

  private function _handlePre($matches)
  {
    $text = $matches[0];
    $text = preg_replace('/<pre(.*?)><code>/i', '', $text);
    $text = str_replace('</code></pre>', '', $text);
    $text = str_replace(array('<', '>'), array('&lt;', '&gt;'), $text);
    $this->_preIteration++;
    $this->_pre[$this->_preIteration] = '<code>' . $text . '</code>';

    return '[PRE="' . $this->_preIteration . '"]';
  }
}