[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3master/var/www/se3/includes/library/HTMLPurifier/Filter/ -> ExtractStyleBlocks.php (source)

   1  <?php
   2  
   3  // why is this a top level function? Because PHP 5.2.0 doesn't seem to
   4  // understand how to interpret this filter if it's a static method.
   5  // It's all really silly, but if we go this route it might be reasonable
   6  // to coalesce all of these methods into one.
   7  function htmlpurifier_filter_extractstyleblocks_muteerrorhandler()
   8  {
   9  }
  10  
  11  /**
  12   * This filter extracts <style> blocks from input HTML, cleans them up
  13   * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
  14   * so they can be used elsewhere in the document.
  15   *
  16   * @note
  17   *      See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
  18   *      sample usage.
  19   *
  20   * @note
  21   *      This filter can also be used on stylesheets not included in the
  22   *      document--something purists would probably prefer. Just directly
  23   *      call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
  24   */
  25  class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
  26  {
  27      /**
  28       * @type string
  29       */
  30      public $name = 'ExtractStyleBlocks';
  31  
  32      /**
  33       * @type array
  34       */
  35      private $_styleMatches = array();
  36  
  37      /**
  38       * @type csstidy
  39       */
  40      private $_tidy;
  41  
  42      /**
  43       * @type HTMLPurifier_AttrDef_HTML_ID
  44       */
  45      private $_id_attrdef;
  46  
  47      /**
  48       * @type HTMLPurifier_AttrDef_CSS_Ident
  49       */
  50      private $_class_attrdef;
  51  
  52      /**
  53       * @type HTMLPurifier_AttrDef_Enum
  54       */
  55      private $_enum_attrdef;
  56  
  57      public function __construct()
  58      {
  59          $this->_tidy = new csstidy();
  60          $this->_tidy->set_cfg('lowercase_s', false);
  61          $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
  62          $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
  63          $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(
  64              array(
  65                  'first-child',
  66                  'link',
  67                  'visited',
  68                  'active',
  69                  'hover',
  70                  'focus'
  71              )
  72          );
  73      }
  74  
  75      /**
  76       * Save the contents of CSS blocks to style matches
  77       * @param array $matches preg_replace style $matches array
  78       */
  79      protected function styleCallback($matches)
  80      {
  81          $this->_styleMatches[] = $matches[1];
  82      }
  83  
  84      /**
  85       * Removes inline <style> tags from HTML, saves them for later use
  86       * @param string $html
  87       * @param HTMLPurifier_Config $config
  88       * @param HTMLPurifier_Context $context
  89       * @return string
  90       * @todo Extend to indicate non-text/css style blocks
  91       */
  92      public function preFilter($html, $config, $context)
  93      {
  94          $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl');
  95          if ($tidy !== null) {
  96              $this->_tidy = $tidy;
  97          }
  98          $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
  99          $style_blocks = $this->_styleMatches;
 100          $this->_styleMatches = array(); // reset
 101          $context->register('StyleBlocks', $style_blocks); // $context must not be reused
 102          if ($this->_tidy) {
 103              foreach ($style_blocks as &$style) {
 104                  $style = $this->cleanCSS($style, $config, $context);
 105              }
 106          }
 107          return $html;
 108      }
 109  
 110      /**
 111       * Takes CSS (the stuff found in <style>) and cleans it.
 112       * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
 113       * @param string $css CSS styling to clean
 114       * @param HTMLPurifier_Config $config
 115       * @param HTMLPurifier_Context $context
 116       * @throws HTMLPurifier_Exception
 117       * @return string Cleaned CSS
 118       */
 119      public function cleanCSS($css, $config, $context)
 120      {
 121          // prepare scope
 122          $scope = $config->get('Filter.ExtractStyleBlocks.Scope');
 123          if ($scope !== null) {
 124              $scopes = array_map('trim', explode(',', $scope));
 125          } else {
 126              $scopes = array();
 127          }
 128          // remove comments from CSS
 129          $css = trim($css);
 130          if (strncmp('<!--', $css, 4) === 0) {
 131              $css = substr($css, 4);
 132          }
 133          if (strlen($css) > 3 && substr($css, -3) == '-->') {
 134              $css = substr($css, 0, -3);
 135          }
 136          $css = trim($css);
 137          set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
 138          $this->_tidy->parse($css);
 139          restore_error_handler();
 140          $css_definition = $config->getDefinition('CSS');
 141          $html_definition = $config->getDefinition('HTML');
 142          $new_css = array();
 143          foreach ($this->_tidy->css as $k => $decls) {
 144              // $decls are all CSS declarations inside an @ selector
 145              $new_decls = array();
 146              foreach ($decls as $selector => $style) {
 147                  $selector = trim($selector);
 148                  if ($selector === '') {
 149                      continue;
 150                  } // should not happen
 151                  // Parse the selector
 152                  // Here is the relevant part of the CSS grammar:
 153                  //
 154                  // ruleset
 155                  //   : selector [ ',' S* selector ]* '{' ...
 156                  // selector
 157                  //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
 158                  // combinator
 159                  //   : '+' S*
 160                  //   : '>' S*
 161                  // simple_selector
 162                  //   : element_name [ HASH | class | attrib | pseudo ]*
 163                  //   | [ HASH | class | attrib | pseudo ]+
 164                  // element_name
 165                  //   : IDENT | '*'
 166                  //   ;
 167                  // class
 168                  //   : '.' IDENT
 169                  //   ;
 170                  // attrib
 171                  //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
 172                  //     [ IDENT | STRING ] S* ]? ']'
 173                  //   ;
 174                  // pseudo
 175                  //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
 176                  //   ;
 177                  //
 178                  // For reference, here are the relevant tokens:
 179                  //
 180                  // HASH         #{name}
 181                  // IDENT        {ident}
 182                  // INCLUDES     ==
 183                  // DASHMATCH    |=
 184                  // STRING       {string}
 185                  // FUNCTION     {ident}\(
 186                  //
 187                  // And the lexical scanner tokens
 188                  //
 189                  // name         {nmchar}+
 190                  // nmchar       [_a-z0-9-]|{nonascii}|{escape}
 191                  // nonascii     [\240-\377]
 192                  // escape       {unicode}|\\[^\r\n\f0-9a-f]
 193                  // unicode      \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
 194                  // ident        -?{nmstart}{nmchar*}
 195                  // nmstart      [_a-z]|{nonascii}|{escape}
 196                  // string       {string1}|{string2}
 197                  // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
 198                  // string2      \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
 199                  //
 200                  // We'll implement a subset (in order to reduce attack
 201                  // surface); in particular:
 202                  //
 203                  //      - No Unicode support
 204                  //      - No escapes support
 205                  //      - No string support (by proxy no attrib support)
 206                  //      - element_name is matched against allowed
 207                  //        elements (some people might find this
 208                  //        annoying...)
 209                  //      - Pseudo-elements one of :first-child, :link,
 210                  //        :visited, :active, :hover, :focus
 211  
 212                  // handle ruleset
 213                  $selectors = array_map('trim', explode(',', $selector));
 214                  $new_selectors = array();
 215                  foreach ($selectors as $sel) {
 216                      // split on +, > and spaces
 217                      $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
 218                      // even indices are chunks, odd indices are
 219                      // delimiters
 220                      $nsel = null;
 221                      $delim = null; // guaranteed to be non-null after
 222                      // two loop iterations
 223                      for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
 224                          $x = $basic_selectors[$i];
 225                          if ($i % 2) {
 226                              // delimiter
 227                              if ($x === ' ') {
 228                                  $delim = ' ';
 229                              } else {
 230                                  $delim = ' ' . $x . ' ';
 231                              }
 232                          } else {
 233                              // simple selector
 234                              $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
 235                              $sdelim = null;
 236                              $nx = null;
 237                              for ($j = 0, $cc = count($components); $j < $cc; $j++) {
 238                                  $y = $components[$j];
 239                                  if ($j === 0) {
 240                                      if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
 241                                          $nx = $y;
 242                                      } else {
 243                                          // $nx stays null; this matters
 244                                          // if we don't manage to find
 245                                          // any valid selector content,
 246                                          // in which case we ignore the
 247                                          // outer $delim
 248                                      }
 249                                  } elseif ($j % 2) {
 250                                      // set delimiter
 251                                      $sdelim = $y;
 252                                  } else {
 253                                      $attrdef = null;
 254                                      if ($sdelim === '#') {
 255                                          $attrdef = $this->_id_attrdef;
 256                                      } elseif ($sdelim === '.') {
 257                                          $attrdef = $this->_class_attrdef;
 258                                      } elseif ($sdelim === ':') {
 259                                          $attrdef = $this->_enum_attrdef;
 260                                      } else {
 261                                          throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
 262                                      }
 263                                      $r = $attrdef->validate($y, $config, $context);
 264                                      if ($r !== false) {
 265                                          if ($r !== true) {
 266                                              $y = $r;
 267                                          }
 268                                          if ($nx === null) {
 269                                              $nx = '';
 270                                          }
 271                                          $nx .= $sdelim . $y;
 272                                      }
 273                                  }
 274                              }
 275                              if ($nx !== null) {
 276                                  if ($nsel === null) {
 277                                      $nsel = $nx;
 278                                  } else {
 279                                      $nsel .= $delim . $nx;
 280                                  }
 281                              } else {
 282                                  // delimiters to the left of invalid
 283                                  // basic selector ignored
 284                              }
 285                          }
 286                      }
 287                      if ($nsel !== null) {
 288                          if (!empty($scopes)) {
 289                              foreach ($scopes as $s) {
 290                                  $new_selectors[] = "$s $nsel";
 291                              }
 292                          } else {
 293                              $new_selectors[] = $nsel;
 294                          }
 295                      }
 296                  }
 297                  if (empty($new_selectors)) {
 298                      continue;
 299                  }
 300                  $selector = implode(', ', $new_selectors);
 301                  foreach ($style as $name => $value) {
 302                      if (!isset($css_definition->info[$name])) {
 303                          unset($style[$name]);
 304                          continue;
 305                      }
 306                      $def = $css_definition->info[$name];
 307                      $ret = $def->validate($value, $config, $context);
 308                      if ($ret === false) {
 309                          unset($style[$name]);
 310                      } else {
 311                          $style[$name] = $ret;
 312                      }
 313                  }
 314                  $new_decls[$selector] = $style;
 315              }
 316              $new_css[$k] = $new_decls;
 317          }
 318          // remove stuff that shouldn't be used, could be reenabled
 319          // after security risks are analyzed
 320          $this->_tidy->css = $new_css;
 321          $this->_tidy->import = array();
 322          $this->_tidy->charset = null;
 323          $this->_tidy->namespace = null;
 324          $css = $this->_tidy->print->plain();
 325          // we are going to escape any special characters <>& to ensure
 326          // that no funny business occurs (i.e. </style> in a font-family prop).
 327          if ($config->get('Filter.ExtractStyleBlocks.Escaping')) {
 328              $css = str_replace(
 329                  array('<', '>', '&'),
 330                  array('\3C ', '\3E ', '\26 '),
 331                  $css
 332              );
 333          }
 334          return $css;
 335      }
 336  }
 337  
 338  // vim: et sw=4 sts=4


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1