[ Index ]

PHP Cross Reference of WordPress 2.7.1

title

Body

[close]

/wp-includes/ -> kses.php (source)

   1  <?php
   2  /**
   3   * HTML/XHTML filter that only allows some elements and attributes
   4   *
   5   * Added wp_ prefix to avoid conflicts with existing kses users
   6   *
   7   * @version 0.2.2
   8   * @copyright (C) 2002, 2003, 2005
   9   * @author Ulf Harnhammar <metaur@users.sourceforge.net>
  10   *
  11   * @package External
  12   * @subpackage KSES
  13   *
  14   * @internal
  15   * *** CONTACT INFORMATION ***
  16   * E-mail:      metaur at users dot sourceforge dot net
  17   * Web page:    http://sourceforge.net/projects/kses
  18   * Paper mail:  Ulf Harnhammar
  19   *              Ymergatan 17 C
  20   *              753 25  Uppsala
  21   *              SWEDEN
  22   *
  23   * [kses strips evil scripts!]
  24   */
  25  
  26  /**
  27   * You can override this in your my-hacks.php file You can also override this
  28   * in a plugin file. The my-hacks.php is deprecated in its usage.
  29   *
  30   * @since 1.2.0
  31   */
  32  if (!defined('CUSTOM_TAGS'))
  33      define('CUSTOM_TAGS', false);
  34  
  35  if (!CUSTOM_TAGS) {
  36      /**
  37       * Kses global for default allowable HTML tags.
  38       *
  39       * Can be override by using CUSTOM_TAGS constant.
  40       *
  41       * @global array $allowedposttags
  42       * @since 2.0.0
  43       */
  44      $allowedposttags = array(
  45          'address' => array(),
  46          'a' => array(
  47              'class' => array (),
  48              'href' => array (),
  49              'id' => array (),
  50              'title' => array (),
  51              'rel' => array (),
  52              'rev' => array (),
  53              'name' => array (),
  54              'target' => array()),
  55          'abbr' => array(
  56              'class' => array (),
  57              'title' => array ()),
  58          'acronym' => array(
  59              'title' => array ()),
  60          'b' => array(),
  61          'big' => array(),
  62          'blockquote' => array(
  63              'id' => array (),
  64              'cite' => array (),
  65              'class' => array(),
  66              'lang' => array(),
  67              'xml:lang' => array()),
  68          'br' => array (
  69              'class' => array ()),
  70          'button' => array(
  71              'disabled' => array (),
  72              'name' => array (),
  73              'type' => array (),
  74              'value' => array ()),
  75          'caption' => array(
  76              'align' => array (),
  77              'class' => array ()),
  78          'cite' => array (
  79              'class' => array(),
  80              'dir' => array(),
  81              'lang' => array(),
  82              'title' => array ()),
  83          'code' => array (
  84              'style' => array()),
  85          'col' => array(
  86              'align' => array (),
  87              'char' => array (),
  88              'charoff' => array (),
  89              'span' => array (),
  90              'dir' => array(),
  91              'style' => array (),
  92              'valign' => array (),
  93              'width' => array ()),
  94          'del' => array(
  95              'datetime' => array ()),
  96          'dd' => array(),
  97          'div' => array(
  98              'align' => array (),
  99              'class' => array (),
 100              'dir' => array (),
 101              'lang' => array(),
 102              'style' => array (),
 103              'xml:lang' => array()),
 104          'dl' => array(),
 105          'dt' => array(),
 106          'em' => array(),
 107          'fieldset' => array(),
 108          'font' => array(
 109              'color' => array (),
 110              'face' => array (),
 111              'size' => array ()),
 112          'form' => array(
 113              'action' => array (),
 114              'accept' => array (),
 115              'accept-charset' => array (),
 116              'enctype' => array (),
 117              'method' => array (),
 118              'name' => array (),
 119              'target' => array ()),
 120          'h1' => array(
 121              'align' => array (),
 122              'class' => array ()),
 123          'h2' => array(
 124              'align' => array (),
 125              'class' => array ()),
 126          'h3' => array(
 127              'align' => array (),
 128              'class' => array ()),
 129          'h4' => array(
 130              'align' => array (),
 131              'class' => array ()),
 132          'h5' => array(
 133              'align' => array (),
 134              'class' => array ()),
 135          'h6' => array(
 136              'align' => array (),
 137              'class' => array ()),
 138          'hr' => array(
 139              'align' => array (),
 140              'class' => array (),
 141              'noshade' => array (),
 142              'size' => array (),
 143              'width' => array ()),
 144          'i' => array(),
 145          'img' => array(
 146              'alt' => array (),
 147              'align' => array (),
 148              'border' => array (),
 149              'class' => array (),
 150              'height' => array (),
 151              'hspace' => array (),
 152              'longdesc' => array (),
 153              'vspace' => array (),
 154              'src' => array (),
 155              'style' => array (),
 156              'width' => array ()),
 157          'ins' => array(
 158              'datetime' => array (),
 159              'cite' => array ()),
 160          'kbd' => array(),
 161          'label' => array(
 162              'for' => array ()),
 163          'legend' => array(
 164              'align' => array ()),
 165          'li' => array (
 166              'align' => array (),
 167              'class' => array ()),
 168          'p' => array(
 169              'class' => array (),
 170              'align' => array (),
 171              'dir' => array(),
 172              'lang' => array(),
 173              'style' => array (),
 174              'xml:lang' => array()),
 175          'pre' => array(
 176              'style' => array(),
 177              'width' => array ()),
 178          'q' => array(
 179              'cite' => array ()),
 180          's' => array(),
 181          'span' => array (
 182              'class' => array (),
 183              'dir' => array (),
 184              'align' => array (),
 185              'lang' => array (),
 186              'style' => array (),
 187              'title' => array (),
 188              'xml:lang' => array()),
 189          'strike' => array(),
 190          'strong' => array(),
 191          'sub' => array(),
 192          'sup' => array(),
 193          'table' => array(
 194              'align' => array (),
 195              'bgcolor' => array (),
 196              'border' => array (),
 197              'cellpadding' => array (),
 198              'cellspacing' => array (),
 199              'class' => array (),
 200              'dir' => array(),
 201              'id' => array(),
 202              'rules' => array (),
 203              'style' => array (),
 204              'summary' => array (),
 205              'width' => array ()),
 206          'tbody' => array(
 207              'align' => array (),
 208              'char' => array (),
 209              'charoff' => array (),
 210              'valign' => array ()),
 211          'td' => array(
 212              'abbr' => array (),
 213              'align' => array (),
 214              'axis' => array (),
 215              'bgcolor' => array (),
 216              'char' => array (),
 217              'charoff' => array (),
 218              'class' => array (),
 219              'colspan' => array (),
 220              'dir' => array(),
 221              'headers' => array (),
 222              'height' => array (),
 223              'nowrap' => array (),
 224              'rowspan' => array (),
 225              'scope' => array (),
 226              'style' => array (),
 227              'valign' => array (),
 228              'width' => array ()),
 229          'textarea' => array(
 230              'cols' => array (),
 231              'rows' => array (),
 232              'disabled' => array (),
 233              'name' => array (),
 234              'readonly' => array ()),
 235          'tfoot' => array(
 236              'align' => array (),
 237              'char' => array (),
 238              'class' => array (),
 239              'charoff' => array (),
 240              'valign' => array ()),
 241          'th' => array(
 242              'abbr' => array (),
 243              'align' => array (),
 244              'axis' => array (),
 245              'bgcolor' => array (),
 246              'char' => array (),
 247              'charoff' => array (),
 248              'class' => array (),
 249              'colspan' => array (),
 250              'headers' => array (),
 251              'height' => array (),
 252              'nowrap' => array (),
 253              'rowspan' => array (),
 254              'scope' => array (),
 255              'valign' => array (),
 256              'width' => array ()),
 257          'thead' => array(
 258              'align' => array (),
 259              'char' => array (),
 260              'charoff' => array (),
 261              'class' => array (),
 262              'valign' => array ()),
 263          'title' => array(),
 264          'tr' => array(
 265              'align' => array (),
 266              'bgcolor' => array (),
 267              'char' => array (),
 268              'charoff' => array (),
 269              'class' => array (),
 270              'style' => array (),
 271              'valign' => array ()),
 272          'tt' => array(),
 273          'u' => array(),
 274          'ul' => array (
 275              'class' => array (),
 276              'style' => array (),
 277              'type' => array ()),
 278          'ol' => array (
 279              'class' => array (),
 280              'start' => array (),
 281              'style' => array (),
 282              'type' => array ()),
 283          'var' => array ());
 284  
 285      /**
 286       * Kses allowed HTML elements.
 287       *
 288       * @global array $allowedtags
 289       * @since 1.0.0
 290       */
 291      $allowedtags = array(
 292          'a' => array(
 293              'href' => array (),
 294              'title' => array ()),
 295          'abbr' => array(
 296              'title' => array ()),
 297          'acronym' => array(
 298              'title' => array ()),
 299          'b' => array(),
 300          'blockquote' => array(
 301              'cite' => array ()),
 302          //    'br' => array(),
 303          'cite' => array (),
 304          'code' => array(),
 305          'del' => array(
 306              'datetime' => array ()),
 307          //    'dd' => array(),
 308          //    'dl' => array(),
 309          //    'dt' => array(),
 310          'em' => array (), 'i' => array (),
 311          //    'ins' => array('datetime' => array(), 'cite' => array()),
 312          //    'li' => array(),
 313          //    'ol' => array(),
 314          //    'p' => array(),
 315          'q' => array(
 316              'cite' => array ()),
 317          'strike' => array(),
 318          'strong' => array(),
 319          //    'sub' => array(),
 320          //    'sup' => array(),
 321          //    'u' => array(),
 322          //    'ul' => array(),
 323      );
 324  }
 325  
 326  /**
 327   * Filters content and keeps only allowable HTML elements.
 328   *
 329   * This function makes sure that only the allowed HTML element names, attribute
 330   * names and attribute values plus only sane HTML entities will occur in
 331   * $string. You have to remove any slashes from PHP's magic quotes before you
 332   * call this function.
 333   *
 334   * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
 335   * 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet. This covers all common
 336   * link protocols, except for 'javascript' which should not be allowed for
 337   * untrusted users.
 338   *
 339   * @since 1.0.0
 340   *
 341   * @param string $string Content to filter through kses
 342   * @param array $allowed_html List of allowed HTML elements
 343   * @param array $allowed_protocols Optional. Allowed protocol in links.
 344   * @return string Filtered content with only allowed HTML elements
 345   */
 346  function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
 347      $string = wp_kses_no_null($string);
 348      $string = wp_kses_js_entities($string);
 349      $string = wp_kses_normalize_entities($string);
 350      $allowed_html_fixed = wp_kses_array_lc($allowed_html);
 351      $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
 352      return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols);
 353  }
 354  
 355  /**
 356   * You add any kses hooks here.
 357   *
 358   * There is currently only one kses WordPress hook and it is called here. All
 359   * parameters are passed to the hooks and expected to recieve a string.
 360   *
 361   * @since 1.0.0
 362   *
 363   * @param string $string Content to filter through kses
 364   * @param array $allowed_html List of allowed HTML elements
 365   * @param array $allowed_protocols Allowed protocol in links
 366   * @return string Filtered content through 'pre_kses' hook
 367   */
 368  function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
 369      $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
 370      return $string;
 371  }
 372  
 373  /**
 374   * This function returns kses' version number.
 375   *
 376   * @since 1.0.0
 377   *
 378   * @return string KSES Version Number
 379   */
 380  function wp_kses_version() {
 381      return '0.2.2';
 382  }
 383  
 384  /**
 385   * Searches for HTML tags, no matter how malformed.
 386   *
 387   * It also matches stray ">" characters.
 388   *
 389   * @since 1.0.0
 390   *
 391   * @param string $string Content to filter
 392   * @param array $allowed_html Allowed HTML elements
 393   * @param array $allowed_protocols Allowed protocols to keep
 394   * @return string Content with fixed HTML tags
 395   */
 396  function wp_kses_split($string, $allowed_html, $allowed_protocols) {
 397      global $pass_allowed_html, $pass_allowed_protocols;
 398      $pass_allowed_html = $allowed_html;
 399      $pass_allowed_protocols = $allowed_protocols;
 400      return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
 401          create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string);
 402  }
 403  
 404  /**
 405   * Callback for wp_kses_split for fixing malformed HTML tags.
 406   *
 407   * This function does a lot of work. It rejects some very malformed things like
 408   * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
 409   * strip_tags()!). Otherwise it splits the tag into an element and an attribute
 410   * list.
 411   *
 412   * After the tag is split into an element and an attribute list, it is run
 413   * through another filter which will remove illegal attributes and once that is
 414   * completed, will be returned.
 415   *
 416   * @access private
 417   * @since 1.0.0
 418   * @uses wp_kses_attr()
 419   *
 420   * @param string $string Content to filter
 421   * @param array $allowed_html Allowed HTML elements
 422   * @param array $allowed_protocols Allowed protocols to keep
 423   * @return string Fixed HTML element
 424   */
 425  function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
 426      $string = wp_kses_stripslashes($string);
 427  
 428      if (substr($string, 0, 1) != '<')
 429          return '&gt;';
 430      # It matched a ">" character
 431  
 432      if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) {
 433          $string = str_replace(array('<!--', '-->'), '', $matches[1]);
 434          while ( $string != $newstring = wp_kses($string, $allowed_html, $allowed_protocols) )
 435              $string = $newstring;
 436          if ( $string == '' )
 437              return '';
 438          // prevent multiple dashes in comments
 439          $string = preg_replace('/--+/', '-', $string);
 440          // prevent three dashes closing a comment
 441          $string = preg_replace('/-$/', '', $string);
 442          return "<!--{$string}-->";
 443      }
 444      # Allow HTML comments
 445  
 446      if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 447          return '';
 448      # It's seriously malformed
 449  
 450      $slash = trim($matches[1]);
 451      $elem = $matches[2];
 452      $attrlist = $matches[3];
 453  
 454      if (!@isset($allowed_html[strtolower($elem)]))
 455          return '';
 456      # They are using a not allowed HTML element
 457  
 458      if ($slash != '')
 459          return "<$slash$elem>";
 460      # No attributes are allowed for closing elements
 461  
 462      return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols);
 463  }
 464  
 465  /**
 466   * Removes all attributes, if none are allowed for this element.
 467   *
 468   * If some are allowed it calls wp_kses_hair() to split them further, and then
 469   * it builds up new HTML code from the data that kses_hair() returns. It also
 470   * removes "<" and ">" characters, if there are any left. One more thing it does
 471   * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 472   * in the returned code as well.
 473   *
 474   * @since 1.0.0
 475   *
 476   * @param string $element HTML element/tag
 477   * @param string $attr HTML attributes from HTML element to closing HTML element tag
 478   * @param array $allowed_html Allowed HTML elements
 479   * @param array $allowed_protocols Allowed protocols to keep
 480   * @return string Sanitized HTML element
 481   */
 482  function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
 483      # Is there a closing XHTML slash at the end of the attributes?
 484  
 485      $xhtml_slash = '';
 486      if (preg_match('%\s/\s*$%', $attr))
 487          $xhtml_slash = ' /';
 488  
 489      # Are any attributes allowed at all for this element?
 490  
 491      if (@ count($allowed_html[strtolower($element)]) == 0)
 492          return "<$element$xhtml_slash>";
 493  
 494      # Split it
 495  
 496      $attrarr = wp_kses_hair($attr, $allowed_protocols);
 497  
 498      # Go through $attrarr, and save the allowed attributes for this element
 499      # in $attr2
 500  
 501      $attr2 = '';
 502  
 503      foreach ($attrarr as $arreach) {
 504          if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])]))
 505              continue; # the attribute is not allowed
 506  
 507          $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])];
 508          if ($current == '')
 509              continue; # the attribute is not allowed
 510  
 511          if (!is_array($current))
 512              $attr2 .= ' '.$arreach['whole'];
 513          # there are no checks
 514  
 515          else {
 516              # there are some checks
 517              $ok = true;
 518              foreach ($current as $currkey => $currval)
 519                  if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
 520                      $ok = false;
 521                      break;
 522                  }
 523  
 524              if ($ok)
 525                  $attr2 .= ' '.$arreach['whole']; # it passed them
 526          } # if !is_array($current)
 527      } # foreach
 528  
 529      # Remove any "<" or ">" characters
 530  
 531      $attr2 = preg_replace('/[<>]/', '', $attr2);
 532  
 533      return "<$element$attr2$xhtml_slash>";
 534  }
 535  
 536  /**
 537   * Builds an attribute list from string containing attributes.
 538   *
 539   * This function does a lot of work. It parses an attribute list into an array
 540   * with attribute data, and tries to do the right thing even if it gets weird
 541   * input. It will add quotes around attribute values that don't have any quotes
 542   * or apostrophes around them, to make it easier to produce HTML code that will
 543   * conform to W3C's HTML specification. It will also remove bad URL protocols
 544   * from attribute values.  It also reduces duplicate attributes by using the
 545   * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 546   *
 547   * @since 1.0.0
 548   *
 549   * @param string $attr Attribute list from HTML element to closing HTML element tag
 550   * @param array $allowed_protocols Allowed protocols to keep
 551   * @return array List of attributes after parsing
 552   */
 553  function wp_kses_hair($attr, $allowed_protocols) {
 554      $attrarr = array ();
 555      $mode = 0;
 556      $attrname = '';
 557      $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
 558  
 559      # Loop through the whole attribute list
 560  
 561      while (strlen($attr) != 0) {
 562          $working = 0; # Was the last operation successful?
 563  
 564          switch ($mode) {
 565              case 0 : # attribute name, href for instance
 566  
 567                  if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
 568                      $attrname = $match[1];
 569                      $working = $mode = 1;
 570                      $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 571                  }
 572  
 573                  break;
 574  
 575              case 1 : # equals sign or valueless ("selected")
 576  
 577                  if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 578                      {
 579                      $working = 1;
 580                      $mode = 2;
 581                      $attr = preg_replace('/^\s*=\s*/', '', $attr);
 582                      break;
 583                  }
 584  
 585                  if (preg_match('/^\s+/', $attr)) # valueless
 586                      {
 587                      $working = 1;
 588                      $mode = 0;
 589                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 590                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 591                      }
 592                      $attr = preg_replace('/^\s+/', '', $attr);
 593                  }
 594  
 595                  break;
 596  
 597              case 2 : # attribute value, a URL after href= for instance
 598  
 599                  if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
 600                      # "value"
 601                      {
 602                      $thisval = $match[1];
 603                      if ( in_array($attrname, $uris) )
 604                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 605  
 606                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 607                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 608                      }
 609                      $working = 1;
 610                      $mode = 0;
 611                      $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 612                      break;
 613                  }
 614  
 615                  if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
 616                      # 'value'
 617                      {
 618                      $thisval = $match[1];
 619                      if ( in_array($attrname, $uris) )
 620                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 621  
 622                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 623                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
 624                      }
 625                      $working = 1;
 626                      $mode = 0;
 627                      $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 628                      break;
 629                  }
 630  
 631                  if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
 632                      # value
 633                      {
 634                      $thisval = $match[1];
 635                      if ( in_array($attrname, $uris) )
 636                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 637  
 638                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 639                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 640                      }
 641                      # We add quotes to conform to W3C's HTML spec.
 642                      $working = 1;
 643                      $mode = 0;
 644                      $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 645                  }
 646  
 647                  break;
 648          } # switch
 649  
 650          if ($working == 0) # not well formed, remove and try again
 651          {
 652              $attr = wp_kses_html_error($attr);
 653              $mode = 0;
 654          }
 655      } # while
 656  
 657      if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
 658          # special case, for when the attribute list ends with a valueless
 659          # attribute like "selected"
 660          $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 661  
 662      return $attrarr;
 663  }
 664  
 665  /**
 666   * Performs different checks for attribute values.
 667   *
 668   * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 669   * and "valueless" with even more checks to come soon.
 670   *
 671   * @since 1.0.0
 672   *
 673   * @param string $value Attribute value
 674   * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 675   * @param string $checkname What $checkvalue is checking for.
 676   * @param mixed $checkvalue What constraint the value should pass
 677   * @return bool Whether check passes (true) or not (false)
 678   */
 679  function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
 680      $ok = true;
 681  
 682      switch (strtolower($checkname)) {
 683          case 'maxlen' :
 684              # The maxlen check makes sure that the attribute value has a length not
 685              # greater than the given value. This can be used to avoid Buffer Overflows
 686              # in WWW clients and various Internet servers.
 687  
 688              if (strlen($value) > $checkvalue)
 689                  $ok = false;
 690              break;
 691  
 692          case 'minlen' :
 693              # The minlen check makes sure that the attribute value has a length not
 694              # smaller than the given value.
 695  
 696              if (strlen($value) < $checkvalue)
 697                  $ok = false;
 698              break;
 699  
 700          case 'maxval' :
 701              # The maxval check does two things: it checks that the attribute value is
 702              # an integer from 0 and up, without an excessive amount of zeroes or
 703              # whitespace (to avoid Buffer Overflows). It also checks that the attribute
 704              # value is not greater than the given value.
 705              # This check can be used to avoid Denial of Service attacks.
 706  
 707              if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 708                  $ok = false;
 709              if ($value > $checkvalue)
 710                  $ok = false;
 711              break;
 712  
 713          case 'minval' :
 714              # The minval check checks that the attribute value is a positive integer,
 715              # and that it is not smaller than the given value.
 716  
 717              if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 718                  $ok = false;
 719              if ($value < $checkvalue)
 720                  $ok = false;
 721              break;
 722  
 723          case 'valueless' :
 724              # The valueless check checks if the attribute has a value
 725              # (like <a href="blah">) or not (<option selected>). If the given value
 726              # is a "y" or a "Y", the attribute must not have a value.
 727              # If the given value is an "n" or an "N", the attribute must have one.
 728  
 729              if (strtolower($checkvalue) != $vless)
 730                  $ok = false;
 731              break;
 732      } # switch
 733  
 734      return $ok;
 735  }
 736  
 737  /**
 738   * Sanitize string from bad protocols.
 739   *
 740   * This function removes all non-allowed protocols from the beginning of
 741   * $string. It ignores whitespace and the case of the letters, and it does
 742   * understand HTML entities. It does its work in a while loop, so it won't be
 743   * fooled by a string like "javascript:javascript:alert(57)".
 744   *
 745   * @since 1.0.0
 746   *
 747   * @param string $string Content to filter bad protocols from
 748   * @param array $allowed_protocols Allowed protocols to keep
 749   * @return string Filtered content
 750   */
 751  function wp_kses_bad_protocol($string, $allowed_protocols) {
 752      $string = wp_kses_no_null($string);
 753      $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
 754      $string2 = $string.'a';
 755  
 756      while ($string != $string2) {
 757          $string2 = $string;
 758          $string = wp_kses_bad_protocol_once($string, $allowed_protocols);
 759      } # while
 760  
 761      return $string;
 762  }
 763  
 764  /**
 765   * Removes any NULL characters in $string.
 766   *
 767   * @since 1.0.0
 768   *
 769   * @param string $string
 770   * @return string
 771   */
 772  function wp_kses_no_null($string) {
 773      $string = preg_replace('/\0+/', '', $string);
 774      $string = preg_replace('/(\\\\0)+/', '', $string);
 775  
 776      return $string;
 777  }
 778  
 779  /**
 780   * Strips slashes from in front of quotes.
 781   *
 782   * This function changes the character sequence  \"  to just  ". It leaves all
 783   * other slashes alone. It's really weird, but the quoting from
 784   * preg_replace(//e) seems to require this.
 785   *
 786   * @since 1.0.0
 787   *
 788   * @param string $string String to strip slashes
 789   * @return string Fixed strings with quoted slashes
 790   */
 791  function wp_kses_stripslashes($string) {
 792      return preg_replace('%\\\\"%', '"', $string);
 793  }
 794  
 795  /**
 796   * Goes through an array and changes the keys to all lower case.
 797   *
 798   * @since 1.0.0
 799   *
 800   * @param array $inarray Unfiltered array
 801   * @return array Fixed array with all lowercase keys
 802   */
 803  function wp_kses_array_lc($inarray) {
 804      $outarray = array ();
 805  
 806      foreach ( (array) $inarray as $inkey => $inval) {
 807          $outkey = strtolower($inkey);
 808          $outarray[$outkey] = array ();
 809  
 810          foreach ( (array) $inval as $inkey2 => $inval2) {
 811              $outkey2 = strtolower($inkey2);
 812              $outarray[$outkey][$outkey2] = $inval2;
 813          } # foreach $inval
 814      } # foreach $inarray
 815  
 816      return $outarray;
 817  }
 818  
 819  /**
 820   * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 821   *
 822   * @since 1.0.0
 823   *
 824   * @param string $string
 825   * @return string
 826   */
 827  function wp_kses_js_entities($string) {
 828      return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 829  }
 830  
 831  /**
 832   * Handles parsing errors in wp_kses_hair().
 833   *
 834   * The general plan is to remove everything to and including some whitespace,
 835   * but it deals with quotes and apostrophes as well.
 836   *
 837   * @since 1.0.0
 838   *
 839   * @param string $string
 840   * @return string
 841   */
 842  function wp_kses_html_error($string) {
 843      return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 844  }
 845  
 846  /**
 847   * Sanitizes content from bad protocols and other characters.
 848   *
 849   * This function searches for URL protocols at the beginning of $string, while
 850   * handling whitespace and HTML entities.
 851   *
 852   * @since 1.0.0
 853   *
 854   * @param string $string Content to check for bad protocols
 855   * @param string $allowed_protocols Allowed protocols
 856   * @return string Sanitized content
 857   */
 858  function wp_kses_bad_protocol_once($string, $allowed_protocols) {
 859      global $_kses_allowed_protocols;
 860      $_kses_allowed_protocols = $allowed_protocols;
 861  
 862      $string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
 863      if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )
 864          $string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]);
 865      else
 866          $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);
 867  
 868      return $string;
 869  }
 870  
 871  /**
 872   * Callback for wp_kses_bad_protocol_once() regular expression.
 873   *
 874   * This function processes URL protocols, checks to see if they're in the
 875   * white-list or not, and returns different data depending on the answer.
 876   *
 877   * @access private
 878   * @since 1.0.0
 879   *
 880   * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 881   * @return string Sanitized content
 882   */
 883  function wp_kses_bad_protocol_once2($matches) {
 884      global $_kses_allowed_protocols;
 885  
 886      if ( is_array($matches) ) {
 887          if ( ! isset($matches[1]) || empty($matches[1]) )
 888              return '';
 889  
 890          $string = $matches[1];
 891      } else {
 892          $string = $matches;
 893      }
 894  
 895      $string2 = wp_kses_decode_entities($string);
 896      $string2 = preg_replace('/\s/', '', $string2);
 897      $string2 = wp_kses_no_null($string2);
 898      $string2 = preg_replace('/\xad+/', '', $string2);
 899      # deals with Opera "feature"
 900      $string2 = strtolower($string2);
 901  
 902      $allowed = false;
 903      foreach ( (array) $_kses_allowed_protocols as $one_protocol)
 904          if (strtolower($one_protocol) == $string2) {
 905              $allowed = true;
 906              break;
 907          }
 908  
 909      if ($allowed)
 910          return "$string2:";
 911      else
 912          return '';
 913  }
 914  
 915  /**
 916   * Converts and fixes HTML entities.
 917   *
 918   * This function normalizes HTML entities. It will convert "AT&T" to the correct
 919   * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 920   *
 921   * @since 1.0.0
 922   *
 923   * @param string $string Content to normalize entities
 924   * @return string Content with normalized entities
 925   */
 926  function wp_kses_normalize_entities($string) {
 927      # Disarm all entities by converting & to &amp;
 928  
 929      $string = str_replace('&', '&amp;', $string);
 930  
 931      # Change back the allowed entities in our entity whitelist
 932  
 933      $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
 934      $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
 935      $string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
 936  
 937      return $string;
 938  }
 939  
 940  /**
 941   * Callback for wp_kses_normalize_entities() regular expression.
 942   *
 943   * This function helps wp_kses_normalize_entities() to only accept 16 bit values
 944   * and nothing more for &#number; entities.
 945   *
 946   * @access private
 947   * @since 1.0.0
 948   *
 949   * @param array $matches preg_replace_callback() matches array
 950   * @return string Correctly encoded entity
 951   */
 952  function wp_kses_normalize_entities2($matches) {
 953      if ( ! isset($matches[1]) || empty($matches[1]) )
 954          return '';
 955  
 956      $i = $matches[1];
 957      return ( ( ! valid_unicode($i) ) || ($i > 65535) ? "&amp;#$i;" : "&#$i;" );
 958  }
 959  
 960  /**
 961   * Callback for wp_kses_normalize_entities() for regular expression.
 962   *
 963   * This function helps wp_kses_normalize_entities() to only accept valid Unicode
 964   * numeric entities in hex form.
 965   *
 966   * @access private
 967   *
 968   * @param array $matches preg_replace_callback() matches array
 969   * @return string Correctly encoded entity
 970   */
 971  function wp_kses_normalize_entities3($matches) {
 972      if ( ! isset($matches[2]) || empty($matches[2]) )
 973          return '';
 974  
 975      $hexchars = $matches[2];
 976      return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : "&#x$hexchars;" );
 977  }
 978  
 979  /**
 980   * Helper function to determine if a Unicode value is valid.
 981   *
 982   * @param int $i Unicode value
 983   * @return bool true if the value was a valid Unicode number
 984   */
 985  function valid_unicode($i) {
 986      return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
 987              ($i >= 0x20 && $i <= 0xd7ff) ||
 988              ($i >= 0xe000 && $i <= 0xfffd) ||
 989              ($i >= 0x10000 && $i <= 0x10ffff) );
 990  }
 991  
 992  /**
 993   * Convert all entities to their character counterparts.
 994   *
 995   * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
 996   * anything with other entities like &auml;, but we don't need them in the URL
 997   * protocol whitelisting system anyway.
 998   *
 999   * @since 1.0.0
1000   *
1001   * @param string $string Content to change entities
1002   * @return string Content after decoded entities
1003   */
1004  function wp_kses_decode_entities($string) {
1005      $string = preg_replace_callback('/&#([0-9]+);/', create_function('$match', 'return chr($match[1]);'), $string);
1006      $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', create_function('$match', 'return chr(hexdec($match[1]));'), $string);
1007  
1008      return $string;
1009  }
1010  
/**
 * Runs slashed content through wp_kses() using the $allowedtags rules.
 *
 * The content is unslashed before filtering and slashed again afterwards.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter
 * @return string Filtered content
 */
function wp_filter_kses($data) {
    global $allowedtags;

    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, $allowedtags );

    return addslashes( $filtered );
}
1024  
/**
 * Runs slashed post content through wp_kses() using the $allowedposttags rules.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms. The content is unslashed before filtering and slashed
 * again afterwards.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
    global $allowedposttags;

    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, $allowedposttags );

    return addslashes( $filtered );
}
1041  
/**
 * Runs slashed content through wp_kses() with an empty list of allowed HTML.
 *
 * The content is unslashed before filtering and slashed again afterwards.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
    $nothing_allowed = array();

    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, $nothing_allowed );

    return addslashes( $filtered );
}
1053  
/**
 * Registers every Kses input form content filter.
 *
 * All hooks use the default priority. wp_filter_kses() is attached to
 * 'pre_comment_content' and 'title_save_pre'; wp_filter_post_kses() is
 * attached to 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre'.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
    $filters = array(
        // Normal filtering.
        'pre_comment_content' => 'wp_filter_kses',
        'title_save_pre' => 'wp_filter_kses',

        // Post filtering
        'content_save_pre' => 'wp_filter_post_kses',
        'excerpt_save_pre' => 'wp_filter_post_kses',
        'content_filtered_save_pre' => 'wp_filter_post_kses'
    );

    foreach ( $filters as $hook => $callback )
        add_filter($hook, $callback);
}
1076  
/**
 * Unregisters every Kses input form content filter.
 *
 * Detaches wp_filter_kses() from 'pre_comment_content' and 'title_save_pre',
 * and wp_filter_post_kses() from 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre'.
 *
 * Does not remove the kses_init() function from 'init' hook (priority is
 * default). Also does not remove kses_init() function from 'set_current_user'
 * hook (priority is also default).
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
    $filters = array(
        // Normal filtering.
        'pre_comment_content' => 'wp_filter_kses',
        'title_save_pre' => 'wp_filter_kses',

        // Post filtering
        'content_save_pre' => 'wp_filter_post_kses',
        'excerpt_save_pre' => 'wp_filter_post_kses',
        'content_filtered_save_pre' => 'wp_filter_post_kses'
    );

    foreach ( $filters as $hook => $callback )
        remove_filter($hook, $callback);
}
1099  
/**
 * Sets up the Kses filters for input form content.
 *
 * All Kses filters are removed first, in case the current user does not need
 * to have Kses filter the content. They are then added back unless the
 * current user has the 'unfiltered_html' capability.
 *
 * If you remove the kses_init() function from 'init' hook and
 * 'set_current_user' (priority is default), then none of the Kses filter hooks
 * will be added.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 *        does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
    // Start from a clean slate on every call.
    kses_remove_filters();

    if ( ! current_user_can('unfiltered_html') ) {
        kses_init_filters();
    }
}
1122  
// Run kses_init() on 'init' and again on every 'set_current_user', so the
// filters are re-evaluated for whichever user is current.
add_action( 'init', 'kses_init' );
add_action( 'set_current_user', 'kses_init' );
1125  ?>


Generated: Mon Mar 23 16:23:02 2009 Cross-referenced by PHPXref 0.7