Upgrade 1-11.38

2026-03-30 14:10:30 +02:00
parent f2a7e6d1fc
commit ac648ef29d
24665 changed files with 69682 additions and 2205004 deletions
@@ -7,7 +7,7 @@
 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
 * FILE, changes will be overwritten the next time the script is run.
 *
- * @version 4.18.0
+ * @version 4.19.0
 *
 * @warning
 *      You must *not* include any other HTML Purifier files before this file,
@@ -19,7 +19,7 @@
 */

 /*
-    HTML Purifier 4.18.0 - Standards Compliant HTML Filtering
+    HTML Purifier 4.19.0 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
     * Version of HTML Purifier.
     * @type string
     */
-    public $version = '4.18.0';
+    public $version = '4.19.0';

    /**
     * Constant with version of HTML Purifier.
     */
-    const VERSION = '4.18.0';
+    const VERSION = '4.19.0';

    /**
     * Global configuration object.
@@ -195,7 +195,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
-            // when there is a double quote present, but I have choosen
+            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
@@ -25,12 +25,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
            'rev' => 'AllowedRev'
        );
        if (!isset($configLookup[$name])) {
-            trigger_error(
-                'Unrecognized attribute name for link ' .
-                'relationship.',
-                E_USER_ERROR
-            );
-            return;
+            throw new Exception('Unrecognized attribute name for link relationship.');
        }
        $this->name = $configLookup[$name];
    }
@@ -37,7 +37,7 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
            }
        }

-        //      IPv4-compatiblity check
+        //      IPv4-compatibility check
        if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) {
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $ip = explode('.', $find[0]);
@@ -3,7 +3,7 @@
 // this MUST be placed in post, as it assumes that any value in dir is valid

 /**
- * Post-trasnform that ensures that bdo tags have the dir attribute set.
+ * Post-transform that ensures that bdo tags have the dir attribute set.
 */
 class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
 {
@@ -77,7 +77,7 @@ class HTMLPurifier_AttrTypes
        }

        if (!isset($this->info[$type])) {
-            trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
+            throw new Exception('Cannot retrieve undefined attribute type ' . $type);
            return;
        }
        return $this->info[$type]->make($string);
@@ -135,7 +135,7 @@ class HTMLPurifier_AttrValidator
            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
-            // resolve (certain ones would be completely overriden,
+            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }

@@ -5,7 +5,7 @@ if (!defined('HTMLPURIFIER_PREFIX')) {
    define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));
 }

-// accomodations for versions earlier than 5.0.2
+// accommodations for versions earlier than 5.0.2
 // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
 if (!defined('PHP_EOL')) {
    switch (strtoupper(substr(PHP_OS, 0, 3))) {
@@ -26,6 +26,11 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
            false
        );

+        $this->info['direction'] = new HTMLPurifier_AttrDef_Enum(
+            ['ltr', 'rtl'],
+            false
+        );
+
        $border_style =
            $this->info['border-bottom-style'] =
            $this->info['border-right-style'] =
@@ -21,7 +21,7 @@ class HTMLPurifier_Config
     * HTML Purifier's version
     * @type string
     */
-    public $version = '4.18.0';
+    public $version = '4.19.0';

    /**
     * Whether or not to automatically finalize
@@ -898,7 +898,11 @@ class HTMLPurifier_Config
                break;
            }
        }
-        trigger_error($msg . $extra, $no);
+        if ($no == E_USER_ERROR) {
+          throw new Exception($msg . $extra);
+        } else {
+          trigger_error($msg . $extra, $no);
+        }
    }

    /**
@@ -72,7 +72,7 @@ class HTMLPurifier_ConfigSchema
        $r = unserialize($contents);
        if (!$r) {
            $hash = sha1($contents);
-            trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
+            throw new Exception("Unserialization of configuration schema failed, sha1 of file was $hash");
        }
        return $r;
    }
@@ -66,7 +66,7 @@ class HTMLPurifier_ConfigSchema_Interchange_Directive
    public $version;

    /**
-     * ID of directive that supercedes this old directive.
+     * ID of directive that supersedes this old directive.
     * Null if not deprecated.
     * @type HTMLPurifier_ConfigSchema_Interchange_Id
     */
@@ -5,10 +5,10 @@ DEFAULT: ''
 --DESCRIPTION--
 Temporary prefix for IDs used in conjunction with %Attr.IDPrefix.  If you
 need to allow multiple sets of user content on web page, you may need to
-have a seperate prefix that changes with each iteration.  This way,
-seperately submitted user content displayed on the same page doesn't
+have a separate prefix that changes with each iteration.  This way,
+separately submitted user content displayed on the same page doesn't
 clobber each other. Ideal values are unique identifiers for the content it
 represents (i.e. the id of the row in the database). Be sure to add a
-seperator (like an underscore) at the end.  Warning: this directive will
+separator (like an underscore) at the end.  Warning: this directive will
 not work unless %Attr.IDPrefix is set to a non-empty value!
 --# vim: et sw=4 sts=4
@@ -1,6 +1,6 @@
 CSS.MaxImgLength
 TYPE: string/null
-DEFAULT: '1200px'
+DEFAULT: null
 VERSION: 3.1.1
 --DESCRIPTION--
 <p>
@@ -7,7 +7,8 @@ This parameter determines whether or not the filter should convert
 input that is a full document with html and body tags to a fragment
 of just the contents of a body tag. This parameter is simply something
 HTML Purifier can do during an edge-case: for most inputs, this
-processing is not necessary.
+processing is not necessary. Warning: Full HTML purification has not
+been implemented. See GitHub issue #7.

 --ALIASES--
 Core.AcceptFullDocuments
@@ -8,6 +8,6 @@ converting all non-ASCII characters into decimal numeric entities before
 converting it to its native encoding. This means that even characters that
 can be expressed in the non-UTF-8 encoding will be entity-ized, which can
 be a real downer for encodings like Big5. It also assumes that the ASCII
-repetoire is available, although this is the case for almost all encodings.
+repertoire is available, although this is the case for almost all encodings.
 Anyway, use UTF-8!
 --# vim: et sw=4 sts=4
@@ -16,7 +16,7 @@ DEFAULT: NULL
  </dd>
  <dt><em>string</em> lexer identifier</dt>
  <dd>
-    This is a slim way of manually overridding the implementation.
+    This is a slim way of manually overriding the implementation.
    Currently recognized values are: DOMLex (the default PHP5
 implementation)
    and DirectLex (the default PHP4 implementation). Only use this if
@@ -1,6 +1,6 @@
 HTML.MaxImgLength
 TYPE: int/null
-DEFAULT: 1200
+DEFAULT: null
 VERSION: 3.1.1
 --DESCRIPTION--
 <p>
@@ -6,7 +6,7 @@ DEFAULT: false
 <p>
    Whether or not to permit iframe tags in untrusted documents.  This
    directive must be accompanied by a whitelist of permitted iframes,
-    such as %URI.SafeIframeRegexp, otherwise it will fatally error.
+    such as %URI.SafeIframeRegexp or %URI.SafeIframeHosts, otherwise it will fatally error.
    This directive has no effect on strict doctypes, as iframes are not
    valid.
 </p>
@@ -142,12 +142,11 @@ class HTMLPurifier_ContentSets
        if ($return !== false) {
            return $return;
        }
-        // error-out
-        trigger_error(
+
+        throw new Exception(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
-        return false;
    }

    /**
@@ -24,11 +24,7 @@ class HTMLPurifier_Context
    public function register($name, &$ref)
    {
        if (array_key_exists($name, $this->_storage)) {
-            trigger_error(
-                "Name $name produces collision, cannot re-register",
-                E_USER_ERROR
-            );
-            return;
+            throw new Exception("Name $name produces collision, cannot re-register");
        }
        $this->_storage[$name] =& $ref;
    }
@@ -43,10 +39,7 @@ class HTMLPurifier_Context
    {
        if (!array_key_exists($name, $this->_storage)) {
            if (!$ignore_error) {
-                trigger_error(
-                    "Attempted to retrieve non-existent variable $name",
-                    E_USER_ERROR
-                );
+                throw new Exception("Attempted to retrieve non-existent variable $name");
            }
            $var = null; // so we can return by reference
            return $var;
@@ -61,11 +54,7 @@ class HTMLPurifier_Context
    public function destroy($name)
    {
        if (!array_key_exists($name, $this->_storage)) {
-            trigger_error(
-                "Attempted to destroy non-existent variable $name",
-                E_USER_ERROR
-            );
-            return;
+            throw new Exception("Attempted to destroy non-existent variable $name");
        }
        unset($this->_storage[$name]);
    }
@@ -139,8 +139,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
                continue;
            }
            $key = substr($filename, 0, strlen($filename) - 4);
-            if ($this->isOld($key, $config)) {
-                unlink($dir . '/' . $filename);
+            $file = $dir . '/' . $filename;
+            if ($this->isOld($key, $config) && file_exists($file)) {
+                unlink($file);
            }
        }
        closedir($dh);
@@ -86,7 +86,7 @@ class HTMLPurifier_DoctypeRegistry
            $doctype = $this->aliases[$doctype];
        }
        if (!isset($this->doctypes[$doctype])) {
-            trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
+            throw new Exception('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
            $anon = new HTMLPurifier_Doctype($doctype);
            return $anon;
        }
@@ -12,7 +12,7 @@ class HTMLPurifier_Encoder
     */
    private function __construct()
    {
-        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
+        throw new Exception('Cannot instantiate encoder, call methods statically');
    }

    /**
@@ -390,7 +390,7 @@ class HTMLPurifier_Encoder
            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
            if ($str === false) {
                // $encoding is not a valid encoding
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
+                throw new Exception('Invalid encoding ' . $encoding);
                return '';
            }
            // If the string is bjorked by Shift_JIS or a similar encoding
@@ -404,12 +404,11 @@ class HTMLPurifier_Encoder
        }
        $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
        if ($bug == self::ICONV_OK) {
-            trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
+            throw new Exception('Encoding not supported, please install iconv');
        } else {
-            trigger_error(
+            throw new Exception(
                'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
-                'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
-                E_USER_ERROR
+                'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541'
            );
        }
    }
@@ -454,7 +453,7 @@ class HTMLPurifier_Encoder
            $str = mb_convert_encoding($str, 'ISO-8859-1', 'UTF-8');
            return $str;
        }
-        trigger_error('Encoding not supported', E_USER_ERROR);
+        throw new Exception('Encoding not supported');
        // You might be tempted to assume that the ASCII representation
        // might be OK, however, this is *not* universally true over all
        // encodings.  So we take the conservative route here, rather
@@ -545,10 +544,9 @@ class HTMLPurifier_Encoder
            } elseif (($c = strlen($r)) < 9000) {
                $code = self::ICONV_TRUNCATES;
            } elseif ($c > 9000) {
-                trigger_error(
+                throw new Exception(
                    'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
-                    'include your iconv version as per phpversion()',
-                    E_USER_ERROR
+                    'include your iconv version as per phpversion()'
                );
            } else {
                $code = self::ICONV_OK;
@@ -5,7 +5,7 @@
 // $config or $context to the callback functions.

 /**
- * Handles referencing and derefencing character entities
+ * Handles referencing and dereferencing character entities
 */
 class HTMLPurifier_EntityParser
 {
@@ -4,7 +4,7 @@
 * Represents a pre or post processing filter on HTML Purifier's output
 *
 * Sometimes, a little ad-hoc fixing of HTML has to be done before
- * it gets sent through HTML Purifier: you can use filters to acheive
+ * it gets sent through HTML Purifier: you can use filters to achieve
 * this effect. For instance, YouTube videos can be preserved using
 * this manner. You could have used a decorator for this task, but
 * PHP's support for them is not terribly robust, so we're going
@@ -19,7 +19,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
        $pre_regex = '#<object[^>]+>.+?' .
            '(?:http:)?//www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
        $pre_replace = '<span class="youtube-embed">\1</span>';
-        return preg_replace($pre_regex, $pre_replace, $html);
+        return preg_replace($pre_regex, $pre_replace, (string)$html);
    }

    /**
@@ -31,7 +31,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
    public function postFilter($html, $config, $context)
    {
        $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
-        return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
+        return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), (string)$html);
    }

    /**
@@ -244,7 +244,7 @@ class HTMLPurifier_Generator
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable).  Still, we have a configuration knob for
-            // this, since this transformation is not necesary if you
+            // this, since this transformation is not necessary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix) {
@@ -264,9 +264,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
-            trigger_error(
-                'Cannot use non-block element as block wrapper',
-                E_USER_ERROR
+            throw new Exception(
+                'Cannot use non-block element as block wrapper'
            );
        }

@@ -276,11 +275,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
-            trigger_error(
-                'Cannot use unrecognized element as parent',
-                E_USER_ERROR
-            );
-            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
+            throw new Exception('Cannot use unrecognized element as parent');
        }

        // support template text
@@ -28,7 +28,7 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule

    // HTML 4.01 specifies that ins/del must not contain block
    // elements when used in an inline context, chameleon is
-    // a complicated workaround to acheive this effect
+    // a complicated workaround to achieve this effect

    // Inline context ! Block context (exclamation mark is
    // separator, see getChildDef for parsing)
@@ -2,7 +2,7 @@

 /**
 * XHTML 1.1 Ruby Annotation Module, defines elements that indicate
- * short runs of text alongside base text for annotation or pronounciation.
+ * short runs of text alongside base text for annotation or pronunciation.
 */
 class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
 {
@@ -112,9 +112,8 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
-            trigger_error(
-                'Default level ' . $this->defaultLevel . ' does not exist',
-                E_USER_ERROR
+            throw new Exception(
+                'Default level ' . $this->defaultLevel . ' does not exist'
            );
            return;
        }
@@ -162,8 +161,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
                    $e->$type = $fix;
                    break;
                default:
-                    trigger_error("Fix type $type not supported", E_USER_ERROR);
-                    break;
+                    throw new Exception("Fix type $type not supported");
            }
        }
    }
@@ -1,7 +1,7 @@
 <?php

 /**
- * Name is deprecated, but allowed in strict doctypes, so onl
+ * Name is deprecated, but allowed in strict doctypes, so only
 */
 class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
 {
@@ -183,11 +183,7 @@ class HTMLPurifier_HTMLModuleManager
            if (!$ok) {
                $module = $original_module;
                if (!class_exists($module)) {
-                    trigger_error(
-                        $original_module . ' module does not exist',
-                        E_USER_ERROR
-                    );
-                    return;
+                    throw new Exception($original_module . ' module does not exist');
                }
            }
            $module = new $module();
@@ -173,14 +173,8 @@ class HTMLPurifier_LanguageFactory

            // infinite recursion guard
            if (isset($languages_seen[$code])) {
-                trigger_error(
-                    'Circular fallback reference in language ' .
-                    $code,
-                    E_USER_ERROR
-                );
-                $fallback = 'en';
+                throw new Exception('Circular fallback reference in language ' . $code);
            }
-            $language_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
@@ -238,7 +238,7 @@ class HTMLPurifier_Lexer
     */
    public function tokenizeHTML($string, $config, $context)
    {
-        trigger_error('Call to abstract class', E_USER_ERROR);
+        throw new Exception('Call to abstract class');
    }

    /**
@@ -52,14 +52,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        // attempt to armor stray angled brackets that cannot possibly
        // form tags and thus are probably being used as emoticons
        if ($config->get('Core.AggressivelyFixLt')) {
-            $char = '[^a-z!\/]';
-            $comment = "/<!--(.*?)(-->|\z)/is";
-            $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
-            do {
-                $old = $html;
-                $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
-            } while ($html !== $old);
-            $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
+            $html = $this->aggressivelyFixLt($html);
        }

        // preprocess html, essential for UTF-8
@@ -288,7 +281,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     */
    public function callbackUndoCommentSubst($matches)
    {
-        return '<!--' . strtr($matches[1], array('&amp;' => '&', '&lt;' => '<')) . $matches[2];
+        return '<!--' . $this->undoCommentSubstr($matches[1]) . $matches[2];
    }

    /**
@@ -299,7 +292,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     */
    public function callbackArmorCommentEntities($matches)
    {
-        return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
+        return '<!--' . $this->armorEntities($matches[1]) . $matches[2];
+    }
+
+    /**
+     * Replaces every `&` in the given string with `&amp;`.
+     *
+     * Used by callbackArmorCommentEntities() on comment contents, and passed
+     * as the callback to manipulateHtmlComments() from aggressivelyFixLt().
+     *
+     * @param string $string Text to armor.
+     * @return string The text with each ampersand written as `&amp;`.
+     */
+    protected function armorEntities($string)
+    {
+        return str_replace('&', '&amp;', $string);
+    }
+
+    /**
+     * Converts `&amp;` to `&` and `&lt;` to `<` in the given string.
+     *
+     * Both replacements are done by a single strtr() call, so text produced
+     * by one replacement is not scanned again for the other.
+     *
+     * Used by callbackUndoCommentSubst() on comment contents, and passed as
+     * the callback to manipulateHtmlComments() from aggressivelyFixLt().
+     *
+     * @param string $string Text to convert.
+     * @return string The converted text.
+     */
+    protected function undoCommentSubstr($string)
+    {
+        return strtr($string, array('&amp;' => '&', '&lt;' => '<'));
     }

    /**
@@ -335,6 +346,66 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        $ret .= '</body></html>';
        return $ret;
    }
+
+    /**
+     * Rewrites `<` as `&lt;` wherever it is followed by a character other
+     * than a letter, `!` or `/` (matched case-insensitively).
+     *
+     * Steps, in order:
+     *  1. The contents of each terminated HTML comment are passed through
+     *     armorEntities(), turning `&` into `&amp;`.
+     *  2. The `<` replacement is applied to the whole string, repeatedly,
+     *     until a pass leaves the string unchanged.
+     *  3. The contents of each terminated HTML comment are passed through
+     *     undoCommentSubstr(), turning `&amp;` into `&` and `&lt;` into `<`.
+     *
+     * A `<` that is the last character of the string is not replaced,
+     * because the pattern requires a following character.
+     *
+     * @param string $html Markup to process.
+     * @return string The processed markup.
+     */
+    protected function aggressivelyFixLt($html)
+    {
+        $char = '[^a-z!\/]';
+        $html = $this->manipulateHtmlComments($html, array($this, 'armorEntities'));
+
+        do {
+            $old = $html;
+            $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
+        } while ($html !== $old);
+
+        return $this->manipulateHtmlComments($html, array($this, 'undoCommentSubstr'));
+    }
+
+    /**
+     * Modify HTML comments in the given HTML content using a callback.
+     *
+     * The string is scanned left to right for `<!--`. For each one that has
+     * a following `-->`, the text between the two markers is handed to the
+     * callback and replaced by the callback's return value; the markers
+     * themselves are kept. Scanning resumes after the rewritten comment, so
+     * text returned by the callback is not scanned again.
+     *
+     * If a `<!--` has no following `-->`, scanning stops there and the rest
+     * of the string, including that unterminated comment, is returned
+     * unmodified.
+     *
+     * @param string $html Markup to scan.
+     * @param callable $callback Receives the comment content as its only
+     *                           argument; its return value is concatenated
+     *                           into the result as the new content.
+     * @return string The markup with each terminated comment's content replaced.
+     */
+    protected function manipulateHtmlComments($html, callable $callback)
+    {
+        $offset = 0;
+        $startTag = '<!--';
+        $endTag = '-->';
+
+        while (($startPos = strpos($html, $startTag, $offset)) !== false) {
+            $startPos += strlen($startTag); // Move past `<!--`
+            $endPos = strpos($html, $endTag, $startPos);
+
+            if ($endPos === false) {
+                // No matching ending comment tag found
+                break;
+            }
+
+            // Extract the original comment content
+            $commentContent = substr($html, $startPos, $endPos - $startPos);
+
+            // Apply the callback to the comment content
+            $newCommentContent = $callback($commentContent);
+
+            // Reconstruct the entire comment with the new content
+            $newComment = $startTag . $newCommentContent . $endTag;
+
+            // Replace the old comment in the HTML content with the new one
+            $html = substr($html, 0, $startPos - strlen($startTag)) .
+                $newComment .
+                substr($html, $endPos + strlen($endTag));
+
+            // Move offset to the end of the new comment for the next iteration
+            $offset = strpos($html, $newComment, $offset) + strlen($newComment);
+        }
+
+        return $html;
+    }
 }

 // vim: et sw=4 sts=4
@@ -111,7 +111,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                if ($synchronize_interval && // synchronization is on
                    $cursor > 0 && // cursor is further than zero
                    $loops % $synchronize_interval === 0) { // time to synchronize!
-                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
+                    $current_line = 1 + substr_count($html, $nl, 0, $cursor);
                }
            }

@@ -139,7 +139,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                );
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
+                    $current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_lt + 1;
@@ -214,7 +214,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    );
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
+                        $current_line += substr_count($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
@@ -229,7 +229,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    $token = new HTMLPurifier_Token_End($type);
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
@@ -248,7 +248,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    $token = new HTMLPurifier_Token_Text('<');
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
@@ -276,7 +276,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    }
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
@@ -310,7 +310,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                }
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                    $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_gt + 1;
@@ -343,28 +343,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
        return $array;
    }

-    /**
-     * PHP 5.0.x compatible substr_count that implements offset and length
-     * @param string $haystack
-     * @param string $needle
-     * @param int $offset
-     * @param int $length
-     * @return int
-     */
-    protected function substrCount($haystack, $needle, $offset, $length)
-    {
-        static $oldVersion;
-        if ($oldVersion === null) {
-            $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
-        }
-        if ($oldVersion) {
-            $haystack = substr($haystack, $offset, $length);
-            return substr_count($haystack, $needle);
-        } else {
-            return substr_count($haystack, $needle, $offset, $length);
-        }
-    }
-
    /**
     * Takes the inside of an HTML tag and makes an assoc array of attributes.
     *
@@ -1223,14 +1223,14 @@ class HTML5
                'type' => self::COMMENT
            );

-            /* Otherwise if the next seven chacacters are a case-insensitive match
+            /* Otherwise if the next seven characters are a case-insensitive match
            for the word "DOCTYPE", then consume those characters and switch to the
            DOCTYPE state. */
        } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
            $this->char += 7;
            $this->state = 'doctype';

-            /* Otherwise, is is a parse error. Switch to the bogus comment state.
+            /* Otherwise, it is a parse error. Switch to the bogus comment state.
            The next character that is consumed, if any, is the first character
            that will be in the comment. */
        } else {
@@ -20,7 +20,7 @@
 * The second objective is to ensure that explicitly excluded elements of
 * an element do not appear in its children.  Code that accomplishes this
 * task is pervasive through the strategy, though the two are distinct tasks
- * and could, theoretically, be seperated (although it's not recommended).
+ * and could, theoretically, be separated (although it's not recommended).
 *
 * @note Whether or not unrecognized children are silently dropped or
 *       translated into text depends on the child definitions.
@@ -641,7 +641,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
 //    Needless to say, we need to UN-skip the token so it gets
 //    reprocessed.
 //
-//  - Suppose that you successfuly process a token, replace it with
+//  - Suppose that you successfully process a token, replace it with
 //    one with your skip mark, but now another injector wants to
 //    process the skipped token with another token.  Should you continue
 //    to skip that new token, or reprocess it?  If you reprocess,
@@ -108,7 +108,7 @@ class HTMLPurifier_URI
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
-        $chars_sub_delims = '!$&\'()*+,;=';
+        $chars_sub_delims = $config->get('URI.AllowedSymbols');
        $chars_gen_delims = ':/?#[]@';
        $chars_pchar = $chars_sub_delims . ':@';

@@ -71,7 +71,7 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
        } // ignore unknown schemes, maybe another postfilter did it
        if (!$scheme_obj->browsable) {
            return true;
-        } // ignore non-browseable schemes, since we can't munge those in a reasonable way
+        } // ignore non-browsable schemes, since we can't munge those in a reasonable way
        if ($uri->isBenign($config, $context)) {
            return true;
        } // don't redirect if a benign URL
@@ -57,11 +57,12 @@ class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
            return true;
        }
        // check if we actually have some whitelists enabled
-        if ($this->regexp === null) {
-            return false;
+        if ($this->regexp !== null) {
+            return preg_match($this->regexp, $uri->toString());
        }
-        // actually check the whitelists
-        return preg_match($this->regexp, $uri->toString());
+        // check if the host is in a whitelist for safe iframe hosts
+        $safeHosts = $config->get('URI.SafeIframeHosts');
+        return $safeHosts !== null && isset($safeHosts[$uri->host]);
    }
 }

@@ -29,7 +29,7 @@ class HTMLPurifier_URIParser

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
-        // characters, because they represent external delimeters.
+        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
@@ -43,7 +43,7 @@ class HTMLPurifier_URIParser

        if (!$result) return false; // *really* invalid URI

-        // seperate out parts
+        // separate out parts
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
@@ -105,7 +105,7 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
            }
            $image_code = $info[2];
        } else {
-            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
+            throw new Exception("could not find exif_imagetype or getimagesize functions");
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {