Upgrade 1-11.38

This commit is contained in:
xesmyd
2026-03-30 14:10:30 +02:00
parent f2a7e6d1fc
commit ac648ef29d
24665 changed files with 69682 additions and 2205004 deletions
@@ -52,14 +52,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
// attempt to armor stray angled brackets that cannot possibly
// form tags and thus are probably being used as emoticons
if ($config->get('Core.AggressivelyFixLt')) {
$char = '[^a-z!\/]';
$comment = "/<!--(.*?)(-->|\z)/is";
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
do {
$old = $html;
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
} while ($html !== $old);
$html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
$html = $this->aggressivelyFixLt($html);
}
// preprocess html, essential for UTF-8
@@ -288,7 +281,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
*/
public function callbackUndoCommentSubst($matches)
{
return '<!--' . strtr($matches[1], array('&amp;' => '&', '&lt;' => '<')) . $matches[2];
return '<!--' . $this->undoCommentSubstr($matches[1]) . $matches[2];
}
/**
@@ -299,7 +292,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
*/
public function callbackArmorCommentEntities($matches)
{
return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
return '<!--' . $this->armorEntities($matches[1]) . $matches[2];
}
/**
* @param string $string
* @return string
*/
protected function armorEntities($string)
{
return str_replace('&', '&amp;', $string);
}
/**
* @param string $string
* @return string
*/
protected function undoCommentSubstr($string)
{
return strtr($string, array('&amp;' => '&', '&lt;' => '<'));
}
/**
@@ -335,6 +346,66 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$ret .= '</body></html>';
return $ret;
}
/**
* @param string $html
* @return string
*/
protected function aggressivelyFixLt($html)
{
$char = '[^a-z!\/]';
$html = $this->manipulateHtmlComments($html, array($this, 'armorEntities'));
do {
$old = $html;
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
} while ($html !== $old);
return $this->manipulateHtmlComments($html, array($this, 'undoCommentSubstr'));
}
/**
* Modify HTML comments in the given HTML content using a callback.
*
* @param string $html
* @param callable $callback
* @return string
*/
protected function manipulateHtmlComments($html, callable $callback)
{
$offset = 0;
$startTag = '<!--';
$endTag = '-->';
while (($startPos = strpos($html, $startTag, $offset)) !== false) {
$startPos += strlen($startTag); // Move past `<!--`
$endPos = strpos($html, $endTag, $startPos);
if ($endPos === false) {
// No matching ending comment tag found
break;
}
// Extract the original comment content
$commentContent = substr($html, $startPos, $endPos - $startPos);
// Apply the callback to the comment content
$newCommentContent = $callback($commentContent);
// Reconstruct the entire comment with the new content
$newComment = $startTag . $newCommentContent . $endTag;
// Replace the old comment in the HTML content with the new one
$html = substr($html, 0, $startPos - strlen($startTag)) .
$newComment .
substr($html, $endPos + strlen($endTag));
// Move offset to the end of the new comment for the next iteration
$offset = strpos($html, $newComment, $offset) + strlen($newComment);
}
return $html;
}
}
// vim: et sw=4 sts=4
@@ -111,7 +111,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($synchronize_interval && // synchronization is on
$cursor > 0 && // cursor is further than zero
$loops % $synchronize_interval === 0) { // time to synchronize!
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
$current_line = 1 + substr_count($html, $nl, 0, $cursor);
}
}
@@ -139,7 +139,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
}
$array[] = $token;
$cursor = $position_next_lt + 1;
@@ -214,7 +214,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
$current_line += substr_count($html, $nl, $cursor, $strlen_segment);
}
$array[] = $token;
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
@@ -229,7 +229,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$token = new HTMLPurifier_Token_End($type);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -248,7 +248,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$token = new HTMLPurifier_Token_Text('<');
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -276,7 +276,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -310,7 +310,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$cursor = $position_next_gt + 1;
@@ -343,28 +343,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
return $array;
}
/**
* PHP 5.0.x compatible substr_count that implements offset and length
* @param string $haystack
* @param string $needle
* @param int $offset
* @param int $length
* @return int
*/
protected function substrCount($haystack, $needle, $offset, $length)
{
static $oldVersion;
if ($oldVersion === null) {
$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
}
if ($oldVersion) {
$haystack = substr($haystack, $offset, $length);
return substr_count($haystack, $needle);
} else {
return substr_count($haystack, $needle, $offset, $length);
}
}
/**
* Takes the inside of an HTML tag and makes an assoc array of attributes.
*
@@ -1223,14 +1223,14 @@ class HTML5
'type' => self::COMMENT
);
/* Otherwise if the next seven chacacters are a case-insensitive match
/* Otherwise if the next seven characters are a case-insensitive match
for the word "DOCTYPE", then consume those characters and switch to the
DOCTYPE state. */
} elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
$this->char += 7;
$this->state = 'doctype';
/* Otherwise, is is a parse error. Switch to the bogus comment state.
/* Otherwise, it is a parse error. Switch to the bogus comment state.
The next character that is consumed, if any, is the first character
that will be in the comment. */
} else {