- // to disappear during processing:
- // https://bugzilla.wikimedia.org/show_bug.cgi?id=53086
- //
- // Please replace with a better fix if one can be found.
+ // to disappear during processing: https://phabricator.wikimedia.org/T55086
+ // TODO: Please replace with a better fix if one can be found.
$html = str_replace( ' <', ' <', $html );
libxml_use_internal_errors( true );
$html = str_replace( ' <', ' <', $html );
libxml_use_internal_errors( true );
// over them in a foreach loop. It will seemingly leave the internal
// iterator on the foreach out of whack and results will be quite
// strange. Though, making a queue of items to remove seems to work.
// over them in a foreach loop. It will seemingly leave the internal
// iterator on the foreach out of whack and results will be quite
// strange. Though, making a queue of items to remove seems to work.
foreach ( $removals['TAG'] as $tagToRemove ) {
$tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
foreach ( $removals['TAG'] as $tagToRemove ) {
$tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
foreach ( $removals['ID'] as $itemToRemove ) {
$itemToRemoveNode = $doc->getElementById( $itemToRemove );
if ( $itemToRemoveNode ) {
foreach ( $removals['ID'] as $itemToRemove ) {
$itemToRemoveNode = $doc->getElementById( $itemToRemove );
if ( $itemToRemoveNode ) {
$xpath = new DOMXPath( $doc );
foreach ( $removals['CLASS'] as $classToRemove ) {
$elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' );
$xpath = new DOMXPath( $doc );
foreach ( $removals['CLASS'] as $classToRemove ) {
$elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' );
if ( !$replacements ) {
// We don't include rules like '"' => '&quot;' because entities had already been
// normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
if ( !$replacements ) {
// We don't include rules like '"' => '&quot;' because entities had already been
// normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
// If this error continues in the future, please track it down in the
// XML code paths if possible and fix there.
$html = str_replace( ' ', '', $html );
// If this error continues in the future, please track it down in the
// XML code paths if possible and fix there.
$html = str_replace( ' ', '', $html );