saveHTMLExact(); }

    /**
     * Load HTML with a proper encoding fix/hack.
     *
     * Every non-ASCII character is rewritten as a numeric character
     * reference before parsing, so the result does not depend on a charset
     * declaration inside the markup. Borrowed from the link below.
     *
     * Note that the second argument is the encoding of $html; it is not
     * forwarded to the parent loader.
     *
     * @link http://www.php.net/manual/en/domdocument.loadhtml.php
     *
     * @param string $html     Markup to parse.
     * @param string $encoding Character encoding of $html.
     *
     * @return bool Whatever the parent loadHTML() returns; false for empty input.
     */
    public function loadHTML($html, $encoding = "UTF-8")
    {
        $html = (string) $html;
        if ($html === '') {
            // Nothing to parse; report failure instead of handing the parent an empty string.
            return false;
        }

        // The 'HTML-ENTITIES' pseudo-encoding of mb_convert_encoding() is
        // deprecated as of PHP 8.2. Normalise to UTF-8 first, then turn every
        // code point above ASCII into a numeric entity.
        $utf8 = mb_convert_encoding($html, 'UTF-8', $encoding);
        $html = mb_encode_numericentity($utf8, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

        // Buffer libxml parse errors rather than muting everything with "@".
        $previous = libxml_use_internal_errors(true);
        $loaded = parent::loadHTML($html);
        if (!$previous) {
            // Only discard the buffer when the caller was not collecting errors itself.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        return $loaded;
    }

    /** * Return HTML while stripping the annoying auto-added ,
<html> and <body> wrappers, and doctype.
     *
     * Everything from the leading doctype through the opening <html><body>
     * pair is removed, and so is the closing </body></html> pair at the end
     * of the serialized document.
     *
     * @link http://php.net/manual/en/migration52.methods.php
     *
     * @return string Serialized markup without the wrappers; an empty string
     *                when saveHTML() reports failure.
     */
    public function saveHTMLExact()
    {
        $html = $this->saveHTML();
        if ($html === false) {
            return '';
        }

        // NOTE(review): the tag names were missing from both patterns, which
        // left the first one stripping only the literal "<!DOCTYPE" and the
        // second one matching nothing. Restored from the description above -
        // confirm against the upstream source.
        $wrappers = array(
            '/^<!DOCTYPE.*?<html><body>/si',
            '!</body></html>$!si',
        );

        return preg_replace($wrappers, '', $html);
    }

    /** * This test functions shows an example of SmartDOMDocument in action. * A sample HTML fragment is loaded. * Then, the first image in the document is cut out and saved separately. * It also shows that Russian characters are parsed correctly. * */ public static function testHTML() { $content = <<русский
CONTENT; print "Before removing the image, the content is: " . htmlspecialchars($content) . "