28 private $smarty = null;
38 $this->doc = $this->load($content,
$doc ?
$doc :
new DOMDocument());
39 $this->
xpath =
new DOMXPath($this->doc);
50 $this->smarty = $smarty;
60 private function load($content,
$doc = null)
66 '<?xml encoding="UTF-8">' . ((
string) $content),
67 LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_PARSEHUGE
69 foreach (
$doc->childNodes as $item) {
70 if ($item->nodeType == XML_PI_NODE) {
71 $doc->removeChild($item);
74 $doc->encoding =
'UTF-8';
87 return str_replace(array_map(
function($tag) {
89 }, explode(
',',
'area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr')),
'', $html);
101 $codes = $this->doc->getElementsByTagName(
'code');
102 foreach ($codes as $code) {
103 $html = self::validHTML($this->doc->saveHTML($code));
108 $html = str_replace([
'<',
'>'], [
'&lt;',
'&gt;'], self::validHTML($this->doc->saveHTML($node)));
109 return html_entity_decode($html, ENT_NOQUOTES | ENT_HTML5,
'UTF-8');
121 foreach ($nodes as $node) {
122 for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
123 $node->removeChild($node->childNodes->item($i));
125 if (is_string($callback)) {
126 $content = $callback;
128 $content = $callback ? $callback($node) :
'';
131 $doc = $this->load(
"<html><body>$content</body></html>",
new DOMDocument());
132 $fragment = $node->ownerDocument->importNode(
$doc->documentElement,
true);
133 $parent = $fragment->childNodes->item(0);
134 for ($i = $parent->childNodes->length - 1; $i >= 0; $i--) {
135 $node->insertBefore($parent->childNodes->item($i), $node->firstChild);
148 public function setText($nodes, $callback = null)
150 foreach ($nodes as $node) {
151 if (is_string($callback)) {
152 $content = $callback;
154 $content = $callback ? $callback($node) :
'';
157 $parents = $this->
setInnerHTML(array($this->doc->createElement(
'div')), $content);
159 for ($i = $parents[0]->childNodes->length - 1; $i >= 0; $i--) {
160 $child = $parents[0]->childNodes->item($i);
161 $node->parentNode->insertBefore($child, $reference);
164 $node->parentNode->removeChild($node);
177 public function xpath($expression, $contextNode = null, $registerNodeNS =
true)
179 return $this->
xpath->query($expression, $contextNode, $registerNodeNS);
190 public function query($expression, $contextNode = null, $registerNodeNS =
true)
192 return $this->
xpath($this->transform($expression), $contextNode, $registerNodeNS);
210 if ($params[
'selector']) {
211 $nodes = $this->
query($params[
'selector']);
212 foreach ($nodes as $node) {
213 if ($params[
'node']) {
215 $content = ($this->smarty ?? $GLOBALS[
'smarty'])->fetch(
"string:{$params['node']}");
216 $doc = $this->load(
"<html><body>" . trim(self::validHTML($content)) .
"</body></html>",
new DOMDocument());
217 $fragment = $node->ownerDocument->importNode(
$doc->documentElement,
true);
218 $new_node = $fragment->childNodes->item(0)->childNodes->item(0);
220 if ($params[
'before']) {
222 $parents = $this->
query($params[
'before']);
223 foreach ($parents as $parent) {
224 $parent->parentNode->insertBefore($new_node, $parent);
229 $node->appendChild($new_node);
231 } elseif ($params[
'wrapper']) {
233 $wrapper =
'<root>' . str_replace(
'<%>', $node->ownerDocument->saveHTML($node), $params[
'wrapper']) .
'</root>';
235 foreach ($domQuery2->doc->firstChild->childNodes as $child) {
236 $node->parentNode->insertBefore($node->ownerDocument->importNode($child,
true), $node);
238 $node->parentNode->removeChild($node);
239 } elseif ($params[
'attribute'] ==
'class') {
241 if ($className = $node->getAttribute(
'class')) {
242 $classes = explode(
' ', $className);
246 $classes = array_unique(array_merge($classes, explode(
' ', $params[
'value'])));
247 $node->setAttribute(
'class', implode(
' ', $classes));
249 $node->setAttribute($params[
'attribute'], $params[
'value']);
263 private function transform($path)
265 $path = (string) $path;
266 if (strstr($path,
',')) {
267 $paths = explode(
',', $path);
268 $expressions = array();
269 foreach ($paths as $path) {
270 $xpath = $this->transform(trim($path));
273 } elseif (is_array(
$xpath)) {
274 $expressions = array_merge($expressions,
$xpath);
277 return implode(
'|', $expressions);
280 $paths = array(
'//');
281 $path = preg_replace(
'|\s+>\s+|',
'>', $path);
282 $segments = preg_split(
'/\s+/', $path);
283 foreach ($segments as $key => $segment) {
284 $pathSegment = $this->tokenize($segment);
286 if (0 === strpos($pathSegment,
'[contains(')) {
287 $paths[0] .=
'*' . ltrim($pathSegment,
'*');
289 $paths[0] .= $pathSegment;
293 if (0 === strpos($pathSegment,
'[contains(')) {
294 foreach ($paths as $pathKey =>
$xpath) {
295 $paths[$pathKey] .=
'//*' . ltrim($pathSegment,
'*');
296 $paths[] =
$xpath . $pathSegment;
299 foreach ($paths as $pathKey =>
$xpath) {
300 $paths[$pathKey] .=
'//' . $pathSegment;
305 if (1 == count($paths)) {
308 return implode(
'|', $paths);
319 private function tokenize($expression)
322 $expression = str_replace(
'>',
'/', $expression);
325 $expression = preg_replace(
'|#([a-z][a-z0-9_-]*)|i',
'[@id=\'$1\']', $expression);
326 $expression = preg_replace(
'|(?<![a-z0-9_-])(\[@id=)|i',
'*$1', $expression);
329 $expression = preg_replace_callback(
330 '|\[@?([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
331 function ($matches) {
332 return '[@' . strtolower($matches[1]) .
"='" . $matches[2] .
"']";
338 $expression = preg_replace_callback(
339 '|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
340 function ($matches) {
341 return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) .
"), ' '), ' " 342 . $matches[2] .
" ')]";
348 $expression = preg_replace_callback(
349 '|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
350 function ($matches) {
351 return "[contains(@" . strtolower($matches[1]) .
", '" 352 . $matches[2] .
"')]";
358 if (
false === strpos($expression,
"[@")) {
359 $expression = preg_replace(
360 '|\.([a-z][a-z0-9_-]*)|i',
361 "[contains(concat(' ', normalize-space(@class), ' '), ' \$1 ')]",
367 $expression = str_replace(
'**',
'*', $expression);
setInnerHTML($nodes, $callback=null)
__construct($content, &$doc=null)
xpath($expression, $contextNode=null, $registerNodeNS=true)
query($expression, $contextNode=null, $registerNodeNS=true)
setText($nodes, $callback=null)