EGOCMS  18.0
EGOTEC Content-Management-System
Ego_DomQuery.php
gehe zur Dokumentation dieser Datei
1 <?php
/**
 * Small helper around DOMDocument/DOMXPath that loads an HTML snippet and lets
 * callers find nodes with CSS-like selectors (translated to XPath) and modify them.
 */
class Ego_DomQuery {
    /**
     * The document all queries and modifications operate on.
     *
     * @var DOMDocument
     */
    public $doc;

    /**
     * XPath evaluator bound to $doc.
     *
     * @var DOMXPath
     */
    public $xpath;

    /**
     * Template engine used by modify() to render new nodes.
     * When null, modify() falls back to $GLOBALS['smarty'].
     *
     * @var object|null Any object offering fetch(string $template).
     */
    private $smarty = null;

    /**
     * Parses the given HTML into a document and prepares the XPath evaluator.
     *
     * @param mixed            $content HTML markup; cast to string before parsing.
     * @param DOMDocument|null $doc     Optional document to load into; a new one is created when omitted.
     */
    public function __construct($content, &$doc = null)
    {
        $this->doc = $this->load($content, $doc ?: new DOMDocument());
        $this->xpath = new DOMXPath($this->doc);
    }

    /**
     * Sets the template engine used by modify().
     *
     * @param object $smarty Object offering fetch(string $template).
     * @return void
     */
    public function setSmarty($smarty)
    {
        $this->smarty = $smarty;
    }

    /**
     * Loads HTML markup as UTF-8 into a document.
     *
     * @param mixed            $content HTML markup; cast to string before parsing.
     * @param DOMDocument|null $doc     Target document; defaults to $this->doc.
     * @return DOMDocument The document that was loaded into.
     */
    private function load($content, $doc = null)
    {
        if (!$doc) {
            $doc = $this->doc;
        }

        // Collect parser diagnostics in libxml's internal buffer instead of
        // silencing them with "@"; the caller's previous setting is restored.
        $previous = libxml_use_internal_errors(true);
        try {
            // The leading processing instruction makes libxml treat the input as UTF-8.
            $doc->loadHTML(
                '<?xml encoding="UTF-8">' . ((string) $content),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_PARSEHUGE/* | LIBXML_SCHEMA_CREATE @TODO only available from PHP 5.5.2 */
            );
        } finally {
            if (!$previous) {
                // Only discard the buffer when this method enabled it; a caller that
                // enabled internal errors itself keeps its collected entries.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previous);
        }

        // Drop the encoding processing instruction again. Iterate over a snapshot:
        // childNodes is a live list and removing while iterating it can skip nodes.
        foreach (iterator_to_array($doc->childNodes) as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $doc->removeChild($item);
            }
        }
        $doc->encoding = 'UTF-8';
        return $doc;
    }

    /**
     * Removes end tags of void elements (which have no end tag) from the markup.
     *
     * @param string $html Markup to clean.
     * @return string Markup without "</br>", "</img>", ... end tags.
     */
    public static function validHTML($html)
    {
        $voidTags = explode(',', 'area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr');
        $endTags = array_map(static function ($tag) {
            return "</$tag>";
        }, $voidTags);

        return str_replace($endTags, '', $html);
    }

    /**
     * Serializes the document (or a single node) to HTML with entities decoded,
     * while escaped "<" and ">" stay escaped.
     *
     * NOTE(review): this method rewrites the content of every "code" element of the
     * document on each call, so it is not side-effect free and repeated calls encode
     * the content again — confirm that callers only serialize once.
     *
     * @param DOMNode|null $node Node to serialize; the whole document when null.
     * @return string The HTML markup.
     */
    public function getHTML($node = null)
    {
        // Inside "code" elements the entities must always stay encoded.
        $codes = $this->doc->getElementsByTagName('code');
        foreach ($codes as $code) {
            // NOTE(review): saveHTML($code) yields the element including its own
            // "<code>" tags, so the tags themselves end up as encoded text inside
            // the element — presumably only the inner markup was intended; verify.
            $html = self::validHTML($this->doc->saveHTML($code));
            $this->setInnerHTML([$code], htmlentities($html));
        }

        // Make sure escaped "<" and ">" remain escaped after all entities are decoded.
        $html = str_replace(['&lt;', '&gt;'], ['&amp;lt;', '&amp;gt;'], self::validHTML($this->doc->saveHTML($node)));
        return html_entity_decode($html, ENT_NOQUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Replaces the children of each node with parsed HTML content.
     *
     * @param iterable             $nodes    Nodes (DOMNode) whose content is replaced.
     * @param string|callable|null $callback Markup to insert, or a callable receiving the
     *                                       (already emptied) node and returning the markup;
     *                                       a falsy value empties the node.
     * @return iterable The given $nodes.
     */
    public function setInnerHTML($nodes, $callback = null)
    {
        foreach ($nodes as $node) {
            // Empty the node first; a callable therefore sees the emptied node.
            while ($node->firstChild) {
                $node->removeChild($node->firstChild);
            }

            $content = $this->resolveContent($callback, $node);
            $body = $this->importBody($content, $node->ownerDocument);

            // Appending moves each child out of the temporary body element.
            while ($body && $body->firstChild) {
                $node->appendChild($body->firstChild);
            }
        }
        return $nodes;
    }

    /**
     * Replaces each node itself with parsed HTML content.
     *
     * @param iterable             $nodes    Nodes (DOMNode) to replace.
     * @param string|callable|null $callback Markup to insert, or a callable receiving the
     *                                       node and returning the markup; a falsy value
     *                                       removes the node without replacement.
     * @return iterable The given $nodes.
     */
    public function setText($nodes, $callback = null)
    {
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            if (!$parent) {
                // A detached node has nothing it could be replaced in.
                continue;
            }

            // Cast so that a non-string callback result is not mistaken for a callable.
            $content = (string) $this->resolveContent($callback, $node);

            $holders = $this->setInnerHTML([$this->doc->createElement('div')], $content);
            $holder = $holders[0];

            // Move the parsed children in front of the node in their original order.
            while ($holder->firstChild) {
                $parent->insertBefore($holder->firstChild, $node);
            }
            $parent->removeChild($node);
        }
        return $nodes;
    }

    /**
     * Evaluates an XPath expression against the document.
     *
     * @param string       $expression     XPath expression.
     * @param DOMNode|null $contextNode    Optional context node passed to DOMXPath::query().
     * @param bool         $registerNodeNS Passed through to DOMXPath::query().
     * @return DOMNodeList|false Result of DOMXPath::query().
     */
    public function xpath($expression, $contextNode = null, $registerNodeNS = true)
    {
        return $this->xpath->query($expression, $contextNode, $registerNodeNS);
    }

    /**
     * Evaluates a CSS-like selector by translating it to XPath first.
     *
     * @param string       $expression     CSS-like selector.
     * @param DOMNode|null $contextNode    Optional context node passed to DOMXPath::query().
     * @param bool         $registerNodeNS Passed through to DOMXPath::query().
     * @return DOMNodeList|false Result of DOMXPath::query().
     */
    public function query($expression, $contextNode = null, $registerNodeNS = true)
    {
        return $this->xpath($this->transform($expression), $contextNode, $registerNodeNS);
    }

    /**
     * Modifies every node matching a selector. Exactly one action is applied per node,
     * chosen in this order:
     *  - "node":      render the template and append its first element to the node, or,
     *                 when "before" is given, insert it before the first match of "before";
     *  - "wrapper":   replace the node with the wrapper markup in which "<%>" stands for the node;
     *  - "attribute": set the attribute to "value"; for "class" the value is merged
     *                 into the existing class list without duplicates.
     * Missing keys are treated as not set; without "selector" nothing happens.
     *
     * @param array $params Keys: selector, node, before, wrapper, attribute, value.
     * @return void
     */
    public function modify($params)
    {
        if (empty($params['selector'])) {
            return;
        }

        foreach ($this->query($params['selector']) as $node) {
            if (!empty($params['node'])) {
                // Insert a new element rendered by the template engine.
                $content = ($this->smarty ?? $GLOBALS['smarty'])->fetch("string:{$params['node']}");
                $body = $this->importBody(trim(self::validHTML($content)), $node->ownerDocument);
                $new_node = $body ? $body->firstChild : null;
                if (!$new_node) {
                    // The template rendered to nothing; there is nothing to insert.
                    continue;
                }

                if (!empty($params['before'])) {
                    // ...before the first element matching "before"
                    $target = $this->query($params['before'])->item(0);
                    if ($target && $target->parentNode) {
                        $target->parentNode->insertBefore($new_node, $target);
                    }
                } else {
                    // ...as the last child
                    $node->appendChild($new_node);
                }
            } elseif (!empty($params['wrapper'])) {
                // Wrap the element: "<%>" in the wrapper markup is replaced by the node's HTML.
                $wrapper = '<root>' . str_replace('<%>', $node->ownerDocument->saveHTML($node), $params['wrapper']) . '</root>';
                $domQuery2 = new self($wrapper);
                foreach ($domQuery2->doc->firstChild->childNodes as $child) {
                    $node->parentNode->insertBefore($node->ownerDocument->importNode($child, true), $node);
                }
                $node->parentNode->removeChild($node);
            } elseif (($params['attribute'] ?? null) === 'class') {
                // Merge class names; splitting on whitespace runs avoids empty tokens.
                $existing = preg_split('/\s+/', (string) $node->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
                $added = preg_split('/\s+/', (string) ($params['value'] ?? ''), -1, PREG_SPLIT_NO_EMPTY);
                $classes = array_unique(array_merge($existing, $added));
                $node->setAttribute('class', implode(' ', $classes));
            } elseif (!empty($params['attribute'])) {
                // Set any other attribute.
                $node->setAttribute((string) $params['attribute'], (string) ($params['value'] ?? ''));
            }
        }
    }

    /**
     * Resolves the content argument of setInnerHTML()/setText().
     *
     * @param string|callable|null $callback Markup, a callable producing markup, or a falsy value.
     * @param DOMNode              $node     Node handed to the callable.
     * @return mixed The markup; an empty string for a falsy $callback.
     */
    private function resolveContent($callback, $node)
    {
        if (is_string($callback)) {
            return $callback;
        }
        return $callback ? $callback($node) : '';
    }

    /**
     * Parses markup inside a temporary html/body document and imports it into $owner.
     *
     * @param string      $content Markup to parse.
     * @param DOMDocument $owner   Document the parsed nodes are imported into.
     * @return DOMNode|null The first child of the imported root element (the body
     *                      element for well-formed input) holding the parsed nodes.
     */
    private function importBody($content, $owner)
    {
        $doc = $this->load("<html><body>$content</body></html>", new DOMDocument());
        $fragment = $owner->importNode($doc->documentElement, true);
        return $fragment->firstChild;
    }

    /**
     * Translates a CSS-like selector into an XPath expression.
     * Comma separated selectors are translated individually and joined with "|".
     *
     * @param string $path CSS-like selector.
     * @return string XPath expression.
     */
    private function transform($path)
    {
        $path = (string) $path;
        if (false !== strpos($path, ',')) {
            $expressions = [];
            foreach (explode(',', $path) as $part) {
                // The recursive call always yields a string.
                $expressions[] = $this->transform(trim($part));
            }
            return implode('|', $expressions);
        }

        $paths = ['//'];
        // Glue child combinators to their neighbours so they survive the whitespace split.
        $path = preg_replace('|\s+>\s+|', '>', $path);
        $segments = preg_split('/\s+/', $path);
        foreach ($segments as $key => $segment) {
            $pathSegment = $this->tokenize($segment);
            if (0 === $key) {
                if (0 === strpos($pathSegment, '[contains(')) {
                    // A bare predicate needs an element wildcard in front of it.
                    $paths[0] .= '*' . ltrim($pathSegment, '*');
                } else {
                    $paths[0] .= $pathSegment;
                }
                continue;
            }
            if (0 === strpos($pathSegment, '[contains(')) {
                // foreach works on a copy, so the alternatives appended here are not revisited.
                foreach ($paths as $pathKey => $xpath) {
                    // Predicate on any descendant ...
                    $paths[$pathKey] .= '//*' . ltrim($pathSegment, '*');
                    // ... or on the element matched so far.
                    $paths[] = $xpath . $pathSegment;
                }
            } else {
                foreach ($paths as $pathKey => $xpath) {
                    $paths[$pathKey] .= '//' . $pathSegment;
                }
            }
        }

        if (1 == count($paths)) {
            return $paths[0];
        }
        return implode('|', $paths);
    }

    /**
     * Translates a single whitespace-free selector segment into XPath syntax.
     *
     * @param string $expression Selector segment.
     * @return string XPath fragment.
     */
    private function tokenize($expression)
    {
        // Child selectors
        $expression = str_replace('>', '/', $expression);

        // IDs; an ID without a preceding element name gets a wildcard
        $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression);
        $expression = preg_replace('|(?<![a-z0-9_-])(\[@id=)|i', '*$1', $expression);

        // arbitrary attribute strict equality
        $expression = preg_replace_callback(
            '|\[@?([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return '[@' . strtolower($matches[1]) . "='" . $matches[2] . "']";
            },
            $expression
        );

        // arbitrary attribute contains full word
        $expression = preg_replace_callback(
            '|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) . "), ' '), ' "
                    . $matches[2] . " ')]";
            },
            $expression
        );

        // arbitrary attribute contains specified content
        $expression = preg_replace_callback(
            '|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return "[contains(@" . strtolower($matches[1]) . ", '"
                    . $matches[2] . "')]";
            },
            $expression
        );

        // Classes; skipped as soon as the segment already contains an attribute predicate
        if (false === strpos($expression, "[@")) {
            $expression = preg_replace(
                '|\.([a-z][a-z0-9_-]*)|i',
                "[contains(concat(' ', normalize-space(@class), ' '), ' \$1 ')]",
                $expression
            );
        }

        // Collapse a doubled wildcard produced by the replacements above
        $expression = str_replace('**', '*', $expression);

        return $expression;
    }
}
372 ?>
setInnerHTML($nodes, $callback=null)
__construct($content, &$doc=null)
setSmarty($smarty)
xpath($expression, $contextNode=null, $registerNodeNS=true)
getHTML($node=null)
query($expression, $contextNode=null, $registerNodeNS=true)
setText($nodes, $callback=null)
static validHTML($html)
modify($params)