EGOCMS  18.0
EGOTEC Content-Management-System
Ego_Search.php
gehe zur Dokumentation dieser Datei
1 <?php
8 abstract class Ego_Search
9 {
/**
 * Configuration array of the search engine, empty by default.
 *
 * Within this class only the 'leading_wildcard' key is read (by prepareSearch()).
 * Where the array gets populated is not visible in this file.
 */
10  protected $config = [];
11 
/**
 * Engine-specific hook, implemented by concrete search classes.
 *
 * NOTE(review): presumably removes the entry identified by $index from the
 * search index - no implementation is visible here, confirm against a subclass.
 *
 * @param mixed $index
 */
27  abstract function delete( $index );
28 
/**
 * Engine-specific hook, implemented by concrete search classes.
 *
 * NOTE(review): presumably clears/rebuilds the whole search index - no
 * implementation is visible here, confirm against a subclass.
 */
45  abstract function reset();
46 
/**
 * Engine-specific hook, implemented by concrete search classes.
 *
 * NOTE(review): presumably writes $page into the search index under $index -
 * no implementation is visible here, confirm against a subclass.
 *
 * @param mixed $index
 * @param mixed $page
 */
63  abstract function update($index, $page);
64 
/**
 * Engine-specific search, implemented by concrete search classes.
 *
 * NOTE(review): the meaning of $relation and the shape of $query and of the
 * return value cannot be derived from this file - confirm against a subclass.
 *
 * @param mixed $search
 * @param mixed $relation
 * @param mixed $query
 */
84  abstract function search($search, $relation, $query);
85 
/**
 * Additional query for the extra-field search.
 *
 * Written by setExtraQuery(); prepareSearch() AND-combines it with the search
 * string when it is not empty.
 */
91  protected $extraQuery = '';
92 
/**
 * Runs a fulltext search over several sites and returns the merged, sorted pages.
 *
 * The search string is validated with checkSearch() first. For every site the
 * language is switched to the requested language ($_REQUEST['lang']) or, if
 * none was requested, to the language of the global site; sites that reject
 * the language with an exception are skipped.
 *
 * @param string $search Search string.
 * @param array  $sites  Site objects or site names; when empty, Ego_System::getAllSites() is used.
 * @param array  $query  Query passed to Site::getPages(); its optional 'order' entry controls sorting.
 * @param array  $param  Additional parameters passed to Site::getPages().
 * @param array  $sort   Fallback sort definition (type, field, direction), see sortPages().
 * @param string $filter Filter passed to Site::getPages() as 'filter'.
 * @return mixed Result of Ego_System::sortPages().
 */
public function globalSearch($search, $sites = array(), $query = array(), $param = array(), $sort = array(), $filter = '') {
    $this->checkSearch($search);

    if (empty($sites)) {
        $sites = Ego_System::getAllSites();
    }

    // The search parameters are identical for every site, so build them once.
    $search_param = array_merge($param, array('fulltext' => $search, 'filter' => $filter));

    $pages = array();
    foreach ($sites as $site) {
        if (is_string($site)) {
            $site = new Site($site);
        }
        try {
            // !empty() instead of a bare array read: both keys are optional and
            // reading a missing key raises a warning on PHP 8.
            $lang = null;
            if (!empty($_REQUEST['lang'])) {
                $lang = $_REQUEST['lang'];
            } elseif (!empty($GLOBALS['site'])) {
                $lang = $GLOBALS['site']->language;
            }
            if ($lang) {
                $site->setLanguage($lang);
            }
        } catch (Exception $e) {
            // The site does not exist in this language, ignore it.
            continue;
        }
        foreach ($site->getPages($query, $search_param) as $page) {
            $pages[] = $page;
        }
    }

    // 'order' is optional; the default $query is an empty array.
    $order = isset($query['order']) ? $query['order'] : '';
    return $this->sortPages($pages, $order, $sort);
}
130 
/**
 * Sorts the given pages via Ego_System::sortPages().
 *
 * Precedence:
 *  1. $order of the form "<field>[ asc|desc]" - sort by that field, ascending when no direction is given;
 *  2. a non-empty $sort array - its elements 0, 1 and 2 are passed on as sort type, sort field and direction;
 *  3. otherwise - sort by the 'score' field, descending.
 *
 * @param mixed  $pages Pages to sort.
 * @param string $order Order expression, e.g. "name desc".
 * @param array  $sort  Fallback sort definition.
 * @return mixed Result of Ego_System::sortPages().
 */
protected function sortPages($pages, $order = '', $sort = array()) {
    if (!empty($order) && preg_match('/^([^ ]+) ?(asc|desc)?/i', $order, $match)) {
        // preg_match() omits trailing groups that did not participate, so
        // $match[2] does not exist when no direction was given.
        $direction = !empty($match[2]) ? strtolower($match[2]) : 'asc';
        return Ego_System::sortPages($pages, 'field', $match[1], $direction);
    }
    if (!empty($sort)) {
        return Ego_System::sortPages($pages, $sort[0], $sort[1], $sort[2]);
    }
    return Ego_System::sortPages($pages, 'field', 'score', 'desc');
}
158 
/**
 * Returns the text of one page attribute, cleaned by filterContent().
 *
 * - 'keywords': the page keywords with commas turned into spaces; if the site
 *   setting 'keyword_register_own_site' is set, the names and synonyms of all
 *   parent pages of type '_keywords/entry' are appended.
 * - 'extra': all values (no keys) of the filtered extra field, space separated.
 * - anything else: the page field with that name.
 *
 * @param object $page Page to read from.
 * @param string $k    Attribute name.
 * @return string Cleaned text.
 */
protected function _getContent($page, $k) {
    if ($k == 'keywords') {
        // Keywords
        $language = $page->getSite()->language;
        $content = str_replace(',', ' ', $page->getKeywords($language, true));
        if ($page->getSite()->admin['keyword_register_own_site']) {
            $parent_query = array(
                'fields' => 'name,extra',
                'where' => "type='_keywords/entry'"
            );
            $parent_param = array(
                'auth_or' => "1=1"
            );
            foreach ($page->getParents($parent_query, $parent_param) as $entry) {
                $content .= ' '.$entry->field['name'];

                // Synonyms
                require_once 'base/Ego_Combo.php';
                $synonyms = new Ego_Combo($entry->extra['synonym']);
                foreach ($synonyms->getText() as $synonym) {
                    $content .= ' '.$synonym;
                }
            }
        }
    } elseif ($k == 'extra') {
        // Page extra: only values go into the search index, no keys.
        $values = Ego_System::arrayValuesRecursive($this->filterExtra($page));
        $content = implode(' ', $values);
    } else {
        // Page fields
        $content = $page->field[$k];
    }

    return $this->filterContent($content, $k);
}
204 
/**
 * Returns the filtered extra field of a page, flattened by
 * Ego_System::arrayFlatRecursive() with every value cleaned by filterContent().
 *
 * @param object $page  Page whose extra field is read.
 * @param bool   $clean When true, stand-alone digits 0 and 1 are removed from the values first.
 * @return mixed Result of Ego_System::arrayFlatRecursive().
 */
protected function _getExtra($page, $clean = false) {
    $extra = $this->filterExtra($page);

    $sanitize = function($value) use ($clean) {
        if (!$clean) {
            return $this->filterContent($value);
        }
        // Drop values that should not be findable: a 0 or 1 that is not part of a longer number.
        $stripped = preg_replace('/(?<!\d)(0|1)(?!\d)/', '', $value);
        return $this->filterContent($stripped);
    };

    return Ego_System::arrayFlatRecursive($extra, $sanitize);
}
222 
/**
 * Reduces a piece of content to plain, single-line text for the search index.
 *
 * For the attributes 'short', 'content' and 'extra' the values of HTML
 * title/alt attributes are appended first so they survive the tag removal.
 * Afterwards tags and HTML entities are replaced by a space, whitespace runs
 * are collapsed, double quotes are replaced by a space and the result is trimmed.
 *
 * Non-string input is handled explicitly: scalars and objects with
 * __toString() are cast to string, everything else (null, arrays, other
 * objects) yields an empty string instead of a warning or TypeError.
 *
 * @param mixed  $content Content to clean.
 * @param string $k       Attribute name the content belongs to.
 * @return string Cleaned text.
 */
public function filterContent($content, $k = 'extra') {
    // Attribute values are only extracted for genuine strings (objects may end up in the extra field).
    $extract_attributes = is_string($content) && in_array($k, array('short', 'content', 'extra'));

    if (is_scalar($content) || (is_object($content) && method_exists($content, '__toString'))) {
        $content = (string) $content;
    } else {
        return '';
    }

    if ($extract_attributes && preg_match_all('/(title|alt)=(["\'])(.*?)\\2/ims', $content, $matches)) {
        // Keep certain HTML attributes
        foreach ($matches[3] as $match) {
            $content .= " $match";
        }
    }

    $content = (string) preg_replace('/<[^>]*>/', ' ', $content);
    $content = (string) preg_replace('/&[^ ;]+;/', ' ', $content);
    $content = (string) preg_replace('/\s+/', ' ', $content);
    // The former strtr($content, "\"\n\r\t", ' ') only ever translated the
    // double quote: strtr() ignores surplus characters when both lists differ
    // in length, and line breaks/tabs are already collapsed above.
    $content = str_replace('"', ' ', $content);

    return trim($content);
}
248 
/**
 * Returns the extra field of a page without entries that must not be indexed.
 *
 * A non-array extra field yields an empty array. Otherwise a fixed list of
 * internal keys is removed and, if the site provides the file
 * 'admin/search_index.php', that file is loaded and its search_index()
 * function gets the final say over the returned data.
 *
 * @param object $page Page whose extra field is read.
 * @return mixed Filtered extra data (whatever search_index() returns if the site hook exists).
 */
public function filterExtra($page) {
    $extra = $page->extra;

    // Make sure we really have an array before using it.
    if (!is_array($extra)) {
        return array();
    }

    $internal_keys = array(
        'history',
        'language_link',
        'language_standard',
        'origImgWidth',
        'origImgHeight',
        'origFileSize',
        'clones',
        'clone_original',
        '_blocks',
        '_layout',
        '_forms',
        '_template',
        '_style',
        'mediapool',
    );
    foreach ($internal_keys as $internal_key) {
        unset($extra[$internal_key]);
    }

    $hook_file = $page->getSite()->getSiteFile('admin/search_index.php');
    if ($hook_file) {
        require_once($hook_file);
        $extra = search_index($page, $extra);
    }

    return $extra;
}
284 
/**
 * Translates an extra-field query into SQL LIKE conditions on the serialized
 * `extra` column and stores the result in $this->extraQuery.
 *
 * The query is split at the words AND/OR. Every part of the form
 * `extra.<key> = <value>`, `extra.<key> != <value>` or a bare `extra.<key>`
 * is replaced by a LIKE / NOT LIKE condition matching the PHP-serialized
 * key/value pair. A key marked with `!` (`!extra.<key>`, or `extra.!<key>`
 * for the SQL engines) becomes a NOT LIKE condition on the key alone.
 * Values of the form `:name` are replaced by $bind['name'] when present.
 *
 * Unless the configured search engine is 'lucene' or 'elastic', the leading
 * "extra." is removed from the key.
 *
 * Known limitation: the words "and"/"or" inside a quoted value still split the query.
 *
 * NOTE(review): keys and values are concatenated into the SQL text without
 * escaping (there are no binds for the extra search with the SQL drivers).
 * Callers must not pass untrusted input unescaped.
 *
 * @param string $query Extra query.
 * @param array  $bind  Values for `:name` placeholders.
 * @return void
 */
public function setExtraQuery($query, $bind = array()) {
    $engine = isset($GLOBALS['egotec_conf']['search_engine'])
        ? $GLOBALS['egotec_conf']['search_engine']
        : null;
    $keep_prefix = in_array($engine, array('lucene', 'elastic'), true);

    // Generate the SQL query
    $sql_query = $query;
    // Word boundaries keep names such as "extra.brand" or "extra.color" in one piece.
    foreach (preg_split('/\b(and|or)\b/i', $query) as $sub_query) {
        if (!preg_match('/(!?extra\.[^ ]+)(\s*(!=|=)\s*(.*?))?$/si', trim($sub_query, '() '), $matches)) {
            continue;
        }

        $param = $matches[1];
        // Groups 3 and 4 are missing when only a key was given.
        $operator = isset($matches[3]) ? $matches[3] : '';
        $value = isset($matches[4]) ? trim($matches[4], '\'"') : '';
        if (is_numeric($value)) {
            $value = (int) $value;
        }

        // The key must not be set in the extra field. The leading "!" has to be
        // detected before the "extra." prefix is cut off, otherwise "!extra.foo"
        // would be cut down to ".foo".
        $exclude = false;
        if ($param[0] === '!') {
            $exclude = true;
            $param = substr($param, 1);
        }
        if (!$keep_prefix) {
            $param = substr($param, 6);
            // Form "extra.!foo"
            if ($param !== '' && $param[0] === '!') {
                $exclude = true;
                $param = substr($param, 1);
            }
        }

        // There are no binds for the extra search with the SQL drivers, so insert the value directly.
        if (is_array($bind) && is_string($value) && strpos($value, ':') === 0 && isset($bind[substr($value, 1)])) {
            $value = $bind[substr($value, 1)];
        }

        // Serialized form of the key: s:<byte length>:"<key>";
        $key_pattern = 's:' . strlen($param) . ':"' . $param . '";';

        if ($exclude) {
            // The key is quoted exactly like in the positive match below;
            // without the quotes the pattern can never match serialized data.
            $replace = "extra NOT LIKE '%" . $key_pattern . "%'";
        } else {
            if (is_string($value)) {
                $like = "'%" . $key_pattern . 's:' . strlen($value) . ':"' . $value . "\"%'";
            } else {
                // "_" is the single-character LIKE wildcard for the serialized type letter.
                $like = "'%" . $key_pattern . '_:' . $value . "%'";
            }
            $replace = 'extra ' . ($operator === '!=' ? 'NOT LIKE ' : 'LIKE ') . $like;
        }

        $sql_query = str_replace($matches[0], $replace, $sql_query);
    }

    $this->extraQuery = $sql_query;
}
368 
/**
 * Converts a user search string into the query string handed to the engine.
 *
 * The string is lower-cased, repeated + - ~ & are reduced to one character,
 * other Lucene query characters are prefixed with a backslash and trailing
 * dots are removed. It is then walked byte by byte to build two variants:
 * a wildcard variant (a "*" after every word, optionally also in front of it
 * when $this->config['leading_wildcard'] is set) and a fuzzy variant (a "~"
 * plus the configured 'fuzzy_num' after every word/phrase). Quoted phrases
 * and words the user already marked with "~" get no wildcard.
 *
 * Unless $original is set, $filter and $this->extraQuery are AND-combined with
 * the result (or replace it when the search is just "*") and exclamation marks
 * are escaped.
 *
 * @param string $search   User search string.
 * @param string $filter   Additional Lucene filter.
 * @param bool   $original When true, filter, extra query and "!" escaping are skipped.
 * @param bool   $fuzzy    When true, the fuzzy variant is appended to the wildcard variant.
 * @return string Prepared search string.
 */
protected function prepareSearch($search, $filter = '', $original = false, $fuzzy = false) {
    $search = mb_strtolower(trim($search));
    foreach (array('+', '-', '~', '&') as $char) {
        // These characters must not occur several times in a row
        $search = preg_replace('/[' . $char . ']{2,}/', $char, $search);
    }
    // Escape other Lucene query characters
    $search = preg_replace('%([\|!(){}[\]^*?:/]+)%', '\\\\$1', $search);

    // Hotfix: a trailing dot does not work
    $search = rtrim($search, '.');

    $search_asterix = ''; // Search extended with *
    $search_fuzzy = ''; // Search extended with ~
    $in_string = '';
    $in_word = '';
    $in_fuzzy = '';
    $character = ''; // Stays empty when the search string is empty

    // Fuzzy setting; the config key is optional
    $fuzzy_num = '';
    if (!empty($GLOBALS['egotec_conf']['search']['fuzzy_num'])) {
        $fuzzy_num = $GLOBALS['egotec_conf']['search']['fuzzy_num'];
    }
    $leading_wildcard = !empty($this->config['leading_wildcard']);

    $length = strlen($search);
    for ($i = 0; $i < $length; $i++) {
        $character = $search[$i];
        switch ($character) {
            case '"':
                if ($in_string == '"') {
                    $in_string = '';
                } else {
                    $in_string = '"';
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                if (!$in_string) {
                    // Closing quote: the phrase as a whole becomes fuzzy
                    $search_fuzzy .= '~' . $fuzzy_num;
                }
                $in_word = '';
                break;
            case ' ':
                if (
                    !$in_string
                    && !$in_fuzzy
                    && !in_array(substr($search_asterix, -1), array('"', '*'))
                ) {
                    $search_asterix .= '*';
                    $search_fuzzy .= '~' . $fuzzy_num;
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                $in_word = $in_fuzzy = '';
                break;
            case '-':
                // Allow searching with hyphens
                if ($search_asterix != '' && substr($search_asterix, -1) != ' ') {
                    $search_asterix .= ' '; // @TODO workaround, not the solution.
                } else {
                    $search_asterix .= $character;
                }
                break;
            case '~':
                // Search with tilde (fuzzy search): drop the last wildcard.
                // strrpos() returns false when there is none; passing that on
                // would delete the first character of the search instead.
                // NOTE(review): without leading wildcards the last "*" belongs
                // to the previous word - confirm whether removing it is intended.
                $asterisk_pos = strrpos($search_asterix, '*');
                if ($asterisk_pos !== false) {
                    $search_asterix = substr_replace($search_asterix, '', $asterisk_pos, 1);
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                $in_fuzzy = $character;
                break;
            default:
                // $in_word holds the first character of the current word; compare
                // against '' because a word starting with "0" is falsy.
                if ($leading_wildcard && $in_word === '') {
                    // Asterisk in front of a word; substr() is safe on an empty string
                    if (!$in_string && !in_array($character, array('+', '-', '*')) && substr($search_asterix, -1) != '-') {
                        $search_asterix .= '*';
                    }
                    $in_word = $character;
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
        }
    }
    if ($character != '"' && !$in_fuzzy) {
        if ($character != '*') {
            // Append an asterisk at the end
            $search_asterix .= '*';
        }
        if ($search_asterix != '*' && $character != '~') {
            // Append a tilde at the end
            $search_fuzzy .= '~' . $fuzzy_num;
        }
    }

    $search_string = $search_asterix;
    if ($fuzzy) {
        $search_string .= " $search_fuzzy";
    }

    if (!$original) {
        // Add the Lucene filter
        if (is_string($filter) && !empty($filter)) {
            if ($search_string == '*') {
                $search_string = $filter;
            } else {
                $search_string = "($search_string) AND ($filter)";
            }
        }

        // Additional query for the extra search
        if (!empty($this->extraQuery)) {
            if ($search_string == '*') {
                $search_string = $this->extraQuery;
            } else {
                $search_string = "($search_string) AND ({$this->extraQuery})";
            }
        }

        // Escape exclamation marks
        $search_string = str_replace('!', '\!', $search_string);
    }

    return $search_string;
}
499 
/**
 * Cache hook. The implementation in this class does nothing and returns null.
 *
 * @return void
 */
function clearCache() {
}
506 
/**
 * Rejects over-long search strings.
 *
 * The limit is $GLOBALS['egotec_conf']['search']['maxlength'], or 200
 * characters when that setting is missing or empty. A longer search string
 * ends the request with HTTP status 400.
 *
 * @param string $search Search string to check.
 * @return void
 */
protected function checkSearch($search) {
    // The setting is optional; !empty() avoids reading a missing array key.
    $maxlength = !empty($GLOBALS['egotec_conf']['search']['maxlength'])
        ? $GLOBALS['egotec_conf']['search']['maxlength']
        : 200;

    if (mb_strlen($search) > $maxlength) {
        Ego_System::header(400);
        exit;
    }
}
522 }
_getContent($page, $k)
Definition: Ego_Search.php:166
checkSearch($search)
Definition: Ego_Search.php:513
static arrayFlatRecursive($array, $callback=null)
static getAllSites($username='', $perm='', $table=false, $type='')
static sortPages($pages, $sorttype='field', $sortby='id', $sortdirection="asc")
_getExtra($page, $clean=false)
Definition: Ego_Search.php:212
sortPages($pages, $order='', $sort=array())
Definition: Ego_Search.php:150
filterExtra($page)
Definition: Ego_Search.php:255
globalSearch($search, $sites=array(), $query=array(), $param=array(), $sort=array(), $filter='')
Definition: Ego_Search.php:104
search($search, $relation, $query)
static arrayValuesRecursive($array)
update($index, $page)
filterContent($content, $k='extra')
Definition: Ego_Search.php:230
setExtraQuery($query, $bind=array())
Definition: Ego_Search.php:310
Definition: Site.php:29