7 require_once(
'base/Ego_Search.php');
8 require_once(
'composer/vendor/autoload.php');
23 private $client = null;
30 private $officeImport =
false;
37 private const MAX_CLAUSE_COUNT = 10000;
53 throw new Exception(
"missing licence");
61 'leading_wildcard' => true
65 $table = $GLOBALS[
'site']->pageTable;
68 if (!$GLOBALS[
'egotec_conf'][
'elastic'][
'max_results']) {
69 $GLOBALS[
'egotec_conf'][
'elastic'][
'max_results'] = 10000;
77 for ($i = 0; $i < 4; $i++) {
78 if ($GLOBALS[
'egotec_conf'][
'elastic'][
'host' . $i]) {
79 $hosts[] = trim($GLOBALS[
'egotec_conf'][
'elastic'][
'host' . $i],
'/');
84 throw new Exception(
"missing hosts for elastic");
87 $this->client = Elasticsearch\ClientBuilder::create()->setHosts($hosts)->build();
89 $this->config[
'index'] = strtolower($table);
90 $this->config[
'table'] = $table;
91 $this->config[
'param'] = $param;
94 $this->createPipeline();
104 public function delete($id) {
106 'index' => $this->config[
'index'],
107 'type' => $this->config[
'type'],
111 $this->client->delete($params);
123 $GLOBALS[
'monitor'][
'search_reset']++;
138 $GLOBALS[
'monitor'][
'search_reset_all']++;
152 'index' => $this->config[
'index']
155 if (!$this->client->indices()->exists($params)) {
157 'index' => $this->config[
'index'],
160 'number_of_shards' => $GLOBALS[
'egotec_conf'][
'elastic'][
'number_of_shards'],
161 'number_of_replicas' => $GLOBALS[
'egotec_conf'][
'elastic'][
'number_of_replicas'],
162 'index.mapping.ignore_malformed' =>
true,
163 'index.mapping.total_fields.limit' => self::MAX_CLAUSE_COUNT
166 $this->config[
'type'] => [
169 'type' =>
'completion' 177 $this->client->indices()->create($params);
192 'index' => $all ?
"_all" : $this->config[
'index'],
193 'client' => [
'ignore' => [400, 404]]
196 if ($this->client->indices()->exists($params)) {
197 $this->client->indices()->delete($params);
204 foreach ($pages as $page) {
205 $page = $this->indexFiles($page);
207 $params[
'body'][] = [
209 '_index' => $this->config[
'index'],
210 '_type' => $this->config[
'type'],
211 '_id' => $page->field[
'id']
215 $params[
'body'][] = $this->getBody($page);
218 $this->client->bulk($params);
230 public function update($id, $page, $count = []) {
231 $page = $this->indexFiles($page);
233 $GLOBALS[
'monitor'][
'search_update_count']++;
236 'index' => $this->config[
'index'],
237 'type' => $this->config[
'type'],
239 'body' => $this->getBody($page),
240 'client' => [
'ignore' => 404]
243 $this->client->index($params);
253 private function getBody($page) {
255 $extra_contents = is_array($page->extra[
'_contents'])
261 return trim(strip_tags(implode(
' ', $value)));
264 $page->extra[
'_contents'],
267 preg_match(
'/^(index\.php\?|https?:\/\/|[^@ ]+@[^ ]+|\d{4}-\d{2}-\d{2}|\d{2}:\d{2}:?)/si', $value)
268 || preg_match(
'/^\{.*?\}$/si', $value)
273 $value = preg_replace(
'/(\r\n|\r|\n)/s',
' ', $value);
285 unset($page->extra[
'_contents']);
286 $extra_values = $this->
_getExtra($page,
true);
296 $this->
_getContent($page,
'content') . ($extra_contents ?
' ' . $extra_contents :
'')
300 $suggestions = array_values(
305 return preg_replace(
'/\W/',
'', $value);
313 [$keywords, $name, $title, $short, $content]
320 return !is_numeric($value) && strlen($value) > 2;
327 'keywords' => $keywords,
332 'content' => $content,
334 'extra_values' => trim(
340 return implode(
' ', $value);
347 'type' => $page->field[
'type'],
349 'ignore_search' => (($page->field[
'nav_hide'] & 4) == 4) ? 1 : 0,
350 'inactive' => (int)$page->field[
'inactive'],
351 'deleted' => (
int)$page->field[
'deleted'],
353 'input' => $suggestions
367 private function getSearchParam($tables, $search, $filter, $fuzzy, $id_list = []) {
368 $fields = [
'keywords',
'url',
'name',
'title',
'short',
'content',
'extra_values'];
371 $rewrite =
'top_terms_' . self::MAX_CLAUSE_COUNT;
374 $search = trim(mb_strtolower($search));
377 $search = trim(preg_replace([
386 if (preg_match_all(
'/( |^)([^+-][^ "]+-[^ "]+)/is', $search, $matches)) {
387 foreach ($matches[2] as $word) {
388 $search .=
' ' . str_replace(
'-',
' ', $word);
394 if ($tables === null) {
396 list($name, $lang) = preg_split(
'/_(?=[^_]*$)/', $this->config[
'table']);
397 $site =
new Site($name, $lang);
398 $count = $site->getSearchCount();
409 if (preg_match_all(
'/(".*?"|[^ ]+)/is', $search, $matches)) {
411 $match_phrase =
sizeof($matches[0]) > 1 && !preg_match(
'/(^| )["*+-]/', $search);
413 foreach ($fields as $field) {
415 if ($field ==
'extra_values' && isset($count[
'extra'])) {
416 $boost = (int)$count[
'extra'];
417 } elseif (isset($count[$field])) {
418 $boost = (int)$count[$field];
421 foreach ($matches[0] as $query) {
423 if (!in_array($query[0], [
'"',
'-']) && strpos($query,
'-') !==
false) {
424 $query =
"\"$query\"";
427 if ($query[0] ==
'"') {
429 $query = trim($query,
'"');
445 'rewrite' => $rewrite,
450 } elseif (preg_match(
'/^(.*?)~([0-9.]+|)$/', $query, $match)) {
453 $fuzziness = floatval($match[2] !==
'' ? $match[2] : 1);
459 'rewrite' => $rewrite,
461 'fuzziness' => round(2 * $fuzziness)
467 $wildcard =
function ($query) {
468 if (strpos($query,
'*') ===
false) {
476 $query = ltrim($query,
'+');
477 $must_should[md5($query)][] = [
480 'value' => $wildcard($query),
481 'rewrite' => $rewrite,
488 $query = ltrim($query,
'-');
492 'value' => $wildcard($query),
493 'rewrite' => $rewrite,
503 'value' => $wildcard($query),
504 'rewrite' => $rewrite,
519 'query' => trim($search),
520 'boost' => $boost + 1
528 $minimum_should_match = $search ==
'' && (!empty($filter) || !empty($this->extraQuery)) ? 0 : 1;
530 if (
sizeof($must_should)) {
531 $minimum_should_match = 0;
533 foreach ($must_should as $items) {
537 'minimum_should_match' => 1
543 if ($this->config[
'param'][
'only_active']) {
// Entered only when neither the 'deleted' nor the 'deleted_or' search parameter is set.
// Both flags must be read from $this->config; reading a misspelled property yields null
// and makes the second check always pass.
550 if (!$this->config['param']['deleted'] && !$this->config['param']['deleted_or']) {
557 if ($this->config[
'param'][
'search']) {
566 if (!empty($id_list)) {
568 'constant_score' => [
580 $size = (int) $GLOBALS[
'egotec_conf'][
'elastic'][
'max_results'];
583 if (!empty($this->extraQuery)) {
592 if (!empty($filter)) {
614 'index' => $tables ? $tables : $this->config[
'index'],
617 'type' => $this->config[
'type'],
618 'sort' => [
'_score'],
625 'must_not' => $must_not,
628 'minimum_should_match' => $minimum_should_match
638 private function createPipeline() {
639 if ($this->officeImport) {
641 'id' =>
'attachment',
643 'description' =>
'Extract attachment information',
648 'indexed_chars' => -1
655 $this->client->ingest()->putPipeline($params);
667 private function indexFile(
Page $page, $path) {
672 @ini_set(
"memory_limit",
"-1");
674 $pageTable = strtolower($page->
getSite()->pageTable);
678 'index' => $pageTable,
679 'type' => $this->config[
'type'],
681 'pipeline' =>
'attachment',
686 $result = $this->client->index($params);
689 $content = $this->getIndexFile($pageTable, $identity);
690 $this->
delete($identity);
706 private function getIndexFile($pageTable, $identity) {
708 'index' => strtolower($pageTable),
709 'type' => $this->config[
'type'],
713 $response = $this->client->get($params);
714 return $response[
'_source'][
'attachment'][
'content'];
724 private function indexFiles(
Page $page) {
725 if (!$this->officeImport) {
730 $page->field[
'type'] ==
'multimedia/file' 731 && !$page->extra[
'_indexed']
733 if (!preg_match(
'/(image|video|audio|zip|exe|rar|octet-stream|postscript)/is', $page->extra[
'mime_type'])) {
734 $page->extra[
'_indexed'] =
true;
736 $content = $this->indexFile($page, $GLOBALS[
'egotec_conf'][
'var_dir'] .
'media/' . $page->
getSite()->name .
'/' . $page->
getMediaFilename());
738 }
catch (Exception $e) {
741 strpos($e->getMessage(),
'EncryptedDocumentException') ===
false 742 && strpos($e->getMessage(),
'TikaException') ===
false 744 egotec_error_log($page->
getIdentity() .
" Fehler beim Indizieren: " . $page->extra[
'mime_type'] .
"\n" . $e->getMessage());
748 $page->
update([],
true,
true);
774 public function search($search, $relation, $query, $filter =
'', $fuzzy =
false) {
777 $GLOBALS[
'monitor'][
'search_count']++;
778 $GLOBALS[
'monitor'][
'search_length'] += mb_strlen($search);
779 $GLOBALS[
'monitor'][
'search_words'] += substr_count($search,
' ') + 1;
781 $params = $this->getSearchParam(null, $search, $filter, $fuzzy, $query[
'id_list']);
783 $start = microtime(
true);
785 $results = $this->client->search($params);
786 }
catch (Exception $e) {
790 $hits = $results[
"hits"][
"hits"];
791 $stop = microtime(
true);
793 $duration = (int)(($stop - $start) * 1000);
794 $GLOBALS[
'monitor'][
'search_duration'] += $duration;
797 unset($query[
'order']);
798 unset($query[
'limit']);
799 $query[
'where'] =
'1=0';
805 foreach ($hits as $key => $value) {
806 $ids[$value[
"_id"]] = $value[
"_score"];
809 $max_value = $results[
"hits"][
"max_score"];
810 $min_value = min(array_values($ids));
812 if ($max_value == $min_value) {
813 $multiply = 4 / $max_value;
816 $multiply = 4 / ($max_value - $min_value);
819 return $this->buildQuery($ids, $relation, $query, $min_value, $multiply);
834 public function globalSearch($search, $sites = [], $query = [], $param = [], $sort = [], $filter =
'') {
837 $start1 = microtime(
true);
838 $GLOBALS[
'monitor'][
'search_global_count']++;
839 $GLOBALS[
'monitor'][
'search_global_length'] += mb_strlen($search);
840 $GLOBALS[
'monitor'][
'search_global_words'] += substr_count($search,
' ') + 1;
843 if (!isset($param[
'search']) && empty($GLOBALS[
'admin_area'])) {
844 $param[
'search'] =
true;
848 $lang = $_REQUEST[
'lang'] ? $_REQUEST[
'lang'] : ($GLOBALS[
'site'] ? $GLOBALS[
'site']->language : null);
854 $tables = $this->getTables($sites, $lang, $relations);
857 $start2 = microtime(
true);
858 $params = $this->getSearchParam($tables, $search, $filter, (
bool)$param[
'fuzzy']);
860 $results = $this->client->search($params);
861 }
catch (Exception $e) {
864 $stop2 = microtime(
true);
865 $duration = (int)(($stop2 - $start2) * 1000);
866 $GLOBALS[
'monitor'][
'search_global_d2'] += $duration;
868 $hits = $results[
"hits"][
"hits"];
874 $min_value = PHP_INT_MAX;
877 foreach ($hits as $hit) {
878 if (!isset($sorted_hits[$hit[
'_index']])) {
879 $sorted_hits[$hit[
'_index']] = [];
881 $sorted_hits[$hit[
'_index']][$hit[
'_id']] = $hit[
'_score'];
882 if ($hit[
'_score'] < $min_value) {
883 $min_value = $hit[
'_score'];
887 $max_value = $results[
"hits"][
"max_score"];
889 if ($max_value == $min_value) {
890 $multiply = 4 / $max_value;
893 $multiply = 4 / ($max_value - $min_value);
898 foreach ($sorted_hits as $table => $ids) {
899 $site = $relations[$table];
900 foreach ($site->getPages($this->buildQuery($ids, $site->pageTable .
'.id', $query, $min_value, $multiply), $param) as $page) {
904 $pages = $this->
sortPages($pages, $query[
'order'], $sort);
906 $stop1 = microtime(
true);
907 $duration = (int)(($stop1 - $start1) * 1000);
908 $GLOBALS[
'monitor'][
'search_global_d1'] += $duration;
923 $tables = $this->config[
'index'];
924 if (!empty($sites)) {
925 $tables = $this->getTables($sites);
928 $results = $this->client->search([
932 'type' => $this->config[
'type'],
934 'sort' => [
'_score'],
940 'field' =>
'suggest',
941 'skip_duplicates' =>
true 947 }
catch (Exception $e) {
952 if (is_array($results[
'suggest'][
'suggest'][0][
'options'])) {
953 foreach ($results[
'suggest'][
'suggest'][0][
'options'] as $result) {
954 if (mb_strtolower($query) != mb_strtolower($result[
'text'])) {
955 $suggestions[] = $result[
'text'];
974 private function buildQuery($result, $relation, $query, $min_value, $multiply) {
975 if (!empty($result)) {
976 $ids = array_keys($result);
977 if (empty($query[
'where'])) {
978 $query[
'where'] =
'';
980 $query[
'where'] .=
' AND ';
985 foreach (array_chunk($ids, 999) as $id_group) {
986 $id_groups[] =
"$relation IN (" . implode(
', ', $id_group) .
")";
988 $query[
'where'] .=
'(' . implode(
' OR ', $id_groups) .
')';
990 $query[
'fields2'] =
'CASE ';
991 foreach ($result as $id => $value) {
992 $query[
'fields2'] .=
"WHEN $relation = $id THEN " . (str_replace(
',',
'.', ($value - $min_value) * $multiply)) .
" ";
994 $query[
'fields2'] .=
'ELSE 0 END AS score';
995 if (empty($query[
'order'])) {
996 $query[
'order'] =
'score DESC';
999 unset($query[
'order']);
1000 unset($query[
'limit']);
1001 $query[
'where'] =
'1=0';
1016 if (is_array($bind)) {
1018 $reserved_characters = preg_quote(
'+-&|!(){}[]^"~*?:\\');
1019 foreach ($bind as $key => $value) {
1020 $query = str_replace(
1022 preg_replace_callback(
1023 '/[' . $reserved_characters .
']/',
1024 function ($matches) {
1025 return '\\' . $matches[0];
1035 $Elastic_query = $query;
1036 $sub_queries = preg_split(
'/\s+(and|or)\s+/si', $query);
1037 foreach ($sub_queries as $sub_query) {
1038 if (preg_match(
'/(!?extra\.[^ !=<>]+)(\s*(like|>=|<=|!=|=|>|<)\s*(.*?))?$/si', trim($sub_query,
'() '), $matches)) {
1039 $param = $matches[1];
1040 $operator = mb_strtolower($matches[3]);
1041 $value = trim($matches[4],
'\'"'); 1043 // Feld darf nicht im Dokument gesetzt sein 1045 if ($param[0] == '!') { 1047 $param = substr($param, 1); 1052 $replace = "-$param:*
" . (sizeof($sub_queries) == 1 ? ' AND *' : ''); 1054 // Elastic Syntax schreiben 1055 $replace = "$param:
"; 1056 if (!is_numeric($value)) { 1057 if ($operator == 'like') { 1058 $value = $this->prepareSearch(str_replace('%', '', $value), '', true); 1060 $value = "'$value'"; 1065 switch ($operator) { 1071 $replace = "($param:* NOT $param:$value)
"; 1074 $replace .= '{' . $value . ' TO *}'; 1077 $replace .= '[' . $value . ' TO *]'; 1080 $replace .= '{* TO ' . $value . '}'; 1083 $replace .= '[* TO ' . $value . ']'; 1090 $Elastic_query = str_replace($matches[0], $replace, $Elastic_query); 1094 $this->extraQuery = $Elastic_query; 1102 public function getConfig() { 1103 return $this->config; 1111 public function clearCache() { 1112 foreach ($this->hosts as $host) { 1113 @file_get_contents($host . '_cache/clear'); 1125 private function getTables($sites, $lang = '', &$relations = []) { 1128 $lang = $_REQUEST['lang'] ? $_REQUEST['lang'] : ($GLOBALS['site'] ? $GLOBALS['site']->language : null); 1130 foreach ($sites as $site) { 1132 if (is_string($site)) { 1133 $site = new Site($site); 1136 $site->setLanguage($lang); 1138 } catch (Exception $e) { 1139 // Mandant existiert nicht in dieser Sprache, ignorieren 1142 $table = strtolower($site->pageTable); 1143 $tables = $tables . ',' . $table; 1144 $relations[$table] = $site; 1146 return ltrim($tables, ','); 1156 private function error($e) { 1157 $response = @json_decode($e->getMessage(), true); 1159 if ($response['error']) { 1160 if ($response['error']['root_cause']) { 1161 foreach ($response['error']['root_cause'] as $cause) { 1162 $messages[] = $cause['type'] . ' for index ' . $cause['index']; 1165 if ($response['error']['failed_shards']) { 1166 foreach ($response['error']['failed_shards'] as $shard) { 1167 if (!empty($shard['reason']['caused_by'])) { 1168 $messages[] = 'caused by ' . $shard['reason']['caused_by']['type'] . ' (reason: ' . $shard['reason']['caused_by']['reason'] . ')'; 1173 if (empty($messages)) { 1174 $messages[] = $e->getMessage(); 1176 egotec_error_log('Elastic Exception thrown (' . get_class($e) . ', Code ' . $e->getCode() . '): ' . implode('; ', $messages)); 1177 Ego_System::header($response['status'] ?? 400); update($id, $page, $count=[])
__construct($table='', $param=[])
getSuggestions($query, $sites=[], $max=5)
static checkLicence($ini_path)
globalSearch($search, $sites=[], $query=[], $param=[], $sort=[], $filter='')
setExtraQuery($query, $bind=[])
static arrayFlatRecursive($array, $callback=null)
static getAllSites($username='', $perm='', $table=false, $type='')
update($param=array(), $matrix_flag=true, $asis=false)
static file_exists($file)
_getExtra($page, $clean=false)
sortPages($pages, $order='', $sort=array())
getMediaFilename($force_lang=false, $suffix="")
search($search, $relation, $query, $filter='', $fuzzy=false)
static file_get_contents($filename, $utf8=true, $context=null)
static filterNonUtf8($s, $substitute="", $strict=false)