DisplayErrorVisible(); # Коды возврата поискового движка define ("SRC_OK", 0x00000000); //Поиск выполнен успешно define ("SRC_EMPTY_QUERY", 0x00000001); //Пустой поисковый запрос define ("SRC_SHORT_QUERY", 0x00000002); //Короткий поисковый запрос define ("SRC_NOT_FOUND", 0x00000003); //Не найдено class CSEngine{ var $query; //Строка запроса var $page; //Номер текущей страницы var $max_on_page; //Количество элементов на странице var $el_on_page; //Идентификатор количества на странице var $cnt_results; //Количество найденных позиций var $total_pages; //Количество страниц var $results; //Результаты поиска var $qsl_where; //Условия для запроса var $search_banner; //Баннер для поискового запроса function GetSufixPage(){ $v = substr("".$this->cnt_results."", -1); if ($v=="1") return "страница"; if (($v=="2")||($v=="3")||($v=="4")) return "страницы"; return "страниц"; } # Получить реальный IP пользователя function GetRealIp(){ if (!empty($_SERVER['HTTP_CLIENT_IP'])){ $ip=$_SERVER['HTTP_CLIENT_IP']; } elseif (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])){ $ip=$_SERVER['HTTP_X_FORWARDED_FOR']; }else{ $ip=$_SERVER['REMOTE_ADDR']; } return $ip; } # Записль лога function WriteLog($text){ $ip=$this->GetRealIp(); $n=$this->cnt_results; // Debug()->Out("REPLACE INTO users_search(id,text,date,found,user_ip) VALUES(id=LAST_INSERT_ID(id+1), '$text', NOW(), '$n', '$ip')"); $res = SendQuery("INSERT IGNORE INTO users_search(id,text,date,found,user_ip) VALUES(id=LAST_INSERT_ID(id+1), '$text', NOW(), '$n', '$ip')"); } function isExistsRus($str){ for ($i=0; $i= ord('а')) && ($char <=ord('я'))){ return false; } } return true; } function __construct() { //Устанавливаем русскую кодировку //setlocale(LC_ALL, "ru_RU.CP1251"); $this->cnt_results=0; $this->page=1; $this->el_on_page = (int)(isset($_COOKIE['pg_search'])?($_COOKIE['pg_search']):0); $this->max_on_page = CFilter::GetPageCnt($this->el_on_page); if (isset($_GET['p'])){ $p = strip_tags($_GET['p']); if (is_numeric($p)) $this->page=$p; } 
$this->total_pages=1; //Получаем строку запроса из GET параметра вне зависимости от кодировки if (isset($_GET['s'])){ $vl = $_GET['s']; if ( $this->isExistsRus($vl) ){ $s = ConvText(urldecode($vl)); if ($s=="") $s = strip_tags($vl); }else{ $s = strip_tags($vl); } $s=str_replace("\'", '′', $s); $this->query = FiltringSearchString($s); $this->query=str_replace("′", "\'", $this->query); }else{ $this->query = ""; } } # Проверяет являетлся ли слово русским function isWordRu($word){ if (preg_match("/^[А-Яа-я]+$/i", $word)) return true; return false; } # Проверяет являетлся ли слово английским function isWordEn($word){ if (preg_match("/^[A-Za-z]+$/i", $word)) return true; return false; } function GetCharType($char){ $type = 0; if ($this->isWordRu($char)){ $type = 1; }elseif($this->isWordEn($char)){ $type = 2; }elseif(is_numeric($char)){ $type = 3; } return $type; } #Удаляем стоп слова function ClearStopWords($list){ $f = file("engine/search/stopwords.tbl"); for ($i = 0; $i $elem){ //$word = mb_strtolower($elem['word'], "windows-1251"); $word = mb_strtolower($elem['word'], "utf-8"); if ($word == $line){ unset($list[$n]); } } } return $list; } function GetSinonimWords($list){ #Открываем файл с синомимами $f = file("engine/search/synonym.tbl"); foreach($list as $k => $elem){ //$word = mb_strtolower($elem['word'], "windows-1251"); $word = mb_strtolower($elem['word'], "utf-8"); $list[$k]['base']=array(); for ($i = 0; $iGetCharType($char); # Допускается ли символ к обработке if (!isNormalChar($char)) $char=" "; $tp = $this->GetCharType($char); if (isSpace($char)){ $w = trim($w); if ($w!="") $wl[]=array('word'=>strtoupper($w), 'type'=>$type); $w=""; $type=-1; }elseif(($tp!=0)&&($type!=4)){ if ($type!=$tp) $type=5; $w.=$char; }else{ if (($char=='.')||($char==',')){ if (preg_match("#[.|,]#", $w)==0){ $type=4; $tp=4; }else{ $type=5; } } elseif(!is_numeric($char)){ $type=5; } $w.=$char; } } $w = trim($w); if ($w!="") $wl[]=array('word'=>strtoupper($w), 'type'=>$type); #Удаляем стоп 
слова $wl = $this->ClearStopWords($wl); $wl = $this->GetSinonimWords($wl); # Инициализация морфологии для русского языка $opts = array( 'storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true, ); $dir = dirname(__FILE__) . '/dicts/ru'; $lang = 'ru_RU'; try{ $morphy = new phpMorphy($dir, $lang, $opts); } catch(phpMorphy_Exception $e) { die('Error occured while creating phpMorphy RU instance: ' . PHP_EOL . $e); return false; } $hd_words = array(); foreach($wl as $i => $elem){ $tp = $elem['type']; $wr = $elem['word']; $bz = $elem['base']; #Ищем словосочетания foreach($bz as $tt){ if(strpos($tt, " ")>0){ $hd_words[]=$tt; } } if ($tp==1 || $tp==2 || $tp==5){ if ($tp==1){ $bs = $morphy->getBaseForm($wr); if ($bs!=$wr){ if (is_array($bs)){ foreach($bs as $t){ if (!in_array($t, $elem['base'])){ array_unshift($wl[$i]['base'], $t); } } }else{ if (!in_array($bs, $elem['base'])){ array_unshift($wl[$i]['base'], $bs); } } } } if (false===($fm=$morphy->getAllForms($wr))){ $wl[$i]['wrd_forms'] = array(); }else{ $wl[$i]['wrd_forms'] = $fm; } } } unset($morphy); //Работаем со словосочетаниями //---------------------------- $sins = GetSynonymWords($hd_words); foreach ($sins as $w){ $lt = explode(" ", $w[0]); foreach($lt as $bs){ foreach ($wl as $j => $elem){ if ($elem['word']==strtoupper($bs)){ for ($i=1; $i$data['caption'], "description"=>$data['description'], "id"=>$data['id'], "url"=>$url['url'], "group"=>$url['base'], "parent_id"=>$data['parent_id'], "code_1c"=>FormatCode1C($data['code_1c']), "image"=>$data['img'], "rel"=>999999.0 * 10000); //Релевантность максимальна $this->cnt_results+=1; } return $result; } #$max_sin_words - макс. 
/**
 * Builds the SQL condition template for a parsed search query.
 *
 * Every entry of $src_words becomes one parenthesised group. Groups are
 * joined with AND, the alternatives inside a group with OR. The column
 * expression is left as the literal placeholder %field%; the search methods
 * replace it with a real SQL expression before running the query.
 *
 * Handling by $l['type']:
 *   5     - compound word: the fragments returned by ExstractHardWords() are
 *           chained into one pattern, then each base and each word form is
 *           added as a whole-word alternative;
 *   4     - number with a decimal separator: matches either "." or ",";
 *   3     - integer: whole-number match, plus a code_1c equality test when
 *           the word is exactly five digits;
 *   other - plain word: the word itself, its bases (prefix match, no
 *           end-of-word anchor) and its word forms.
 *
 * NOTE(review): terms are interpolated into the SQL text without escaping,
 * so this relies entirely on the filtering done before the words get here.
 *
 * @param array $src_words     list of arrays with keys 'word', 'type' and
 *                             optionally 'base' and 'wrd_forms'
 * @param int   $max_sin_words cap on forms taken from each nested form list
 * @return string condition template, or "" when there is nothing to search
 */
function BuildMySqlWhere($src_words, $max_sin_words=2){
    $groups = array();

    foreach ($src_words as $l){
        $type  = isset($l['type']) ? $l['type'] : 0;
        $word  = isset($l['word']) ? $l['word'] : "";
        $bases = (isset($l['base']) && is_array($l['base'])) ? $l['base'] : array();
        $forms = (isset($l['wrd_forms']) && is_array($l['wrd_forms'])) ? $l['wrd_forms'] : array();
        $conds = array();

        if ($type == 5){
            // Compound word: first fragment anchored at a word start, last
            // fragment anchored at a word end, anything printable in between.
            $hrd_words = $this->ExstractHardWords($word);
            $separator = "(([[:print:]]+)?|([[:punct:]]+)?)?";
            $last = count($hrd_words);
            $exp = "";
            $n = 0;
            foreach ($hrd_words as $w){
                $n += 1;
                if ($exp == ""){
                    $exp = "([[:<:]]".$w.")";
                    continue;
                }
                $exp .= $separator;
                $exp .= ($n == $last) ? "(".$w."[[:>:]])" : "(".$w.")";
            }
            if ($exp != ""){
                $conds[] = "%field% RLIKE ('".$exp."')";
            }
            foreach ($this->FlattenTerms($bases) as $term){
                $conds[] = "%field% RLIKE ('[[:<:]]".$term."[[:>:]]')";
            }
            foreach ($this->FlattenTerms($forms) as $term){
                $conds[] = "%field% RLIKE ('[[:<:]]".$term."[[:>:]]')";
            }
        }elseif ($type == 4){
            // Decimal number. "[.,]" accepts either separator; the previous
            // "[.|,]" also matched a literal "|".
            $vr = explode('.', str_replace(",", ".", $word));
            $frac = isset($vr[1]) ? $vr[1] : "";
            // "([^0-9]|$)" also matches a number that ends the text; a bare
            // "[^0-9]" demanded one more character after it.
            $conds[] = "%field% RLIKE ('[[:<:]]".$vr[0]."[.,]".$frac.'([^0-9]|$)'."')";
        }elseif ($type == 3){
            // Integer.
            $conds[] = "%field% RLIKE ('[[:<:]]".$word.'([^0-9]|$)'."')";
            // A five-digit number may be an order code. The value goes into
            // the SQL unquoted, so require digits only.
            if (preg_match('/^[0-9]{5}$/', $word)){
                $conds[] = "(code_1c=".$word.")";
            }
        }else{
            $exp = "([[:<:]]".$word."[[:>:]])";
            foreach ($bases as $base){
                if ($base != $word && strlen($base) > 0){
                    // No end-of-word anchor: a base matches as a prefix.
                    $exp .= "|([[:<:]]".$base.")";
                }
            }
            foreach ($forms as $form_set){
                if (is_array($form_set)){
                    // Nested list: take at most $max_sin_words forms, the
                    // resulting queries get very heavy otherwise.
                    $left = $max_sin_words;
                    foreach ($form_set as $form){
                        if ($word != $form){
                            $exp .= "|([[:<:]]".$form."[[:>:]])";
                            $left -= 1;
                            if ($left <= 0) break;
                        }
                    }
                }elseif ($word != $form_set){
                    $exp .= "|([[:<:]]".$form_set."[[:>:]])";
                }
            }
            $conds[] = "%field% RLIKE ('".$exp."')";
        }

        // A group without conditions would render as "()", which is not
        // valid SQL, so it is left out.
        if (count($conds) > 0){
            $groups[] = "(".implode("OR ", $conds).")";
        }
    }

    return implode("AND", $groups);
}

/**
 * Internal helper: flattens a scalar or an arbitrarily nested array into a
 * flat list of non-empty strings.
 *
 * @param mixed $value
 * @return array
 */
function FlattenTerms($value){
    $out = array();
    if (is_array($value)){
        foreach ($value as $item){
            foreach ($this->FlattenTerms($item) as $term){
                $out[] = $term;
            }
        }
    }elseif (is_string($value) || is_numeric($value)){
        $text = (string)$value;
        if ($text !== ""){
            $out[] = $text;
        }
    }
    return $out;
}

/**
 * Searches the products table using the condition template in
 * $this->qsl_where and adds the row count to $this->cnt_results.
 *
 * @param array $src_words unused; kept so existing callers keep working
 * @return array|null list of result arrays with 'rel' => 1.0, or NULL when
 *                    no template is set or nothing was found
 */
function SearchOnDataBase($src_words){
    if (strpos((string)$this->qsl_where, '%field%') === false){
        return NULL;
    }

    $where = str_replace("%field%", 'UPPER(CONCAT(caption, " ", description))', $this->qsl_where);
    $this->cnt_results += GetCountResQuery("SELECT COUNT(*) FROM products WHERE visible=1 AND (".$where.")");

    $ids = array();
    $res = SendQuery("SELECT parent_id, id FROM products WHERE visible=1 AND (".$where.")");
    if ($res){
        while ($row = mysql_fetch_array($res)){
            $ids[] = $row['id'];
        }
    }
    if (empty($ids)){
        return NULL;
    }

    $result = NULL;
    $rws = GetProductURLsFromId($ids, array('caption', 'description', 'code_1c', 'img', 'parent_id'));
    foreach ($rws as $l){
        $result[] = array(
            "caption"     => $l['caption'],
            // NOTE(review): in the text under review the needle is a bare
            // line break. A case-insensitive replace of a line break is
            // unusual, so the original literal may have been an HTML tag
            // that was lost - confirm against the repository.
            "description" => str_ireplace("\n", " ", $l['description']),
            "id"          => $l['id'],
            "url"         => $l['url'],
            "group"       => $l['base'],
            "parent_id"   => $l['parent_id'],
            "code_1c"     => FormatCode1C($l['code_1c']),
            "image"       => $l['img'],
            "rel"         => 1.0
        );
    }
    return $result;
}

/**
 * Searches the iz_prod table (IvZip site) using the condition template in
 * $this->qsl_where and adds the row count to $this->cnt_results.
 *
 * @param array $src_words unused; kept so existing callers keep working
 * @return array|null list of result arrays with 'rel' => 0.25, or NULL
 */
function SearchOnIvZip($src_words){
    if (strpos((string)$this->qsl_where, '%field%') === false){
        return NULL;
    }

    $where = str_replace("%field%", 'UPPER(description)', $this->qsl_where);
    $this->cnt_results += GetCountResQuery("SELECT COUNT(*) FROM iz_prod WHERE visible=1 AND (".$where.")");

    $result = NULL;
    $res = SendQuery("SELECT id, parent_id, description, code_1c, image FROM iz_prod WHERE visible=1 AND (".$where.")");
    if ($res){
        while ($row = mysql_fetch_array($res)){
            $result[] = array(
                "caption"     => "",
                "description" => $row['description'],
                "id"          => $row['id'],
                "url"         => "http://ivzip.ru/index.php?catid=".$row['parent_id']."&pid=".$row['code_1c'],
                "group"       => -1,
                "parent_id"   => $row['parent_id'],
                "code_1c"     => FormatCode1C($row['code_1c']),
                "image"       => $row['image'],
                "rel"         => 0.25
            );
        }
    }
    return $result;
}

/**
 * Turns a text fragment into a short display snippet: <sup>, <sub> and <b>
 * become BB codes, every other tag becomes a space, whitespace is collapsed,
 * the BB codes are rendered through Replace_BB_Code(), and text longer than
 * 255 bytes is cut with TruncateText() and marked with "...".
 *
 * @param string $text
 * @return string
 */
function NormalizeCode($text){
    $text = preg_replace("#<[\s]*sup[\s]*>([^<]*)<[\s]*/sup[\s]*>#i", '[sup]\\1[/sup]', $text);
    $text = preg_replace("#<[\s]*sub[\s]*>([^<]*)<[\s]*/sub[\s]*>#i", '[sub]\\1[/sub]', $text);
    $text = preg_replace("#<[\s]*b[\s]*>([^<]*)<[\s]*/b[\s]*>#i", '[b]\\1[/b]', $text);
    $text = preg_replace("/<.*?>/s", ' ', $text);
    // NOTE(review): as shown this replaces a space with a space. The pattern
    // may originally have been an entity or a non-breaking space - confirm.
    $text = preg_replace("/ /s", ' ', $text);
    $text = trim(preg_replace("/[\r\n\s]+/s", ' ', $text));
    $text = Replace_BB_Code($text);

    if (strlen($text) > 255){
        $begin = 0;
        // NOTE(review): this pattern is empty in the text under review, so it
        // always matches at offset 0 and $begin stays 0. It was presumably
        // meant to find the first highlighted fragment - confirm.
        if (preg_match("##is", $text, $matches, PREG_OFFSET_CAPTURE)){
            if ($matches[0][1] > 10){
                $begin = $matches[0][1] - 5;
            }
        }
        $text = TruncateText($text, $begin, 255)."...";
        if ($begin){
            $text = "...".$text;
        }
    }
    return $text;
}

/**
 * Wraps every occurrence of a query word, base or word form in
 * [highlight]...[/highlight] inside the caption, description and code_1c of
 * each result, then passes the field through NormalizeCode().
 *
 * @param array $src_words parsed query words
 * @return void
 */
function Highlight($src_words){
    if (!is_array($this->results)){
        return;
    }

    // The patterns depend only on the query, so they are built once instead
    // of once per result. They are applied in reverse order of collection
    // (word forms first, then bases, then the words themselves).
    $patterns = array_reverse($this->CollectHighlightPatterns($src_words));

    foreach ($this->results as $i => $elem){
        foreach (array('caption', 'description', 'code_1c') as $field){
            $plain = strip_tags($elem[$field]);
            $this->results[$i][$field] = $this->NormalizeCode($this->ApplyHighlight($plain, $patterns));
        }
    }
}

/**
 * Internal helper: builds the de-duplicated list of highlight patterns from
 * the 'word', 'base' and 'wrd_forms' entries of the query words.
 *
 * @param array $src_words
 * @return array list of PCRE patterns
 */
function CollectHighlightPatterns($src_words){
    $unique = array();
    foreach (array('word', 'base', 'wrd_forms') as $col){
        foreach ($src_words as $nd){
            if (!isset($nd[$col])){
                continue;
            }
            foreach ($this->FlattenTerms($nd[$col]) as $term){
                // preg_quote() keeps "." and similar characters in a term
                // literal. The "u" modifier makes \b and case-insensitive
                // matching work on UTF-8 Cyrillic text.
                $unique['#\b('.preg_quote($term, '#').')\b#isu'] = true;
            }
        }
    }
    return array_keys($unique);
}

/**
 * Internal helper: applies the highlight patterns one after another.
 *
 * @param string $text
 * @param array  $patterns
 * @return string
 */
function ApplyHighlight($text, $patterns){
    foreach ($patterns as $pattern){
        $marked = preg_replace($pattern, '[highlight]$1[/highlight]', $text);
        // preg_replace() returns NULL on failure (for example on invalid
        // UTF-8); the text is then left as it was instead of being emptied.
        if ($marked !== null){
            $text = $marked;
        }
    }
    return $text;
}

/**
 * Computes a relevance score for every result and orders $this->results by
 * it, highest first; results with equal relevance are ordered by caption,
 * descending.
 *
 * For each of the caption (weight 1000) and the description (weight 8) the
 * weight is divided by the field length, and the result's 'rel' is
 * multiplied by (occurrences + 1) * weight for every query word found,
 * scaled by 0.9 when only a base matched and by 0.8, 0.4, 0.2, ... when only
 * the 1st, 2nd, 3rd, ... word form matched.
 *
 * @param array $src_words parsed query words
 * @return void
 */
function SortResults($src_words){
    if (!is_array($this->results) || count($this->results) < 1){
        return;
    }

    $columns = array(
        array('name' => 'caption',     'rel' => 1000),
        array('name' => 'description', 'rel' => 8)
    );

    $total = count($this->results);
    for ($i = 0; $i < $total; $i++){
        foreach ($columns as $gr){
            // mb_strtoupper(): plain strtoupper() leaves UTF-8 Cyrillic
            // letters untouched, which made the comparison case-sensitive.
            $capt = mb_strtoupper((string)$this->results[$i][$gr['name']], 'UTF-8');
            if (empty($capt)){
                $rel = 1;
            }else{
                $omega = 1 / strlen($capt);
                $rel = $gr['rel'] * $omega;
            }
            foreach ($src_words as $wrd){
                $this->results[$i]['rel'] *= $this->WordRelevanceFactor($capt, $wrd, $rel);
            }
        }
    }

    // One comparator with a tie-break instead of two usort() passes: the
    // second pass only keeps the order of the first if sorting is stable,
    // which PHP guarantees only from 8.0 on.
    usort($this->results, function ($a, $b){
        $byRel = cmp_ar_results($a, $b);
        return ($byRel != 0) ? $byRel : cmp_ar_capt_results($a, $b);
    });
}

/**
 * Internal helper: relevance multiplier that one query word contributes for
 * one upper-cased field; 1 when neither the word, a base nor a form occurs.
 *
 * @param string $capt upper-cased field text
 * @param array  $wrd  one parsed query word
 * @param float  $rel  length-normalised field weight
 * @return float|int
 */
function WordRelevanceFactor($capt, $wrd, $rel){
    $n = $this->CountTerm($capt, isset($wrd['word']) ? $wrd['word'] : "");
    if ($n > 0){
        return ($n + 1) * $rel;
    }

    foreach ($this->FlattenTerms(isset($wrd['base']) ? $wrd['base'] : array()) as $mt){
        $n = $this->CountTerm($capt, $mt);
        if ($n > 0){
            return ($n + 1) * $rel * 0.9;
        }
    }

    $decl = 0.8;
    foreach ($this->FlattenTerms(isset($wrd['wrd_forms']) ? $wrd['wrd_forms'] : array()) as $wt){
        // Count the current form; the list of bases was counted here before.
        $n = $this->CountTerm($capt, $wt);
        if ($n > 0){
            return ($n + 1) * $rel * $decl;
        }
        $decl /= 2.0;
    }
    return 1;
}

/**
 * Internal helper: number of occurrences of $needle in the already
 * upper-cased $haystack, ignoring case; 0 for an empty needle.
 *
 * @param string $haystack
 * @param string $needle
 * @return int
 */
function CountTerm($haystack, $needle){
    $needle = mb_strtoupper((string)$needle, 'UTF-8');
    if ($needle === ""){
        return 0;
    }
    return substr_count($haystack, $needle);
}
/**
 * Returns the results for the current page.
 *
 * When $this->el_on_page is 4 the whole list is returned; otherwise a slice
 * of $this->max_on_page items starting at the current page.
 *
 * @return array|null $this->results is returned unchanged while it is not
 *                    an array (no search has produced results yet)
 */
function GetResult(){
    if ($this->el_on_page == 4 || !is_array($this->results)){
        return $this->results;
    }
    // A page below 1 would give a negative offset, which array_slice()
    // counts from the end of the list.
    $offset = $this->max_on_page * max(0, (int)$this->page - 1);
    return array_slice($this->results, $offset, $this->max_on_page);
}

/**
 * Returns the 1-based number of the first item on the current page; always
 * 1 when $this->el_on_page is 4.
 *
 * @return int
 */
function GetStartIndex(){
    if ($this->el_on_page == 4){
        return 1;
    }
    return $this->max_on_page * max(0, (int)$this->page - 1) + 1;
}

/**
 * Looks up a banner template for the query in
 * engine/search/banner_words.lst. Only done on the first page.
 *
 * A line matches when every space-separated key word on its left side
 * equals, ignoring case, the word, a word form or a base of some query
 * word. The first matching line wins.
 *
 * NOTE(review): the loop header and the delimiter literal of this method
 * were lost in the text available for review; only the trailing ">" of the
 * delimiter survived. The line is therefore split at its first ">" and a
 * trailing "=" or "-" is dropped from the left side, which accepts ">",
 * "=>" and "->" alike. Confirm against the repository and the data file.
 *
 * @param array $words parsed query words
 * @return string|null trimmed template text, or null when nothing matched
 */
function SearchWordsBanner($words){
    if ($this->page != 1){
        return null;
    }
    $f = file("engine/search/banner_words.lst");
    if (!is_array($f)){
        return null;
    }

    foreach ($f as $line){
        $cut = strpos($line, '>');
        if ($cut === false){
            continue;
        }
        $ln  = rtrim(substr($line, 0, $cut), "=- \t");
        $tpl = substr($line, $cut + 1);

        $wcount = 0;
        $wc_found = 0;
        foreach (explode(' ', trim($ln)) as $elem){
            if ($elem === ""){
                // Doubled spaces must not count as a key word that can
                // never be found.
                continue;
            }
            $wcount++;
            if ($this->BannerKeyMatches($elem, $words)){
                $wc_found++;
            }
        }
        if ($wcount > 0 && $wcount == $wc_found){
            return trim($tpl);
        }
    }
    return null;
}

/**
 * Internal helper: true when $elem equals, ignoring case, the word, a word
 * form or a base of at least one query word.
 *
 * @param string $elem  key word from the banner file
 * @param array  $words parsed query words
 * @return bool
 */
function BannerKeyMatches($elem, $words){
    // mb_strtolower() on both sides: strcasecmp() folds ASCII letters only,
    // so Cyrillic key words used to be compared case-sensitively.
    $needle = mb_strtolower($elem, "utf-8");
    foreach ($words as $element_words){
        foreach (array('word', 'wrd_forms', 'base') as $key){
            if (!isset($element_words[$key])){
                continue;
            }
            $cands = is_array($element_words[$key]) ? $element_words[$key] : array($element_words[$key]);
            foreach ($cands as $el){
                if (is_string($el) && mb_strtolower($el, "utf-8") === $needle){
                    return true;
                }
            }
        }
    }
    return false;
}

/**
 * Runs the search for $this->query.
 *
 * Steps: parse the query into words, build the SQL condition template, look
 * up a banner, collect the exact caption match and the database matches
 * without duplicates, write the log entry, then highlight and sort the
 * results and compute the page count.
 *
 * @return int SRC_EMPTY_QUERY, SRC_SHORT_QUERY, SRC_NOT_FOUND or SRC_OK
 */
function Execute(){
    $this->cnt_results = 0;
    // Start from an empty list so a second call does not append to the
    // results of the first one.
    $this->results = array();

    if ($this->query == ""){
        return SRC_EMPTY_QUERY;
    }
    // Character count, not byte count: a two-letter Cyrillic query is four
    // bytes in UTF-8 and used to pass this check.
    if (mb_strlen($this->query, "utf-8") < 3){
        return SRC_SHORT_QUERY;
    }

    $src_words = $this->MakeWordsFromStr($this->query);
    $this->qsl_where = $this->BuildMySqlWhere($src_words);
    $this->search_banner = $this->SearchWordsBanner($src_words);

    // Ids already placed into the result list.
    $seen = array();

    // Exact match in a caption
    $r = $this->SearchToCaptions($this->query);
    if ($r != NULL){
        $this->results[] = $r;
        if (is_array($r) && isset($r['id'])){
            $seen[$r['id']] = true;
        }
    }

    // Database search; a product already in the list is skipped
    $r = $this->SearchOnDataBase($src_words);
    if ($r != NULL){
        foreach ($r as $s){
            if (isset($seen[$s['id']])){
                continue;
            }
            $seen[$s['id']] = true;
            $this->results[] = $s;
        }
    }

    // The IvZip site search (SearchOnIvZip) is switched off here.

    // The search methods add their raw row counts to cnt_results, so a
    // product found by both was counted twice and could add an empty last
    // page. The de-duplicated list is the real number of found items.
    $this->cnt_results = count($this->results);

    $this->WriteLog($this->query);

    if ($this->cnt_results == 0){
        return SRC_NOT_FOUND;
    }

    $this->Highlight($src_words);
    $this->SortResults($src_words);

    if ($this->max_on_page > 0){
        $this->total_pages = (int)ceil($this->cnt_results / $this->max_on_page);
    }else{
        // Avoid a division by zero when no page size is configured.
        $this->total_pages = 1;
    }
    if ($this->page > $this->total_pages){
        $this->page = $this->total_pages;
    }

    // NOTE(review): as shown, the first pair replaces a double quote with
    // itself; the replacement may originally have been an HTML entity.
    $this->query = str_replace(array('"', "\'"), array('"', '’'), $this->query);
    return SRC_OK;
}
}

/* helper functions
-------------------------------- */

/**
 * True when $char is treated as a word separator: whitespace, "-", "(" or
 * ")".
 *
 * @param string $char
 * @return bool
 */
function isSpace($char){
    return preg_match("#[\s\-()]#", $char) === 1;
}

/**
 * True when $char contains a character the query parser accepts.
 *
 * NOTE(review): the pattern has no "u" modifier, so the Cyrillic ranges are
 * matched byte-wise on UTF-8 input. It is left unchanged because the caller
 * may pass single bytes, which a "u" pattern would reject - confirm.
 *
 * @param string $char
 * @return bool
 */
function isNormalChar($char){
    return preg_match("#[\s\-A-Za-z():\.,0-9А-Яа-яёЁ@]#", $char) === 1;
}

/**
 * usort() comparator: orders results by caption, descending.
 *
 * @param array $a
 * @param array $b
 * @return int
 */
function cmp_ar_capt_results($a, $b){
    return strcmp($b["caption"], $a["caption"]);
}

/**
 * usort() comparator: orders results by relevance ('rel'), highest first.
 *
 * @param array $a
 * @param array $b
 * @return int
 */
function cmp_ar_results($a, $b){
    if ($a["rel"] == $b["rel"]){
        return 0;
    }
    return ($a["rel"] > $b["rel"]) ? -1 : 1;
}
?>