includes/search.php
changeset 334 c72b545f1304
parent 322 5f1cd51bf1be
child 335 67bd3121a12e
equal deleted inserted replaced
333:32429702305e 334:c72b545f1304
   110  *   page_text: string, the contents of the page (trimmed to ~150 bytes if necessary)
   110  *   page_text: string, the contents of the page (trimmed to ~150 bytes if necessary)
   111  *   score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not
   111  *   score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not
   112  * @param string Search query
   112  * @param string Search query
   113  * @param string Will be filled with any warnings encountered whilst parsing the query
   113  * @param string Will be filled with any warnings encountered whilst parsing the query
   114  * @param bool Case sensitivity - defaults to false
   114  * @param bool Case sensitivity - defaults to false
       
   115  * @param array|reference Will be filled with the parsed list of words.
   115  * @return array
   116  * @return array
   116  */
   117  */
   117 
   118 
   118 function perform_search($query, &$warnings, $case_sensitive = false)
   119 function perform_search($query, &$warnings, $case_sensitive = false, &$word_list)
   119 {
   120 {
   120   global $db, $session, $paths, $template, $plugins; // Common objects
   121   global $db, $session, $paths, $template, $plugins; // Common objects
   121   $warnings = array();
   122   $warnings = array();
   122 
   123 
   123   $query = parse_search_query($query, $warnings);
   124   $query = parse_search_query($query, $warnings);
   520       }
   521       }
   521     }
   522     }
   522     if ( isset($scores[$idstring]) )
   523     if ( isset($scores[$idstring]) )
   523     {
   524     {
   524       $page_data[$idstring] = array(
   525       $page_data[$idstring] = array(
   525           'page_name' => $page['name'],
   526           'page_name' => highlight_search_result($page['name'], $word_list, $case_sensitive),
   526           'page_text' => '',
   527           'page_text' => '',
   527           'page_id' => $page['urlname_nons'],
   528           'page_id' => $page['urlname_nons'],
   528           'namespace' => $page['namespace'],
   529           'namespace' => $page['namespace'],
   529           'score' => $scores[$idstring],
   530           'score' => $scores[$idstring],
   530           'page_length' => 1,
   531           'page_length' => 1,
   590 
   591 
   591     // Calculate score
   592     // Calculate score
   592     // if ( $score > $divisor )
   593     // if ( $score > $divisor )
   593     //   $score = $divisor;
   594     //   $score = $divisor;
   594     $datum['score'] = round($score / $divisor, 2) * 100;
   595     $datum['score'] = round($score / $divisor, 2) * 100;
       
   596     
       
   597     // Highlight the URL
       
   598     $datum['url_highlight'] = makeUrlComplete($datum['namespace'], $datum['page_id']);
       
   599     $datum['url_highlight'] = preg_replace('/\?.+$/', '', $datum['url_highlight']);
       
   600     $datum['url_highlight'] = highlight_search_result($datum['url_highlight'], $word_list, $case_sensitive);
   595 
   601 
   596     // Store it in our until-now-unused results array
   602     // Store it in our until-now-unused results array
   597     $results[] = $datum;
   603     $results[] = $datum;
   598   }
   604   }
   599 
   605 
   611  * @return array
   617  * @return array
   612  */
   618  */
   613 
   619 
   614 function parse_search_query($query, &$warnings)
   620 function parse_search_query($query, &$warnings)
   615 {
   621 {
       
   622   global $lang;
       
   623   
   616   $stopwords = get_stopwords();
   624   $stopwords = get_stopwords();
   617   $ret = array(
   625   $ret = array(
   618     'any' => array(),
   626     'any' => array(),
   619     'req' => array(),
   627     'req' => array(),
   620     'not' => array()
   628     'not' => array()
   661 
   669 
   662     $ticker++;
   670     $ticker++;
   663 
   671 
   664     if ( $ticker == 20 )
   672     if ( $ticker == 20 )
   665     {
   673     {
   666       $warnings[] = 'Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.';
   674       $warnings[] = $lang->get('search_err_query_too_many_terms');
   667       break;
   675       break;
   668     }
   676     }
   669 
   677 
   670     if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
   678     if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
   671     {
   679     {
   672       $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
   680       $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
   673       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   681       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   674       {
   682       {
   675         $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
   683         $warnings[] = $lang->get('search_err_query_has_stopwords');
   676         $ticker--;
   684         $ticker--;
   677         continue;
   685         continue;
   678       }
   686       }
   679       if(in_array($word, $ret['req']))
   687       if(in_array($word, $ret['req']))
   680       {
   688       {
   681         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   689         $warnings[] = $lang->get('search_err_query_dup_terms');
   682         $ticker--;
   690         $ticker--;
   683         continue;
   691         continue;
   684       }
   692       }
   685       $ret['req'][] = $word;
   693       $ret['req'][] = $word;
   686     }
   694     }
   687     elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
   695     elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
   688     {
   696     {
   689       $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
   697       $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
   690       if ( strlen ( $word ) < 4 )
   698       if ( strlen ( $word ) < 4 )
   691       {
   699       {
   692         $warnings[] = 'One or more of your search terms was excluded because terms must be at least 4 characters in length.';
   700         $warnings[] = $lang->get('search_err_query_term_too_short');
   693         $ticker--;
   701         $ticker--;
   694         continue;
   702         continue;
   695       }
   703       }
   696       if(in_array($word, $ret['not']))
   704       if(in_array($word, $ret['not']))
   697       {
   705       {
   698         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   706         $warnings[] = $lang->get('search_err_query_dup_terms');
   699         $ticker--;
   707         $ticker--;
   700         continue;
   708         continue;
   701       }
   709       }
   702       $ret['not'][] = $word;
   710       $ret['not'][] = $word;
   703     }
   711     }
   704     elseif ( substr ( $atom, 0, 1 ) == '+' )
   712     elseif ( substr ( $atom, 0, 1 ) == '+' )
   705     {
   713     {
   706       $word = substr ( $atom, 1 );
   714       $word = substr ( $atom, 1 );
   707       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   715       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   708       {
   716       {
   709         $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
   717         $warnings[] = $lang->get('search_err_query_has_stopwords');
   710         $ticker--;
   718         $ticker--;
   711         continue;
   719         continue;
   712       }
   720       }
   713       if(in_array($word, $ret['req']))
   721       if(in_array($word, $ret['req']))
   714       {
   722       {
   715         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   723         $warnings[] = $lang->get('search_err_query_dup_terms');
   716         $ticker--;
   724         $ticker--;
   717         continue;
   725         continue;
   718       }
   726       }
   719       $ret['req'][] = $word;
   727       $ret['req'][] = $word;
   720     }
   728     }
   721     elseif ( substr ( $atom, 0, 1 ) == '-' )
   729     elseif ( substr ( $atom, 0, 1 ) == '-' )
   722     {
   730     {
   723       $word = substr ( $atom, 1 );
   731       $word = substr ( $atom, 1 );
   724       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   732       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   725       {
   733       {
   726         $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
   734         $warnings[] = $lang->get('search_err_query_has_stopwords');
   727         $ticker--;
   735         $ticker--;
   728         continue;
   736         continue;
   729       }
   737       }
   730       if(in_array($word, $ret['not']))
   738       if(in_array($word, $ret['not']))
   731       {
   739       {
   732         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   740         $warnings[] = $lang->get('search_err_query_dup_terms');
   733         $ticker--;
   741         $ticker--;
   734         continue;
   742         continue;
   735       }
   743       }
   736       $ret['not'][] = $word;
   744       $ret['not'][] = $word;
   737     }
   745     }
   738     elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' )
   746     elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' )
   739     {
   747     {
   740       $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) );
   748       $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) );
   741       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   749       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   742       {
   750       {
   743         $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
   751         $warnings[] = $lang->get('search_err_query_has_stopwords');
   744         $ticker--;
   752         $ticker--;
   745         continue;
   753         continue;
   746       }
   754       }
   747       if(in_array($word, $ret['any']))
   755       if(in_array($word, $ret['any']))
   748       {
   756       {
   749         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   757         $warnings[] = $lang->get('search_err_query_dup_terms');
   750         $ticker--;
   758         $ticker--;
   751         continue;
   759         continue;
   752       }
   760       }
   753       $ret['any'][] = $word;
   761       $ret['any'][] = $word;
   754     }
   762     }
   755     else
   763     else
   756     {
   764     {
   757       $word = $atom;
   765       $word = $atom;
   758       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   766       if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
   759       {
   767       {
   760         $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
   768         $warnings[] = $lang->get('search_err_query_has_stopwords');
   761         $ticker--;
   769         $ticker--;
   762         continue;
   770         continue;
   763       }
   771       }
   764       if(in_array($word, $ret['any']))
   772       if(in_array($word, $ret['any']))
   765       {
   773       {
   766         $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.';
   774         $warnings[] = $lang->get('search_err_query_dup_terms');
   767         $ticker--;
   775         $ticker--;
   768         continue;
   776         continue;
   769       }
   777       }
   770       $ret['any'][] = $word;
   778       $ret['any'][] = $word;
   771     }
   779     }