search.php File Reference

Go to the source code of this file.

Functions

 search_results ()
 matches_text ($num)
 report_matches ()
 end_form ($value)
 readInt ($file)
 readString ($file)
 readHeader ($file)
 computeIndex ($word)
 search ($file, $word, &$statsList)
 combine_results ($results, &$docs)
 filter_results ($docs, &$requiredWords, &$forbiddenWords)
 compare_rank ($a, $b)
 sort_results ($docs, &$sorted)
 report_results (&$docs)
 main ()


Function Documentation

combine_results ( $results, &$docs )

Definition at line 196 of file search.php.

Referenced by main().

00197 {
00198   foreach ($results as $wordInfo)
00199   {
00200     $docsList = &$wordInfo["docs"];
00201     foreach ($docsList as $di)
00202     {
00203       $key=$di["url"];
00204       $rank=$di["rank"];
00205       if (in_array($key, array_keys($docs)))
00206       {
00207         $docs[$key]["rank"]+=$rank;
00208       }
00209       else
00210       {
00211         $docs[$key] = array("url"=>$key,
00212             "name"=>$di["name"],
00213             "rank"=>$rank
00214             );
00215       }
00216       $docs[$key]["words"][] = array(
00217                "word"=>$wordInfo["word"],
00218                "match"=>$wordInfo["match"],
00219                "freq"=>$di["freq"]
00220                );
00221     }
00222   }
00223   return $docs;
00224 }

compare_rank ( $a, $b )

Definition at line 266 of file search.php.

00267 {
00268   if ($a["rank"] == $b["rank"]) 
00269   {
00270     return 0;
00271   }
00272   return ($a["rank"]>$b["rank"]) ? -1 : 1; 
00273 }

computeIndex ( $word )

Definition at line 74 of file search.php.

Referenced by search().

00075 {
00076   // Fast string hashing
00077   //$lword = strtolower($word);
00078   //$l = strlen($lword);
00079   //for ($i=0;$i<$l;$i++)
00080   //{
00081   //  $c = ord($lword{$i});
00082   //  $v = (($v & 0xfc00) ^ ($v << 6) ^ $c) & 0xffff;
00083   //}
00084   //return $v;
00085 
00086   // Simple hashing that allows for substring search
00087   if (strlen($word)<2) return -1;
00088   // high char of the index
00089   $hi = ord($word{0});
00090   if ($hi==0) return -1;
00091   // low char of the index
00092   $lo = ord($word{1});
00093   if ($lo==0) return -1;
00094   // return index
00095   return $hi*256+$lo;
00096 }

end_form ( $value )

Definition at line 48 of file search.php.

Referenced by main().

00049 {
00050   echo "            <td><input type=\"text\" name=\"query\" value=\"$value\" size=\"20\" accesskey=\"s\"/></td>\n          </tr>\n        </table>\n      </form>\n    </li>\n  </ul>\n</div>\n";
00051 }

filter_results ( $docs, &$requiredWords, &$forbiddenWords )

Definition at line 226 of file search.php.

Referenced by main().

00227 {
00228   $filteredDocs=array();
00229   while (list ($key, $val) = each ($docs)) 
00230   {
00231     $words = &$docs[$key]["words"];
00232     $copy=1; // copy entry by default
00233     if (sizeof($requiredWords)>0)
00234     {
00235       foreach ($requiredWords as $reqWord)
00236       {
00237         $found=0;
00238         foreach ($words as $wordInfo)
00239         { 
00240           $found = $wordInfo["word"]==$reqWord;
00241           if ($found) break;
00242         }
00243         if (!$found) 
00244         {
00245           $copy=0; // document contains none of the required words
00246           break;
00247         }
00248       }
00249     }
00250     if (sizeof($forbiddenWords)>0)
00251     {
00252       foreach ($words as $wordInfo)
00253       {
00254         if (in_array($wordInfo["word"],$forbiddenWords))
00255         {
00256           $copy=0; // document contains a forbidden word
00257           break;
00258         }
00259       }
00260     }
00261     if ($copy) $filteredDocs[$key]=$docs[$key];
00262   }
00263   return $filteredDocs;
00264 }

main (  ) 

Definition at line 324 of file search.php.

References combine_results(), end_form(), filter_results(), readHeader(), report_results(), search(), and sort_results().

00325 {
00326   if(strcmp('4.1.0', phpversion()) > 0) 
00327   {
00328     die("Error: PHP version 4.1.0 or above required!");
00329   }
00330   if (!($file=fopen("search.idx","rb"))) 
00331   {
00332     die("Error: Search index file could NOT be opened!");
00333   }
00334   if (readHeader($file)!="DOXS")
00335   {
00336     die("Error: Header of index file is invalid!");
00337   }
00338   $query="";
00339   if (array_key_exists("query", $_GET))
00340   {
00341     $query=$_GET["query"];
00342   }
00343   end_form($query);
00344   echo "&nbsp;\n<div class=\"searchresults\">\n";
00345   $results = array();
00346   $requiredWords = array();
00347   $forbiddenWords = array();
00348   $foundWords = array();
00349   $word=strtok($query," ");
00350   while ($word) // for each word in the search query
00351   {
00352     if (($word{0}=='+')) { $word=substr($word,1); $requiredWords[]=$word; }
00353     if (($word{0}=='-')) { $word=substr($word,1); $forbiddenWords[]=$word; }
00354     if (!in_array($word,$foundWords))
00355     {
00356       $foundWords[]=$word;
00357       search($file,strtolower($word),$results);
00358     }
00359     $word=strtok(" ");
00360   }
00361   $docs = array();
00362   combine_results($results,$docs);
00363   // filter out documents with forbidden word or that do not contain
00364   // required words
00365   $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
00366   // sort the results based on rank
00367   $sorted = array();
00368   sort_results($filteredDocs,$sorted);
00369   // report results to the user
00370   report_results($sorted);
00371   echo "</div>\n";
00372   fclose($file);
00373 }

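Call graph for this function (the graph image is not reproduced in this text rendering): main() calls combine_results(), end_form(), filter_results(), readHeader(), report_results(), search(), and sort_results(); search() in turn calls computeIndex(), readInt(), and readString().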

matches_text ( $num )

Definition at line 28 of file search.php.

00029 {
00030   if ($num==0)
00031   {
00032     return "Sorry, no documents matching your query.";
00033   }
00034   else if ($num==1)
00035   {
00036     return "Found <b>1</b> document matching your query.";
00037   }
00038   else // $num>1
00039   {
00040     return "Found <b>$num</b> documents matching your query. Showing best matches first.";
00041   }
00042 }

readHeader ( $file )

Definition at line 67 of file search.php.

Referenced by main().

00068 {
00069   $header =fgetc($file); $header.=fgetc($file);
00070   $header.=fgetc($file); $header.=fgetc($file);
00071   return $header;
00072 }

readInt ( $file )

Definition at line 53 of file search.php.

Referenced by search().

00054 {
00055   $b1 = ord(fgetc($file)); $b2 = ord(fgetc($file));
00056   $b3 = ord(fgetc($file)); $b4 = ord(fgetc($file));
00057   return ($b1<<24)|($b2<<16)|($b3<<8)|$b4;
00058 }

readString ( $file )

Definition at line 60 of file search.php.

Referenced by search().

00061 {
00062   $result="";
00063   while (ord($c=fgetc($file))) $result.=$c;
00064   return $result;
00065 }

report_matches (  ) 

Definition at line 44 of file search.php.

00045 {
00046   return "Matches: ";
00047 }

report_results ( &$docs )

Definition at line 282 of file search.php.

Referenced by main().

00283 {
00284   echo "<table cellspacing=\"2\">\n";
00285   echo "  <tr>\n";
00286   echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
00287   echo "  </tr>\n";
00288   $numDocs = sizeof($docs);
00289   if ($numDocs==0)
00290   {
00291     echo "  <tr>\n";
00292     echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
00293     echo "  </tr>\n";
00294   }
00295   else
00296   {
00297     echo "  <tr>\n";
00298     echo "    <td colspan=\"2\">".matches_text($numDocs);
00299     echo "\n";
00300     echo "    </td>\n";
00301     echo "  </tr>\n";
00302     $num=1;
00303     foreach ($docs as $doc)
00304     {
00305       echo "  <tr>\n";
00306       echo "    <td align=\"right\">$num.</td>";
00307       echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
00308       echo "  <tr>\n";
00309       echo "    <td></td><td class=\"tiny\">".report_matches()." ";
00310       foreach ($doc["words"] as $wordInfo)
00311       {
00312         $word = $wordInfo["word"];
00313         $matchRight = substr($wordInfo["match"],strlen($word));
00314         echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
00315       }
00316       echo "    </td>\n";
00317       echo "  </tr>\n";
00318       $num++;
00319     }
00320   }
00321   echo "</table>\n";
00322 }

search ( $file, $word, &$statsList )

Definition at line 98 of file search.php.

References computeIndex(), readInt(), and readString().

Referenced by main().

00099 {
00100   $index = computeIndex($word);
00101   if ($index!=-1) // found a valid index
00102   {
00103     fseek($file,$index*4+4); // 4 bytes per entry, skip header
00104     $index = readInt($file);
00105     if ($index) // found words matching the hash key
00106     {
00107       $start=sizeof($statsList);
00108       $count=$start;
00109       fseek($file,$index);
00110       $w = readString($file);
00111       while ($w)
00112       {
00113         $statIdx = readInt($file);
00114         if ($word==substr($w,0,strlen($word)))
00115         { // found word that matches (as substring)
00116           $statsList[$count++]=array(
00117               "word"=>$word,
00118               "match"=>$w,
00119               "index"=>$statIdx,
00120               "full"=>strlen($w)==strlen($word),
00121               "docs"=>array()
00122               );
00123         }
00124         $w = readString($file);
00125       }
00126       $totalHi=0;
00127       $totalFreqHi=0;
00128       $totalFreqLo=0;
00129       for ($count=$start;$count<sizeof($statsList);$count++)
00130       {
00131         $statInfo = &$statsList[$count];
00132         $multiplier = 1;
00133         // whole word matches have a double weight
00134         if ($statInfo["full"]) $multiplier=2;
00135         fseek($file,$statInfo["index"]); 
00136         $numDocs = readInt($file);
00137         $docInfo = array();
00138         // read docs info + occurrence frequency of the word
00139         for ($i=0;$i<$numDocs;$i++)
00140         {
00141           $idx=readInt($file); 
00142           $freq=readInt($file); 
00143           $docInfo[$i]=array("idx"  => $idx,
00144                              "freq" => $freq>>1,
00145                              "rank" => 0.0,
00146                              "hi"   => $freq&1
00147                             );
00148           if ($freq&1) // word occurs in high priority doc
00149           {
00150             $totalHi++;
00151             $totalFreqHi+=$freq*$multiplier;
00152           }
00153           else // word occurs in low priority doc
00154           {
00155             $totalFreqLo+=$freq*$multiplier;
00156           }
00157         }
00158         // read name and url info for the doc
00159         for ($i=0;$i<$numDocs;$i++)
00160         {
00161           fseek($file,$docInfo[$i]["idx"]);
00162           $docInfo[$i]["name"]=readString($file);
00163           $docInfo[$i]["url"]=readString($file);
00164         }
00165         $statInfo["docs"]=$docInfo;
00166       }
00167       $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
00168       for ($count=$start;$count<sizeof($statsList);$count++)
00169       {
00170         $statInfo = &$statsList[$count];
00171         $multiplier = 1;
00172         // whole word matches have a double weight
00173         if ($statInfo["full"]) $multiplier=2;
00174         for ($i=0;$i<sizeof($statInfo["docs"]);$i++)
00175         {
00176           $docInfo = &$statInfo["docs"];
00177           // compute frequency rank of the word in each doc
00178           $freq=$docInfo[$i]["freq"];
00179           if ($docInfo[$i]["hi"])
00180           {
00181             $statInfo["docs"][$i]["rank"]=
00182               (float)($freq*$multiplier+$totalFreqLo)/$totalFreq;
00183           }
00184           else
00185           {
00186             $statInfo["docs"][$i]["rank"]=
00187               (float)($freq*$multiplier)/$totalFreq;
00188           }
00189         }
00190       }
00191     }
00192   }
00193   return $statsList;
00194 }

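Call graph for this function (the graph image is not reproduced in this text rendering): search() calls computeIndex(), readInt(), and readString().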

search_results (  ) 

Definition at line 23 of file search.php.

00024 {
00025   return "Search Results";
00026 }

sort_results ( $docs, &$sorted )

Definition at line 275 of file search.php.

Referenced by main().

00276 {
00277   $sorted = $docs;
00278   usort($sorted,"compare_rank");
00279   return $sorted;
00280 }


Generated on Wed Jan 2 14:01:43 2008 for Pacman by doxygen 1.5.1