Go to the source code of this file.
Functions
search_results ()
matches_text ($num)
report_matches ()
end_form ($value)
readInt ($file)
readString ($file)
readHeader ($file)
computeIndex ($word)
search ($file, $word, &$statsList)
combine_results ($results, &$docs)
filter_results ($docs, &$requiredWords, &$forbiddenWords)
compare_rank ($a, $b)
sort_results ($docs, &$sorted)
report_results (&$docs)
main ()
combine_results ($results, &$docs)
Definition at line 196 of file search.php.
Referenced by main().
// Merges the per-word search results into a single per-document table.
//
// $results : list of word records as built by search(); each record has the
//            keys "word", "match" and "docs", where "docs" is a list of
//            entries with the keys "url", "name", "rank" and "freq".
// $docs    : (by reference) map from document url to
//            array("url", "name", "rank", "words"); updated in place.
//
// For every (word, document) pair the document's rank is accumulated and a
// {"word", "match", "freq"} entry is appended to the document's "words"
// list. Returns the updated $docs map.
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      // Direct key lookup: avoids rebuilding the key list for every posting
      // and the loose comparison done by in_array().
      if (array_key_exists($key, $docs))
      {
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
                            "name"=>$di["name"],
                            "rank"=>$rank
                           );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
compare_rank ($a, $b)
Definition at line 266 of file search.php.
// Comparison callback that orders documents by descending "rank".
// Returns 0 when both ranks are equal, -1 when $a ranks higher than $b
// and 1 otherwise.
function compare_rank($a,$b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left != $right)
  {
    return ($left > $right) ? -1 : 1;
  }
  return 0;
}
computeIndex ($word)
Definition at line 74 of file search.php.
Referenced by search().
// Computes the index-table slot for a search word.
//
// The slot is derived from the first two bytes of $word
// (first byte * 256 + second byte), which is what allows prefix
// (substring) searches: all words sharing their first two characters
// land in the same slot.
//
// Returns -1 when $word is shorter than two bytes or when either of the
// first two bytes is NUL.
function computeIndex($word)
{
  // Fast string hashing (disabled alternative, kept for reference)
  //$lword = strtolower($word);
  //$l = strlen($lword);
  //for ($i=0;$i<$l;$i++)
  //{
  //  $c = ord($lword[$i]);
  //  $v = (($v & 0xfc00) ^ ($v << 6) ^ $c) & 0xffff;
  //}
  //return $v;

  // Simple hashing that allows for substring search
  if (strlen($word)<2) return -1;
  // high char of the index
  // (square-bracket offsets: the curly-brace form $word{0} no longer
  // parses on PHP 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
end_form ($value)
Definition at line 48 of file search.php.
Referenced by main().
// Emits the tail of the search form: the query text field pre-filled with
// $value, followed by the closing tags of the surrounding table, form and
// list markup.
//
// $value : the current query string. It comes straight from the request,
//          so it is HTML-escaped (including both quote styles) before being
//          placed inside the value="..." attribute; otherwise a query
//          containing a double quote could break out of the attribute and
//          inject markup.
function end_form($value)
{
  $escaped = htmlspecialchars($value, ENT_QUOTES);
  echo " <td><input type=\"text\" name=\"query\" value=\"$escaped\" size=\"20\" accesskey=\"s\"/></td>\n </tr>\n </table>\n </form>\n </li>\n </ul>\n</div>\n";
}
filter_results ($docs, &$requiredWords, &$forbiddenWords)
Definition at line 226 of file search.php.
Referenced by main().
// Selects the documents that satisfy the required/forbidden word lists.
//
// $docs           : map from key to document record; each record carries a
//                   "words" list whose entries have a "word" field.
// $requiredWords  : words that must ALL occur in a document's "words" list.
// $forbiddenWords : words of which NONE may occur in that list.
//
// Returns a new map holding only the accepted documents, under their
// original keys and in their original order.
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  // foreach instead of each(): each() was removed in PHP 8.
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy=1; // copy entry by default
    if (!empty($requiredWords))
    {
      foreach ($requiredWords as $reqWord)
      {
        $found=0;
        foreach ($words as $wordInfo)
        {
          if ($wordInfo["word"]==$reqWord)
          {
            $found=1;
            break;
          }
        }
        if (!$found)
        {
          $copy=0; // document lacks at least one required word
          break;
        }
      }
    }
    // no need to scan for forbidden words once the document is rejected
    if ($copy && !empty($forbiddenWords))
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"],$forbiddenWords))
        {
          $copy=0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
main ()
Definition at line 324 of file search.php.
References combine_results(), end_form(), filter_results(), readHeader(), report_results(), search(), and sort_results().
// Entry point of the search page.
//
// Opens the index file "search.idx", validates its "DOXS" header, reads the
// "query" request parameter, prints the remainder of the search form and
// then the results: every space-separated query word is looked up with
// search(), the hits are merged per document, filtered by required ("+word")
// and forbidden ("-word") words, sorted by rank and reported.
//
// Terminates the script with an error message when the PHP version is too
// old, the index file cannot be opened, or its header is invalid.
function main()
{
  // version_compare() orders version strings numerically per component;
  // a plain strcmp() would rank "10.0.0" below "4.1.0".
  if (!function_exists('version_compare') ||
      version_compare(phpversion(), '4.1.0', '<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  if (array_key_exists("query", $_GET))
  {
    $query=$_GET["query"];
  }
  end_form($query);
  echo " \n<div class=\"searchresults\">\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  // Compare against false explicitly: a bare truthiness test would end the
  // loop early on the token "0".
  while ($word!==false) // for each word in the search query
  {
    // Lower-case up front so the required/forbidden lists hold the same
    // spelling that search() records in its "word" field.
    $word=strtolower($word);
    if ($word!=='' && $word[0]=='+')
    {
      $word=(string)substr($word,1);
      if ($word!=='') $requiredWords[]=$word;
    }
    if ($word!=='' && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word!=='') $forbiddenWords[]=$word;
    }
    // a bare "+" or "-" leaves nothing to search for
    if ($word!=='' && !in_array($word,$foundWords))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n";
  fclose($file);
}
Here is the call graph for this function:
matches_text ($num)
Definition at line 28 of file search.php.
// Returns the summary sentence shown above the result list for a search
// that matched $num documents (separate wording for zero, one and many).
function matches_text($num)
{
  switch ($num)
  {
    case 0:
      return "Sorry, no documents matching your query.";
    case 1:
      return "Found <b>1</b> document matching your query.";
    default: // $num>1
      return "Found <b>$num</b> documents matching your query. Showing best matches first.";
  }
}
readHeader ($file)
Definition at line 67 of file search.php.
Referenced by main().
// Reads the next four bytes from the open stream $file, one character at a
// time, and returns them as a string (shorter if the stream ends early).
function readHeader($file)
{
  $magic = "";
  for ($n=0; $n<4; $n++)
  {
    $magic .= fgetc($file);
  }
  return $magic;
}
readInt ($file)
Definition at line 53 of file search.php.
Referenced by search().
// Reads four bytes from the open stream $file and returns them combined
// into one integer, most significant byte first.
function readInt($file)
{
  $value = 0;
  for ($n=0; $n<4; $n++)
  {
    // shift the bytes read so far up and append the next one
    $value = ($value<<8) | ord(fgetc($file));
  }
  return $value;
}
readString ($file)
Definition at line 60 of file search.php.
Referenced by search().
// Reads characters from the open stream $file up to (and consuming) the
// first character whose ord() value is 0, and returns the characters read
// before it. Reading also stops when fgetc() yields no character.
function readString($file)
{
  $text = "";
  for (;;)
  {
    $c = fgetc($file);
    if (ord($c)==0)
    {
      break;
    }
    $text .= $c;
  }
  return $text;
}
report_matches ()
report_results (&$docs)
Definition at line 282 of file search.php.
Referenced by main().
// Prints the search results as an HTML table.
//
// $docs : list of document records, already in display order; each record
//         has "url", "name" and a "words" list whose entries carry "word",
//         "match" and "freq".
//
// The table starts with a heading row (search_results()) and a summary row
// (matches_text()). Every document then gets two rows: one with its
// position and a link to the document, and one listing the matched words,
// each shown as the searched word in bold followed by the rest of the
// matched index word and its frequency in parentheses.
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo " <tr>\n";
  echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo " </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      // close the link row before opening the matches row; without this
      // the second <tr> would be nested inside an unclosed first one
      echo " </tr>\n";
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // part of the matched index word that follows the searched prefix
        $matchRight = substr($wordInfo["match"],strlen($word));
        echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
search ($file, $word, &$statsList)
Definition at line 98 of file search.php.
References computeIndex(), readInt(), and readString().
Referenced by main().
// Looks up $word in the index stream $file and appends one record per
// matching index word to $statsList.
//
// $file      : open, seekable index stream.
// $word      : the word to look for; index words that start with it match.
// $statsList : (by reference) list that receives the new records. Each
//              record has the keys "word" (the searched word), "match" (the
//              index word that matched), "index" (file offset of its
//              document list), "full" (true when the match is the whole
//              word) and "docs" (list of documents with "idx", "freq",
//              "rank", "hi", "name" and "url").
//
// Returns $statsList. Nothing is appended when computeIndex() rejects the
// word or when the index slot for the word is empty.
function search($file,$word,&$statsList)
{
  $slot = computeIndex($word);
  if ($slot==-1)
  {
    return $statsList; // no valid index for this word
  }

  fseek($file,$slot*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset)
  {
    return $statsList; // no words stored for this hash key
  }

  // Pass 1: collect every index word that starts with $word.
  $first = sizeof($statsList);
  $next  = $first;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  for ($candidate=readString($file); $candidate; $candidate=readString($file))
  {
    $statIdx = readInt($file);
    if ($word==substr($candidate,0,$wordLen))
    { // found word that matches (as substring)
      $statsList[$next++] = array(
          "word"=>$word,
          "match"=>$candidate,
          "index"=>$statIdx,
          "full"=>strlen($candidate)==$wordLen,
          "docs"=>array()
          );
    }
  }

  // Pass 2: load the document list of every new match and accumulate the
  // frequency totals used for ranking.
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($pos=$first; $pos<sizeof($statsList); $pos++)
  {
    // whole word matches have a double weight
    $weight = $statsList[$pos]["full"] ? 2 : 1;
    fseek($file,$statsList[$pos]["index"]);
    $numDocs = readInt($file);
    $entries = array();
    // read docs info + occurrence frequency of the word
    for ($d=0; $d<$numDocs; $d++)
    {
      $idx = readInt($file);
      $rawFreq = readInt($file);
      $isHigh = $rawFreq&1; // lowest bit flags a high priority doc
      $entries[$d] = array("idx" => $idx,
                           "freq" => $rawFreq>>1,
                           "rank" => 0.0,
                           "hi" => $isHigh
                          );
      if ($isHigh) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi += $rawFreq*$weight;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo += $rawFreq*$weight;
      }
    }
    // read name and url info for the doc
    for ($d=0; $d<$numDocs; $d++)
    {
      fseek($file,$entries[$d]["idx"]);
      $entries[$d]["name"] = readString($file);
      $entries[$d]["url"] = readString($file);
    }
    $statsList[$pos]["docs"] = $entries;
  }

  // Pass 3: turn the per-document frequencies into ranks.
  $totalFreq = ($totalHi+1)*$totalFreqLo + $totalFreqHi;
  for ($pos=$first; $pos<sizeof($statsList); $pos++)
  {
    // whole word matches have a double weight
    $weight = $statsList[$pos]["full"] ? 2 : 1;
    $numDocs = sizeof($statsList[$pos]["docs"]);
    for ($d=0; $d<$numDocs; $d++)
    {
      // compute frequency rank of the word in each doc
      $score = $statsList[$pos]["docs"][$d]["freq"]*$weight;
      if ($statsList[$pos]["docs"][$d]["hi"])
      {
        $score = $score+$totalFreqLo;
      }
      $statsList[$pos]["docs"][$d]["rank"] = (float)($score)/$totalFreq;
    }
  }
  return $statsList;
}
Here is the call graph for this function:
search_results ()
sort_results ($docs, &$sorted)