Homepage Demos Overview Downloads Tutorials Reference
Credits

roboop/search.php

Go to the documentation of this file.
00001 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
00002 <html>
00003   <head>
00004     <meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
00005     <title>Search</title>
00006     <link href="doxygen.css" rel="stylesheet" type="text/css">
00007     <link rel="home" href="../index.html">
00008     <link rel="up" href="../index.html">
00009     <link rel="SHORTCUT ICON" href="favicon.ico">
00010   </head>
00011   <body>
00012     <!-- <img src="aibosmall.jpg" width=92 height=75 align=right> -->
00013     <table cellpadding="1" cellspacing="6" border="0"
00014            style="text-align: left; margin-left: auto; margin-right: auto;">
00015       <tbody>
00016         <tr>
00017           <!-- #Homepage# --> <td style="vertical-align: top;"><a target="_top" href="../../index.html">Homepage</a></td>
00018           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00019           <!-- #Demos# --> <td style="vertical-align: top;"><a target="_top" href="../../Samples.html">Demos</a></td>
00020           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00021           <!-- #Overview# --> <td style="vertical-align: top;"><a target="_top" href="../../Overview.html">Overview</a></td>
00022           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00023           <!-- #Downloads# --> <td style="vertical-align: top;"><a target="_top" href="../../VersionHistory.html">Downloads</a></td>
00024           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00025           <!-- #Tutorials# --> <td style="vertical-align: top;"><a target="_top" href="../../Tutorials.html">Tutorials</a></td>
00026           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00027           <!-- #Reference# --> <td style="vertical-align: top;"><a target="_top" href="../index.html">Reference</a></td>
00028           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"><br></td>
00029           <!-- #Credits# --> <td style="vertical-align: top;"><a target="_top" href="../../Credits.html">Credits</a> </td>
00030         </tr>
00031       </tbody>
00032     </table>
00033 <table style="text-align: left; margin-left: auto; margin-right: auto; width: 700px;" border="0" cellspacing="2" cellpadding="2">
00034   <tbody>
00035     <tr>
00036       <td style="vertical-align: top;">
00037 <!-- Generated by Doxygen 1.3.9.1 -->
00038 <div class="qindex">  <form class="search" action="search.php" method="get">
00039 <a class="qindex" href="main.html">Main&nbsp;Page</a> | <a class="qindex" href="namespaces.html">Namespace List</a> | <a class="qindex" href="hierarchy.html">Class&nbsp;Hierarchy</a> | <a class="qindex" href="classes.html">Alphabetical&nbsp;List</a> | <a class="qindex" href="annotated.html">Class&nbsp;List</a> | <a class="qindex" href="dirs.html">Directories</a> | <a class="qindex" href="files.html">File&nbsp;List</a> | <a class="qindex" href="namespacemembers.html">Namespace&nbsp;Members</a> | <a class="qindex" href="functions.html">Class&nbsp;Members</a> | <a class="qindex" href="globals.html">File&nbsp;Members</a>  | <span class="searchHL"><u>S</u>earch&nbsp;for&nbsp;
00040 <?php
00041 
/**
 * Heading text printed above the list of search results.
 *
 * @return string The fixed heading.
 */
function search_results()
{
  $heading = "Search Results";
  return $heading;
}
00046 
/**
 * Builds the summary sentence for a given number of matching documents.
 *
 * @param int $num Number of documents that matched the query.
 * @return string HTML fragment: an apology for 0, a singular sentence for 1,
 *                and a plural sentence (with the count in bold) otherwise.
 */
function matches_text($num)
{
  // switch compares loosely, exactly like the == tests it replaces
  switch ($num)
  {
    case 0:
      return "Sorry, no documents matching your query.";
    case 1:
      return "Found <b>1</b> document matching your query.";
    default: // every other value
      return "Found <b>$num</b> documents matching your query. Showing best matches first.";
  }
}
00062 
/**
 * Label printed in front of the per-document list of matched words.
 *
 * @return string The fixed label, including its trailing space.
 */
function report_matches()
{
  $label = "Matches: ";
  return $label;
}
00067 
/**
 * Reads four bytes from the current file position and combines them into
 * one integer, most significant byte first.
 *
 * @param resource $file Open file handle; its position advances by the
 *                       bytes read.
 * @return int The assembled value.
 */
function readInt($file)
{
  $value = 0;
  for ($n = 0; $n < 4; $n++)
  {
    // shift what we have up by one byte and append the next one
    $value = ($value << 8) | ord(fgetc($file));
  }
  return $value;
}
00074 
/**
 * Reads characters from the current file position up to (and consuming)
 * the next NUL byte, or up to the end of the file.
 *
 * @param resource $file Open file handle.
 * @return string The characters read, without the terminating NUL; empty
 *                when the first byte is NUL or nothing is left to read.
 */
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    // ord() yields 0 both for a NUL byte and for the false returned at EOF
    if (!ord($ch)) break;
    $text .= $ch;
  }
  return $text;
}
00081 
/**
 * Reads the next four characters from the file and returns them as one
 * string (main() compares the result against the "DOXS" signature).
 *
 * @param resource $file Open file handle.
 * @return string Up to four characters; shorter when the file ends early.
 */
function readHeader($file)
{
  $header = "";
  for ($n = 0; $n < 4; $n++)
  {
    $header .= fgetc($file);
  }
  return $header;
}
00088 
/**
 * Maps the first two characters of a word to a slot number in the index
 * table (high character * 256 + low character).
 *
 * Uses square-bracket string offsets: the curly-brace form ($word{0}) was
 * removed in PHP 8.0 and no longer parses.
 *
 * @param string $word Search word.
 * @return int The slot number, or -1 when the word is shorter than two
 *             characters or one of its first two characters is NUL.
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
00101 
/**
 * Looks up one query word in the index file and appends an entry to
 * $statsList for every indexed word that starts with it.
 *
 * Layout as read by this function:
 *  - a 4-byte header, followed by a table of 4-byte offsets with one slot
 *    per value of computeIndex();
 *  - each non-zero slot points to a list of NUL-terminated words, each
 *    followed by a 4-byte offset to its statistics; an empty string ends
 *    the list;
 *  - a statistics record is a document count followed by that many
 *    (document offset, frequency) pairs; the lowest bit of the frequency
 *    flags a high priority document and the remaining bits are the count;
 *  - a document offset points to two NUL-terminated strings: name and url.
 *
 * Each appended entry has the keys "word" (the query word), "match" (the
 * indexed word), "index" (statistics offset), "full" (true when the match
 * is the whole word) and "docs" (list of arrays with idx, freq, rank, hi,
 * name and url). Ranks are frequencies normalised by a total taken over
 * all matches of this word; whole-word matches count double.
 *
 * @param resource $file       Open index file handle.
 * @param string   $word       Query word to look up.
 * @param array    &$statsList Result list; new entries are appended.
 * @return array The updated $statsList.
 */
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // no valid index for this word

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words with these first two characters

  $start = count($statsList);
  $count = $start;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  // The list ends with an empty string. Testing for exactly that (instead
  // of truthiness) keeps a word such as "0" from ending the scan early.
  while ($w!=="")
  {
    $statIdx = readInt($file);
    // Strict comparison: with == two numeric-looking strings such as
    // "0e12" and "0e99" would compare equal as numbers.
    if (substr($w,0,$wordLen)===$word)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }

  $end = count($statsList);
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($k=$start;$k<$end;$k++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$k]["full"] ? 2 : 1;
    fseek($file,$statsList[$k]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$k]["docs"]=$docInfo;
  }

  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  // Nothing to normalise by: leave every rank at its initial 0.0 rather
  // than dividing by zero (a DivisionByZeroError on PHP 8).
  if ($totalFreq==0) return $statsList;

  for ($k=$start;$k<$end;$k++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$k]["full"] ? 2 : 1;
    $numDocs = count($statsList[$k]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      // compute frequency rank of the word in each doc
      $weighted = $statsList[$k]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$k]["docs"][$i]["hi"])
      {
        // high priority docs are lifted above all low priority ones
        $weighted += $totalFreqLo;
      }
      $statsList[$k]["docs"][$i]["rank"] = (float)$weighted/$totalFreq;
    }
  }
  return $statsList;
}
00199 
/**
 * Merges the per-word search results into one entry per document.
 *
 * Documents are keyed by their url. A document matched by several words
 * gets the sum of the individual ranks, and every match is recorded in the
 * document's "words" list as an array with the keys word, match and freq.
 *
 * The key lookup uses array_key_exists(): the previous
 * in_array($key, array_keys($docs)) rescanned all keys for every document
 * and compared loosely, so two different numeric-looking urls (for example
 * "0e1" and "0e2") were treated as the same document.
 *
 * @param array $results List of entries as produced by search().
 * @param array &$docs   Map url => document entry (url, name, rank, words);
 *                       updated in place.
 * @return array The updated $docs.
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      if (array_key_exists($key,$docs))
      {
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
            "name"=>$di["name"],
            "rank"=>$rank,
            "words"=>array()
            );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
00229 
/**
 * Keeps only the documents that match every required word and none of the
 * forbidden words.
 *
 * A document "matches" a word when that word appears as the "word" field
 * of one of the entries in the document's "words" list.
 *
 * Iterates with foreach because each() was removed in PHP 8.0, and compares
 * words strictly so numeric-looking strings are not confused with each
 * other.
 *
 * @param array $docs            Map key => document entry, each with a
 *                               "words" list.
 * @param array &$requiredWords  Words a document must all match.
 * @param array &$forbiddenWords Words a document must not match.
 * @return array The surviving documents, with their original keys and order.
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  foreach ($docs as $key => $doc)
  {
    // collect the query words this document was matched by
    $matched=array();
    foreach ($doc["words"] as $wordInfo)
    {
      $matched[]=$wordInfo["word"];
    }
    $copy=true; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      if (!in_array($reqWord,$matched,true))
      {
        $copy=false; // document lacks at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($forbiddenWords as $badWord)
      {
        if (in_array($badWord,$matched,true))
        {
          $copy=false; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
00269 
/**
 * Comparison callback that orders documents by descending "rank".
 *
 * @param array $a Document entry with a "rank" key.
 * @param array $b Document entry with a "rank" key.
 * @return int -1 when $a ranks higher, 1 when $b ranks higher, 0 when equal.
 */
function compare_rank($a,$b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left > $right) return -1; // better rank sorts first
  if ($left == $right) return 0;
  return 1;
}
00278 
/**
 * Produces a copy of the documents ordered by compare_rank(), i.e. best
 * rank first. usort() re-indexes the list, so the original keys are not
 * kept.
 *
 * @param array $docs    Document entries to order; not modified.
 * @param array &$sorted Receives the ordered list.
 * @return array The same ordered list that was stored in $sorted.
 */
function sort_results($docs,&$sorted)
{
  $ordered = $docs;
  usort($ordered,"compare_rank");
  $sorted = $ordered;
  return $sorted;
}
00285 
/**
 * Prints the search results as an HTML table: a heading, a summary line,
 * and for every document a numbered link row followed by a row listing the
 * matched words with their frequencies.
 *
 * Changes from the earlier version:
 *  - the link row is now closed with </tr> before the words row starts
 *    (a second <tr> used to be emitted instead, leaving the row open);
 *  - url, name and word text are HTML-escaped before being written into
 *    the page. Existing entities are left alone (double_encode = false) so
 *    text that is already escaped is not escaped twice.
 *
 * @param array &$docs List of document entries with the keys url, name and
 *                     words (each word: word, match, freq), in display order.
 * @return void
 */
function report_results(&$docs)
{
  // The page is declared as ISO-8859-1, a single-byte charset, so escaping
  // with it can never fail on invalid byte sequences.
  $charset = "ISO-8859-1";
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = count($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      $url  = htmlspecialchars((string)$doc["url"], ENT_QUOTES, $charset, false);
      $name = htmlspecialchars((string)$doc["name"], ENT_QUOTES, $charset, false);
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      echo     "<td><a class=\"el\" href=\"".$url."\">".$name."</a></td>\n";
      echo "  </tr>\n";
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // part of the indexed word that follows the typed prefix
        $matchRight = (string)substr($wordInfo["match"],strlen($word));
        $word = htmlspecialchars((string)$word, ENT_QUOTES, $charset, false);
        $matchRight = htmlspecialchars($matchRight, ENT_QUOTES, $charset, false);
        echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
00327 
/**
 * Page entry point: opens the search index, prints the query input field
 * (closing the span, form and div that the surrounding HTML template left
 * open), runs the query and prints the results.
 *
 * Query syntax: words are separated by spaces; a leading '+' marks a word
 * as required and a leading '-' marks it as forbidden.
 *
 * Changes from the earlier version:
 *  - the query is HTML-escaped before being echoed into the input field's
 *    value attribute (it comes straight from the request);
 *  - a non-string "query" parameter (e.g. query[]=x) is ignored;
 *  - string offsets use [] because the {} form was removed in PHP 8.0;
 *  - the version check uses version_compare(), since comparing version
 *    strings with strcmp() orders "10.0.0" before "4.1.0";
 *  - the token loop tests for false explicitly so the token "0" does not
 *    end it, and a bare "+" or "-" is skipped instead of becoming an empty
 *    required/forbidden word.
 *
 * Terminates the script with an error message when the PHP version is too
 * old, the index file cannot be opened, or its header is not "DOXS".
 *
 * @return void
 */
function main()
{
  if (version_compare(phpversion(),'4.1.0','<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  // untrusted request data: escape before placing it inside the attribute
  $safeQuery = htmlspecialchars($query, ENT_QUOTES, "ISO-8859-1");
  echo "<input class=\"search\" type=\"text\" name=\"query\" value=\"$safeQuery\" size=\"20\" accesskey=\"s\"/>\n";
  echo "</span>\n";
  echo "</form>\n";
  echo "</div>\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  while ($word!==false) // for each word in the search query
  {
    if ($word!=="" && $word[0]=='+')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $requiredWords[]=$word;
    }
    if ($word!=="" && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $forbiddenWords[]=$word;
    }
    // look every distinct word up only once
    if ($word!=="" && !in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  fclose($file);
}
00379 
00380 main();
00381 
00382 
00383 ?>
00384 </td></tr></tbody></table>
00385 
00386   <br>
00387   <table cellpadding="2" cellspacing="2" border="0" style="text-align: left; width: 100%; color: rgb(0, 0, 0);">
00388     <tbody>
00389       <tr>
00390         <td style="vertical-align: top;"><small>
00391             <b><a href="http://www.cours.polymtl.ca/roboop/">ROBOOP</a> v1.21a<br></b>
00392           </small>
00393         </td>
00394         <td style="vertical-align: top; text-align: right; font-style: italic;">
00395           <small>
00396             Generated Tue Nov 23 16:36:29 2004 by <a href="http://www.doxygen.org/">Doxygen</a> 1.3.9.1
00397           </small>
00398           <script type="text/javascript" language="javascript">
00399             <!--
00400             s="na";c="na";j="na";f=""+escape(document.referrer)
00401             //-->
00402           </script>
00403           <script type="text/javascript" language="javascript1.2">
00404             <!--
00405             s=screen.width;v=navigator.appName
00406             if (v != "Netscape") {c=screen.colorDepth}
00407             else {c=screen.pixelDepth}
00408             j=navigator.javaEnabled()
00409             //-->
00410           </script>
00411           <script type="text/javascript" language="javascript">
00412             <!--
00413             function pr(n) {document.write(n,"\n");}
00414             NS2Ch=0
00415             if (navigator.appName == "Netscape" &&
00416             navigator.appVersion.charAt(0) == "2") {NS2Ch=1}
00417             if (NS2Ch == 0) {
00418             r="size="+s+"&colors="+c+"&referer="+f+"&java="+j+"&stamp="+(new Date()).getTime()+""
00419             pr("<IMG BORDER=0 width=16 height=16 align=\"middle\" SRC=\"http://aibo2.boltz.cs.cmu.edu/head.gif?"+r+"\">")}
00420             //-->
00421           </script> 
00422           
00423           <noscript>
00424             <img src="http://aibo2.boltz.cs.cmu.edu/head.gif" border="0" width=16 height=16 align="middle">
00425           </noscript>
00426         </td>
00427       </tr>
00428     </tbody>
00429   </table>
00430 </body>
00431 </html>

Tekkotsu v2.2.1
Generated Tue Nov 23 16:36:39 2004 by Doxygen 1.3.9.1