<html>
<div style="width:400px; position:absolute; right:0px; background-color:#F0F8FF; margin:10px; border:2px solid #ddf; padding:5px;">
  <h2>To do</h2>
  <ul>
    <li>update <a href="http://fabien.benetou.fr/Tools/Sphinxsearch">Tools/Sphinxsearch</a> and <a href="http://fabien.benetou.fr/repository/?p=.git;a=blob;f=pim_search.php">the public repository source</a></li>
    <li>List the non working queries and debug them (use = keyword_to_test for comparison)
      <ul>
        <li><a href="/search/full%20text%20search">full text search</a> should return <a href="http://fabien.benetou.fr/Events/RailsCampParis3#FullTextSearch">#FullTextSearch</a></li>
        <li><a href="/search/facebook">facebook</a></li>
        <li><a href="/search/google">google</a></li>
        <li><a href="/search/apple">apple</a></li>
        <li><a href="/search/Brooks">Brooks</a></li>
        <li>... (cf log analysis "?bug&amp;keyword=problematickeyword")</li>
      </ul>
    </li>
    <li>see also <a href="http://fabien.benetou.fr/Wiki/LearningSearch#ProblematicQueries">ProblematicQueries</a></li>
    <li>make it more generalist via e.g. <a href="http://www.onelook.com/reverse-dictionary.shtml">reverse dictionary</a></li>
    <li>better split documents, e.g. logs per day</li>
    <li>handle the histories (wiki and repository diffs)</li>
    <li>make URLs work, consider specific stemming</li>
    <li>improving weighting and thus ranking</li>
    <li>properly handle remote URLs</li>
    <li>leverage .pageindex of each wiki</li>
    <li>work on snippets</li>
    <li>replace WikiName: by their icon, cf localmap</li>
    <li>add pagerank and other topology metrics as attributes</li>
    <li>use delta or RT indexing, requires better ID generation first though</li>
    <li>add visualization, e.g. network or linked document with the same color</li>
    <li>provide links to specific interesting queries, e.g. only this week
      <ul>
        <li><a href="/search/%22internet%20brain%22~10">"internet brain"~10</a></li>
      </ul>
    </li>
    <li><a href="http://fabien.benetou.fr/Wiki/LearningSearch">LearningSearch</a></li>
    <li>integrate <a href="http://fabien.benetou.fr/MemoryRecalls/ImprovingPIM#SocialPIM">Social PIM</a>, especially if there are 0 results</li>
    <li>integrate external search, e.g. Seeks, especially if there are 0 results including through Social PIM</li>
  </ul>
</div>
<div style="width:500px; position:absolute; left:0px; background-color:#F8F0FF; margin:10px; border:2px solid #ddf; padding:5px;">
<?php
# Search page: runs the "query" GET parameter against the "pmwikis" Sphinx
# index and prints one link per matching document, followed by pagination
# links and the modification date of the index file.
#
# GET parameters:
#   query        - the Sphinx search expression (extended2 match mode)
#   startingitem - offset of the first result to display (defaults to 0)
#
# Overall this is very specialized and should be configurable to be used on other PIMs
# this should be tested first and if not present, explain in a line where to download and how to install Sphinx API
require ( "sphinxapi.php" );
# NOTE(review): presumably defines $idx, the map from Sphinx document ID to
# file path used below -- it is not defined anywhere else in this file.
require ( "sphinx_doc_ids.php" );

$index = "pmwikis";
$itemspan = 20; // number of results per page

// Only accept a plain string query; anything else is treated as "no query".
$words = ( isset($_GET["query"]) && is_string($_GET["query"]) ) ? $_GET["query"] : "";

// The Sphinx API expects an integer offset, and it must not be negative.
$startingitem = 0;
if ( isset($_GET["startingitem"]) && is_scalar($_GET["startingitem"]) )
	$startingitem = max(0, (int) $_GET["startingitem"]);

// The query comes from the user: never print it raw.
$words_html = htmlspecialchars($words, ENT_QUOTES, 'UTF-8');
// Form for use inside a query string (?keyword=...).
$words_query = htmlspecialchars(urlencode($words), ENT_QUOTES, 'UTF-8');
// Form for use inside the /search/<query>/<offset> path. Slashes are kept
// literal, as the original links did, since encoded slashes in a path are
// commonly rejected by web servers.
$words_path = htmlspecialchars(str_replace('%2F', '/', rawurlencode($words)), ENT_QUOTES, 'UTF-8');

$image_path = "/devpim/pub/";
$copy_img = "<img src=\"".$image_path."clipboard_add.png"."\" alt=\"copy the page name to the clipboard\"/>";
$star_img = "<img src=\"".$image_path."yellow-star.gif"."\" alt=\"mark that result as significant\"/>";
// this could use some Javascript effect
$alt_img = "<img src=\"".$image_path."server.png"."\" alt=\"use the alternate server (local or remote)\"/>";
$bug_img = "<img src=\"".$image_path."bug.gif"."\" alt=\"mark that query as problematic\"/>";

// Public base URL of each mirrored wiki, keyed by its directory name under
// /home/fabien/www/mirrors/. Unknown sources fall back to the directory name.
$mirror_urls = array(
	"saint-maur" => "http://saint-maur.benetou.fr",
	"fabien"     => "http://fabien.benetou.fr",
	"pim"        => "http://www.ourp.im/",
	"agiwiki"    => "http://www.agi-wiki.org/",
	"wiki"       => "http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea",
);

if ($words === ""){
	print "You need to make an actual search, use ?query=keyword<br/>";
	print "<form action=\"/devpim/pub/search.php\" method=\"get\"><input type=\"text\" name=\"query\"><input type=\"submit\" value=\"search\"/></form><br/>";
} else {
	$cl = new SphinxClient ();
	// SetLimits takes (offset, limit): the second argument is a result count,
	// not an end position.
	$cl->SetLimits($startingitem, $itemspan);
	$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
	$cl->SetRankingMode(SPH_RANK_PROXIMITY_BM25);
	# consider other ranking e.g. pagerank, weighted with freshness and hybrid
	$cl->SetSortMode(SPH_SORT_RELEVANCE);
	$res = $cl->Query($words,$index);
	#cf http://sphinxsearch.com/docs/manual-2.0.1.html#api-funcgroup-querying
	#$res = $cl->BuildExcerpts ( $docs, $index, $words, $opts );

	if ( !$res ) {
		// Report the failure but keep going so the closing markup is still sent.
		print "ERROR: " . htmlspecialchars($cl->GetLastError(), ENT_QUOTES, 'UTF-8') . ".\n";
	} else {
		// if (0 results) { $socialsearch = shell_exec("pmwiki_social_search "$keyword"); }
		// (if this is ever enabled, the keyword must go through escapeshellarg())
		// ideally this would be done asynchronously as it does take some time (few seconds!)
		// yet still always offer it as an option (since it's "costly")
		#var_dump($res);

		// An empty result set is handled here by falling back to an empty list.
		$IDs = isset($res["matches"]) ? array_keys($res["matches"]) : array();
		// note that $IDs = array_unique(array_keys($res["matches"])); does not solve the multiple page issue
		// thus probably comes from a duplicate indexing

		print "<h2>Query<a href=\"?bug&amp;keyword=$words_query\">$bug_img</a></h2>";
		if (isset( $res["words"][$words]["hits"]) )
			$hits = $res["words"][$words]["hits"]." time(s) ";
		else
			$hits = "";
		print "\"$words_html\" found ".$hits." in ".$res["total_found"]." document(s):<br/>";

		print "<ul>";
		foreach ($IDs as $i) {
			#BuildExcerpts()
			#consider here adding snippet, would require few disk access, MySQL storage could be faster...
			print "<li>";
			if (!isset($idx[$i])) {
				// The index returned an ID that the ID map does not know about.
				print "unknown document ".htmlspecialchars((string) $i, ENT_QUOTES, 'UTF-8');
				print "</li>";
				continue;
			}
			$file = $idx[$i];
			$file_html = htmlspecialchars($file, ENT_QUOTES, 'UTF-8');

			if (preg_match('|/home/fabien/irclogs/(.*)|', $file, $m)){
				// IRC logs are served from the discussions area.
				$target = $m[1];
				$url = urlencode($target);
				print "<a href=\"https://cloud.benetou.fr/discussions/$url\">IRClogs:".htmlspecialchars($target, ENT_QUOTES, 'UTF-8')."</a>";
			} elseif (preg_match('|/home/fabien/www/mirrors/|', $file)){
				// Mirrored wiki page: <source>/wiki.d/<Group>.<Name>
				if (preg_match('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|', $file, $m)){
					$source = $m[1];
					$page = $m[2]."/".$m[3];
					$url = isset($mirror_urls[$source]) ? $mirror_urls[$source] : $source;
					// Some base URLs end with "/": avoid a double slash in the link.
					$url = rtrim($url, "/");
					$source_html = htmlspecialchars($source, ENT_QUOTES, 'UTF-8');
					$page_html = htmlspecialchars($page, ENT_QUOTES, 'UTF-8');
					$url_html = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
					print "<a href=\"$url_html/$page_html\">$source_html:$page_html</a>";
					print "<a href=\"http://self/mirrors/$source_html/$page_html\">".$alt_img."</a>";
					// NOTE(review): placeholder link, no clipboard script is defined in this file.
					print "<a href=\"javascript:[[$page_html]]\">$copy_img</a>";
				} else {
					// Not a recognisable wiki page file: show the raw path.
					print $file_html;
				}
			} elseif (preg_match('|/home/fabien/repository/(.*)|', $file, $m)){
				$path_html = htmlspecialchars($m[1], ENT_QUOTES, 'UTF-8');
				print "<a href=\"http://fabien.benetou.fr/repository/?p=.git;a=blob;f=$path_html\">Repository:$path_html</a>";
			} elseif (preg_match('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|', $file, $m)){
				// Local wiki page: <wiki>/wiki.d/<Group>.<Name>
				$wiki_html = htmlspecialchars($m[1], ENT_QUOTES, 'UTF-8');
				$page_html = htmlspecialchars($m[2]."/".$m[3], ENT_QUOTES, 'UTF-8');
				print "<a href=\"http://self/$wiki_html/$page_html\">$wiki_html:$page_html</a>";
				print "<a href=\"https://cloud.benetou.fr/backups/wiki/$page_html\">".$alt_img."</a>";
			} else {
				// Unknown location: show the raw path.
				print $file_html;
			}
			$file_query = htmlspecialchars(urlencode($file), ENT_QUOTES, 'UTF-8');
			print "<a href=\"?star&amp;keyword=$words_query&amp;result=$file_query\">$star_img</a>";
			print "</li>";
		}
		print "</ul>";

		// Pagination: previous / numbered pages / next.
		$nextitems = $startingitem+$itemspan;
		print "<div>";
		if ($startingitem > 0) {
			// Clamp so an offset that is not a multiple of the page size can still go back.
			$previousitems = max(0, $startingitem-$itemspan);
			print "<a href=\"/search/$words_path/$previousitems\">previous items</a>";
		}
		// Round up so that a partially filled last page is listed too.
		$pages = (int) ceil($res["total"]/$itemspan);
		if ($pages > 1) {
			for ($p=1;$p<=$pages;$p++) {
				$offset = ($p-1)*$itemspan;
				print " <a href=\"/search/$words_path/$offset\">$p</a>/";
			}
		}
		if ($nextitems < $res["total"])
			print "<a href=\"/search/$words_path/$nextitems\">next items</a>";
		print "</div>";

		// Show when the index was last written; keep "unknown" if the file cannot be read.
		$indexage = "unknown";
		$targetindexfile = "/var/lib/sphinxsearch/data/pmwikis.spd";
		$updatedindex = file_exists($targetindexfile) ? filemtime($targetindexfile) : false;
		if ($updatedindex !== false)
			$indexage = date("c",$updatedindex);
		print "<hr/><div style=\"text-align:center\">$indexage index<br/>(if bug check indexer errors).</div>";
	}
}
?>
</div>