To do

update Tools/Sphinxsearch and the public repository source
List the non-working queries and debug them (use = keyword_to_test for comparison)

full text search should return #FullTextSearch
facebook
google
apple
Brooks
... (cf log analysis "?bug&keyword=problematickeyword")

see also ProblematicQueries
make it more general, e.g. via a reverse dictionary
better split documents, e.g. logs per day
handle the histories (wiki and repository diffs)
make URLs work, consider specific stemming
improve weighting and thus ranking
properly handle remote URLs
leverage .pageindex of each wiki
work on snippets
replace WikiName: by their icon, cf localmap
add pagerank and other topology metrics as attributes
use delta or RT indexing, requires better ID generation first though
add visualization, e.g. a network view, or linked documents shown in the same color
provide links to specific interesting queries, e.g. only this week

"internet brain"~10

LearningSearch
integrate Social PIM, especially if there are 0 results
integrate external search, e.g. Seeks, especially if there are 0 results including through Social PIM

"; $star_img = " $\"mark$ "; // this could use some Javascript effect $alt_img = " $\"use$ "; $bug_img = " $\"mark$ "; if ($words == ""){ print "You need to make an actual search, use ?query=keyword
"; print "
"; return; } if ($startingitem == "" || $startingitem < 0){ $startingitem=0; } $itemspan=20; $cl = new SphinxClient (); $cl->SetLimits($startingitem, $startingitem+$itemspan); $cl->SetMatchMode(SPH_MATCH_EXTENDED2); $cl->SetRankingMode(SPH_RANK_PROXIMITY_BM25); # consider other ranking e.g. pagerank, weighted with freshness and hybrid $cl->SetSortMode(SPH_SORT_RELEVANCE); $res = $cl->Query($words,$index); #cf http://sphinxsearch.com/docs/manual-2.0.1.html#api-funcgroup-querying #$res = $cl->BuildExcerpts ( $docs, $index, $words, $opts ); if ( !$res ) { die ( "ERROR: " . $cl->GetLastError() . ".\n" ); } else { // XXX should test for empty result before doing the assigment // if (0 results) { $socialsearch = shell_exec("pmwiki_social_search "$keyword"); } // ideally this would be done ascynhroneously as it does take some time (few seconds!) // yet still always offer it as an option (since it's "costly") #var_dump($res); $IDs = array_keys($res["matches"]); // note that $IDs = array_unique(array_keys($res["matches"])); does not solve the multiple page issue // thus probably comes from a duplicate indexing print "

Query$bug_img

"; if (isset( $res["words"]["$words"]["hits"]) ) $hits = $res["words"]["$words"]["hits"]." time(s) "; else $hits = ""; print "\"$words\" found ".$hits." in ".$res["total_found"]." document(s):
"; print "

"; $file=$idx["$i"]; if (preg_match('|/home/fabien/irclogs/|',$file)){ $target = preg_replace('|/home/fabien/irclogs/(.*)|','$1',$file); $url = urlencode($target); print "IRClogs:$target"; } elseif (preg_match('|/home/fabien/www/mirrors/|',$file)){ $page = preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|','$2/$3',$file); $source = preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|','$1',$file); switch ( $source ) { case "saint-maur": $url="http://saint-maur.benetou.fr"; break; case "fabien": $url="http://fabien.benetou.fr"; break; case "pim": $url="http://www.ourp.im/"; break; case "agiwiki": $url="http://www.agi-wiki.org/"; break; case "wiki": $url="http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea"; break; default: $url=$source; } print "$source:$page"; print preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|',''.$alt_img.'',$file); print "$copy_img"; } elseif (preg_match('|/home/fabien/repository/|',$file)){ print preg_replace('|/home/fabien/repository/(.*)|','Repository:$1',$file); } else { print preg_replace('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|','$1:$2/$3',$file); print preg_replace('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|',''.$alt_img.'',$file); } print "$star_img"; print "

"; print "

"; if ($previousitems >= 0) print "previous items"; for ($p=1;$p<$res["total"]/$itemspan;$p++) print " $p/"; if ($nextitems < $res["total"]) print "next items"; print "

"; $indexage = "unknown"; $targetindexfile = "/var/lib/sphinxsearch/data/pmwikis.spd"; $updatedindex = filemtime($targetindexfile); $indexage = date("c",$updatedindex); print "

$indexage index
(if bug check indexer errors)."; } ?>