<html>
<div style="width:400px; position:absolute; right:0px; background-color:#F0F8FF; margin:10px; border:2px solid #ddf; padding:5px;">
<h2>To do</h2>
<ul>
<li>update <a href="http://fabien.benetou.fr/Tools/Sphinxsearch">Tools/Sphinxsearch</a> and <a href="http://fabien.benetou.fr/repository/?p=.git;a=blob;f=pim_search.php">the public repository source</a></li>
<li>list the non-working queries and debug them (use =keyword_to_test for comparison)</li>
<ul>
<li><a href="/search/full%20text%20search">full text search</a> should return <a href="http://fabien.benetou.fr/Events/RailsCampParis3#FullTextSearch">#FullTextSearch</a></li>
<li><a href="/search/facebook">facebook</a></li>
<li><a href="/search/google">google</a></li>
<li><a href="/search/apple">apple</a></li>
<li><a href="/search/Brooks">Brooks</a></li>
<li>... (cf log analysis of "?bug&keyword=problematickeyword")</li>
</ul>
<li>see also <a href="http://fabien.benetou.fr/Wiki/LearningSearch#ProblematicQueries">ProblematicQueries</a></li>
<li>make it more general via e.g. a <a href="http://www.onelook.com/reverse-dictionary.shtml">reverse dictionary</a></li>
<li>split documents better, e.g. logs per day</li>
<li>handle the histories (wiki and repository diffs)</li>
<li>make URLs work, consider specific stemming</li>
<li>improve weighting and thus ranking</li>
<li>properly handle remote URLs</li>
<li>leverage the .pageindex of each wiki</li>
<li>work on snippets</li>
<li>replace WikiName: prefixes with their icon, cf localmap</li>
<li>add pagerank and other topology metrics as attributes</li>
<li>use delta or RT indexing, though it requires better ID generation first</li>
<li>add visualization, e.g. a network view or linked documents sharing the same color</li>
<li>provide links to specific interesting queries, e.g. only this week</li>
<ul>
<li><a href="/search/%22internet%20brain%22~10">"internet brain"~10</a></li>
</ul>
<li><a href="http://fabien.benetou.fr/Wiki/LearningSearch">LearningSearch</a></li>
<li>integrate <a href="http://fabien.benetou.fr/MemoryRecalls/ImprovingPIM#SocialPIM">Social PIM</a>, especially if there are 0 results</li>
<li>integrate external search, e.g. Seeks, especially if there are 0 results, including through Social PIM</li>
</ul>
</div>
<div style="width:500px; position:absolute; left:0px; background-color:#F8F0FF; margin:10px; border:2px solid #ddf; padding:5px;">
<?php
# Overall this is very specialized and should be configurable to be used on other PIMs
# the Sphinx PHP API should be tested for first; if it is not present, explain in a line where to download it and how to install it
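# a minimal sketch of that check, assuming sphinxapi.php sits next to this script
# (it ships as api/sphinxapi.php in the Sphinx distribution):
if (!file_exists(dirname(__FILE__)."/sphinxapi.php"))
die("sphinxapi.php not found: download Sphinx from http://sphinxsearch.com/downloads/ and copy api/sphinxapi.php next to this script.\n");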
require ( "sphinxapi.php" );
require ( "sphinx_doc_ids.php" );
$index = "pmwikis";
$words = isset($_GET["query"]) ? $_GET["query"] : "";
if (isset($_GET["startingitem"]))
$startingitem = $_GET["startingitem"];
else
$startingitem = 0;
$image_path = "/devpim/pub/";
$copy_img = "<img src=\"".$image_path."clipboard_add.png\" alt=\"copy the page name to the clipboard\" />";
$star_img = "<img src=\"".$image_path."yellow-star.gif\" alt=\"mark that result as significant\" />";
// this could use some Javascript effect
$alt_img = "<img src=\"".$image_path."server.png\" alt=\"use the alternate server (local or remote)\" />";
$bug_img = "<img src=\"".$image_path."bug.gif\" alt=\"mark that query as problematic\" />";
if ($words == ""){
print "You need to make an actual search, use ?query=keyword< br / > ";
print "< form action = \"/devpim/pub/search.php\" method = \"get\" > < input type = \"text\" name = \"query\" > < input type = \"submit\" value = \"search\"/ > < / form > < br / > ";
return;
}
if ($startingitem == "" || $startingitem < 0 ) {
$startingitem=0;
}
$itemspan=20;
$cl = new SphinxClient ();
$cl->SetLimits($startingitem, $itemspan); # offset, then number of results per page
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetRankingMode(SPH_RANK_PROXIMITY_BM25);
# consider other rankings, e.g. pagerank weighted with freshness, or a hybrid (see the sketch below)
$cl->SetSortMode(SPH_SORT_RELEVANCE);
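# a hedged sketch of such a hybrid ranking, left commented out because it assumes the index
# exposes numeric attributes named pagerank and last_modified (which it may not yet):
#$cl->SetSortMode(SPH_SORT_EXPR, "@weight + pagerank*10 + last_modified/1000000");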
$res = $cl->Query($words,$index);
#cf http://sphinxsearch.com/docs/manual-2.0.1.html#api-funcgroup-querying
#$res = $cl->BuildExcerpts ( $docs, $index, $words, $opts );
if (!$res) {
die("ERROR: ".$cl->GetLastError().".\n");
} else {
// XXX should test for empty result before doing the assignment below
// if there are 0 results, run the social search: pmwiki_social_search "$keyword" (see the sketch just below)
// ideally this would be done asynchronously as it does take some time (a few seconds!)
// yet still always offer it as an option (since it is "costly")
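// hedged sketch of that fallback, kept opt-in since it is slow ("social" is a hypothetical
// extra URL parameter; pmwiki_social_search is the helper named above and must be in the PATH)
if ($res["total_found"] == 0) {
if (isset($_GET["social"]))
print "<pre>".htmlspecialchars(shell_exec("pmwiki_social_search ".escapeshellarg($words)))."</pre>";
else
print "No local result, <a href=\"?query=".urlencode($words)."&social\">try the social search</a> (it takes a few seconds).<br/>";
}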
#var_dump($res);
$IDs = array_keys($res["matches"]);
// note that $IDs = array_unique(array_keys($res["matches"])); does not solve the multiple page issue,
// which thus probably comes from duplicate indexing
print "< h2 > Query< a href = \"?bug&keyword=$words\" > $bug_img< / a > < / h2 > ";
if (isset( $res["words"]["$words"]["hits"]) )
$hits = $res["words"]["$words"]["hits"]." time(s) ";
else
$hits = "";
print "\"$words\" found ".$hits." in ".$res["total_found"]." document(s):< br / > ";
print "< ul > ";
foreach ($IDs as $i)
{
#BuildExcerpts()
#consider adding a snippet here (see the sketch at the end of this loop body); it would require a few disk accesses, MySQL storage could be faster...
print "< li > ";
$file=$idx["$i"];
if (preg_match('|/home/fabien/irclogs/|',$file)){
$target = preg_replace('|/home/fabien/irclogs/(.*)|','$1',$file);
$url = urlencode($target);
print "< a href = \"https://cloud.benetou.fr/discussions/$url\" > IRClogs:$target< / a > ";
}
elseif (preg_match('|/home/fabien/www/mirrors/|',$file)){
$page = preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|','$2/$3',$file);
$source = preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|','$1',$file);
switch ( $source ) {
case "saint-maur": $url="http://saint-maur.benetou.fr"; break;
case "fabien": $url="http://fabien.benetou.fr"; break;
case "pim": $url="http://www.ourp.im/"; break;
case "agiwiki": $url="http://www.agi-wiki.org/"; break;
case "wiki": $url="http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea"; break;
default: $url=$source;
}
print "< a href = \"$url/$page\" > $source:$page< / a > ";
print preg_replace('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|','< a href = "http://self/mirrors/$1/$2/$3" > '.$alt_img.'< / a > ',$file);
print "< a href = \"javascript:[[$page]]\" > $copy_img< / a > ";
} elseif (preg_match('|/home/fabien/repository/|',$file)){
print preg_replace('|/home/fabien/repository/(.*)|','<a href="http://fabien.benetou.fr/repository/?p=.git;a=blob;f=$1">Repository:$1</a>',$file);
} else {
print preg_replace('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|','<a href="http://self/$1/$2/$3">$1:$2/$3</a>',$file);
print preg_replace('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|','<a href="https://cloud.benetou.fr/backups/wiki/$2/$3">'.$alt_img.'</a>',$file);
}
print "< a href = \"?star&keyword=$words&result=$file\" > $star_img< / a > ";
print "< / li > ";
}
$previousitems=$startingitem-$itemspan;
$nextitems=$startingitem+$itemspan;
print "< / ul > ";
print "< div > ";
if ($previousitems >= 0)
print "< a href = \"/search/$words/$previousitems\" > previous items< / a > ";
for ($p=1; $p <= ceil($res["total"]/$itemspan); $p++)
print " <a href=\"/search/$words/".(($p-1)*$itemspan)."\">$p</a>/";
if ($nextitems < $res["total"])
print "< a href = \"/search/$words/$nextitems\" > next items< / a > ";
print "< / div > ";
$indexage = "unknown";
$targetindexfile = "/var/lib/sphinxsearch/data/pmwikis.spd";
$updatedindex = filemtime($targetindexfile);
$indexage = date("c",$updatedindex);
print "< hr / > < center > $indexage index< br / > (if bug check indexer errors).< / center > ";
}
?>
</div>