2011-08-14 11:45:41 +02:00
< html >
<!-- "To do" panel: open tasks for the search page, shown in a box pinned to the right. -->
<!-- Fixes: no braces in the style attribute, sub-lists nested inside their <li>, balanced </ul> tags. -->
<div style="width:400px; position:absolute; right:0px; background-color:#F0F8FF; margin:10px; border:2px solid #ddf; padding:5px;">
  <h2>To do</h2>
  <ul>
    <li>update <a href="http://fabien.benetou.fr/Tools/Sphinxsearch">Tools/Sphinxsearch</a> and <a href="http://fabien.benetou.fr/repository/?p=.git;a=blob;f=pim_search.php">the public repository source</a></li>
    <li>List the non working queries and debug them (use =keyword_to_test for comparison)
      <ul>
        <li><a href="/search/full%20text%20search">full text search</a> should return <a href="http://fabien.benetou.fr/Events/RailsCampParis3#FullTextSearch">#FullTextSearch</a></li>
        <li><a href="/search/facebook">facebook</a></li>
        <li><a href="/search/google">google</a></li>
        <li><a href="/search/apple">apple</a></li>
        <li><a href="/search/Brooks">Brooks</a></li>
        <li>... (cf log analysis "?bug&amp;keyword=problematickeyword")</li>
      </ul>
    </li>
    <li>see also <a href="http://fabien.benetou.fr/Wiki/LearningSearch#ProblematicQueries">ProblematicQueries</a></li>
    <li>make it more generalist via e.g. <a href="http://www.onelook.com/reverse-dictionary.shtml">reverse dictionary</a></li>
    <li>better split documents, e.g. logs per day</li>
    <li>handle the histories (wiki and repository diffs)</li>
    <li>make URLs work, consider specific stemming</li>
    <li>improving weighting and thus ranking</li>
    <li>properly handle remote URLs</li>
    <li>leverage .pageindex of each wiki</li>
    <li>work on snippets</li>
    <li>replace WikiName: by their icon, cf localmap</li>
    <li>add pagerank and other topology metrics as attributes</li>
    <li>use delta or RT indexing, requires better ID generation first though</li>
    <li>add visualization, e.g. network or linked document with the same color</li>
    <li>provide links to specific interesting queries, e.g. only this week
      <ul>
        <li><a href="/search/%22internet%20brain%22~10">"internet brain"~10</a></li>
      </ul>
    </li>
    <li><a href="http://fabien.benetou.fr/Wiki/LearningSearch">LearningSearch</a></li>
    <li>integrate <a href="http://fabien.benetou.fr/MemoryRecalls/ImprovingPIM#SocialPIM">Social PIM</a>, especially if there are 0 results</li>
    <li>integrate external search, e.g. Seeks, especially if there are 0 results including through Social PIM</li>
  </ul>
</div>
<div style="width:500px; position:absolute; left:0px; background-color:#F8F0FF; margin:10px; border:2px solid #ddf; padding:5px;">
<?php
# Search results panel: runs the "query" GET parameter against the Sphinx index
# named in $index and prints the matching documents as links, followed by a
# pager and the modification date of the index file.
#
# GET parameters read here:
#   query        - the search expression (matched in SPH_MATCH_EXTENDED2 mode)
#   startingitem - offset of the first result to show (defaults to 0)
#
# Overall this is very specialized and should be configurable to be used on other PIMs
# this should be tested first and if not present, explain in a line where to download and how to install Sphinx API
require("sphinxapi.php");
# NOTE(review): $idx (document ID => indexed file path) is read below but never
# assigned in this script; presumably sphinx_doc_ids.php defines it - confirm.
require("sphinx_doc_ids.php");

if (!function_exists("pim_search_html")) {
    # Escape a value for HTML text or a double-quoted attribute
    # (uses PHP's default charset for htmlspecialchars).
    function pim_search_html($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES);
    }
}

$index = "pmwikis";

# Both parameters come straight from the URL: tolerate their absence and
# reject non-string values (e.g. ?query[]=x) instead of raising notices.
$words = (isset($_GET["query"]) && is_string($_GET["query"])) ? $_GET["query"] : "";
$startingitem = isset($_GET["startingitem"]) ? (int) $_GET["startingitem"] : 0;
if ($startingitem < 0) {
    $startingitem = 0;
}

$image_path = "/devpim/pub/";
$copy_img = "<img src=\"" . $image_path . "clipboard_add.png" . "\" alt=\"copy the page name to the clipboard\" />";
$star_img = "<img src=\"" . $image_path . "yellow-star.gif" . "\" alt=\"mark that result as significant\" />";
// this could use some Javascript effect
$alt_img = "<img src=\"" . $image_path . "server.png" . "\" alt=\"use the alternate server (local or remote)\" />";
$bug_img = "<img src=\"" . $image_path . "bug.gif" . "\" alt=\"mark that query as problematic\" />";

if ($words == "") {
    print "You need to make an actual search, use ?query=keyword<br/>";
    print "<form action=\"/devpim/pub/search.php\" method=\"get\"><input type=\"text\" name=\"query\"><input type=\"submit\" value=\"search\" /></form><br/>";
    # Unchanged from the original: execution of this script stops here, so
    # nothing after this point (including the closing </div>) is output.
    return;
}

$itemspan = 20;
$cl = new SphinxClient();
# SetLimits() takes (offset, limit). The original passed offset + page size as
# the limit, so every page after the first asked for more than one page of rows.
$cl->SetLimits($startingitem, $itemspan);
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetRankingMode(SPH_RANK_PROXIMITY_BM25);
# consider other ranking e.g. pagerank, weighted with freshness and hybrid
$cl->SetSortMode(SPH_SORT_RELEVANCE);
$res = $cl->Query($words, $index);
#cf http://sphinxsearch.com/docs/manual-2.0.1.html#api-funcgroup-querying
#$res = $cl->BuildExcerpts ( $docs, $index, $words, $opts );
if (!$res) {
    die("ERROR: " . pim_search_html($cl->GetLastError()) . ".\n");
}

// if (0 results) { $socialsearch = shell_exec("pmwiki_social_search "$keyword"); }
// ideally this would be done asynchronously as it does take some time (few seconds!)
// yet still always offer it as an option (since it's "costly")
#var_dump($res);
# Guard the "matches" key so that a search with no results does not call
# array_keys() on a missing entry.
$IDs = (isset($res["matches"]) && is_array($res["matches"])) ? array_keys($res["matches"]) : array();
// note that $IDs = array_unique(array_keys($res["matches"])); does not solve the multiple page issue
// thus probably comes from a duplicate indexing

# The query text is user input: escape it for HTML and encode it for URLs.
$words_html = pim_search_html($words);
$words_query = urlencode($words);     # for ?keyword=... query strings
$words_path = rawurlencode($words);   # for /search/<words>/<offset> paths

print "<h2>Query<a href=\"?bug&amp;keyword=$words_query\">$bug_img</a></h2>";
# The per-word hit count is looked up under the whole query string as key.
if (isset($res["words"][$words]["hits"]))
    $hits = $res["words"][$words]["hits"] . " time(s)";
else
    $hits = "";
$total_found = isset($res["total_found"]) ? $res["total_found"] : 0;
print "\"$words_html\" found " . pim_search_html($hits) . " in " . pim_search_html($total_found) . " document(s):<br/>";

print "<ul>";
foreach ($IDs as $i) {
    #BuildExcerpts()
    #consider here adding snippet, would require few disk access, MySQL storage could be faster...
    print "<li>";
    $file = (isset($idx) && isset($idx["$i"])) ? $idx["$i"] : "";
    if (preg_match('|/home/fabien/irclogs/(.*)|', $file, $m)) {
        # IRC log: link to the online copy of the discussion
        $target = $m[1];
        $url = urlencode($target);
        print "<a href=\"https://cloud.benetou.fr/discussions/$url\">IRClogs:" . pim_search_html($target) . "</a>";
    } elseif (preg_match('|/home/fabien/www/mirrors/|', $file)) {
        # Mirrored wiki page: .../mirrors/<source>/wiki.d/<Group>.<Page>
        $matched = preg_match('|/home/fabien/www/mirrors/(.*)/wiki.d/(.*)\.(.*)|', $file, $m);
        if ($matched) {
            $source = $m[1];
            $page = $m[2] . "/" . $m[3];
        } else {
            # Same fallback as the original preg_replace(): unmatched paths are used as-is
            $source = $file;
            $page = $file;
        }
        switch ($source) {
            case "saint-maur": $url = "http://saint-maur.benetou.fr"; break;
            case "fabien": $url = "http://fabien.benetou.fr"; break;
            case "pim": $url = "http://www.ourp.im/"; break;
            case "agiwiki": $url = "http://www.agi-wiki.org/"; break;
            case "wiki": $url = "http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea"; break;
            default: $url = $source;
        }
        # Some base URLs end with "/": trim it so the link does not contain "//"
        $url = rtrim($url, "/");
        print "<a href=\"" . pim_search_html("$url/$page") . "\">" . pim_search_html("$source:$page") . "</a>";
        if ($matched) {
            print "<a href=\"" . pim_search_html("http://self/mirrors/$source/$page") . "\">" . $alt_img . "</a>";
        }
        # NOTE(review): "javascript:[[Page]]" is not executable script; it looks like a
        # placeholder for a future copy-to-clipboard action - kept as in the original.
        print "<a href=\"javascript:[[" . pim_search_html($page) . "]]\">$copy_img</a>";
    } elseif (preg_match('|/home/fabien/repository/(.*)|', $file, $m)) {
        # File from the git repository: link to its gitweb blob view
        $blob = pim_search_html($m[1]);
        print "<a href=\"http://fabien.benetou.fr/repository/?p=.git;a=blob;f=$blob\">Repository:$blob</a>";
    } elseif (preg_match('|/home/fabien/www/(.*)/wiki.d/(.*)\.(.*)|', $file, $m)) {
        # Local wiki page: .../www/<wiki>/wiki.d/<Group>.<Page>
        $wiki = pim_search_html($m[1]);
        $page = pim_search_html($m[2] . "/" . $m[3]);
        print "<a href=\"http://self/$wiki/$page\">$wiki:$page</a>";
        print "<a href=\"https://cloud.benetou.fr/backups/wiki/$page\">" . $alt_img . "</a>";
    } else {
        # Unknown location: show the path itself, escaped
        print pim_search_html($file);
    }
    print "<a href=\"?star&amp;keyword=$words_query&amp;result=" . urlencode($file) . "\">$star_img</a>";
    print "</li>";
}
print "</ul>";

# Pager: previous / numbered pages / next
$previousitems = $startingitem - $itemspan;
$nextitems = $startingitem + $itemspan;
$total = isset($res["total"]) ? (int) $res["total"] : 0;
# ceil() so that a last, partially filled page also gets its link
# (the original "$p < total / itemspan" loop always left the last page out).
$pagecount = (int) ceil($total / $itemspan);
print "<div>";
if ($previousitems >= 0)
    print "<a href=\"/search/$words_path/$previousitems\">previous items</a>";
for ($p = 1; $p <= $pagecount; $p++)
    print "<a href=\"/search/$words_path/" . ($p - 1) * $itemspan . "\">$p</a>/";
if ($nextitems < $total)
    print "<a href=\"/search/$words_path/$nextitems\">next items</a>";
print "</div>";

# Age of the index: modification time of the index data file, or "unknown"
# when the file cannot be read.
$indexage = "unknown";
$targetindexfile = "/var/lib/sphinxsearch/data/pmwikis.spd";
if (file_exists($targetindexfile)) {
    $updatedindex = filemtime($targetindexfile);
    if ($updatedindex !== false)
        $indexage = date("c", $updatedindex);
}
print "<hr/><div style=\"text-align:center\">$indexage index<br/>(if bug check indexer errors).</div>";
?>
</div>