You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
181 lines
5.4 KiB
181 lines
5.4 KiB
14 years ago
|
<?php
|
||
|
|
||
|
# return the "key pages" of a group
|
||
|
# todo
|
||
14 years ago
|
# explicit importance via PTVs
|
||
|
# existing PTVs startrecall, recall, startprepare, ... (cf MemoryHandling)
|
||
|
# dedicated PTV
|
||
14 years ago
|
# http://phpir.com/pagerank-in-php
|
||
|
# http://faculty.ucr.edu/~hanneman/nettext/C10_Centrality.html#Betweenness
|
||
|
# check if isset() really is necessary, seems it was avoided in Coeditions
|
||
14 years ago
|
# transform as an extension of http://www.pmwiki.org/wiki/PmWiki/CustomPagelistSortOrder
|
||
|
# would produce much better integration
|
||
14 years ago
|
|
||
|
function GroupKeyPages($groupname,$selectionmechanism=""){
  # Return the names of the "key pages" of a group, i.e. the pages whose
  # position in the group's internal link network exceeds a fixed threshold.
  #
  # $groupname          name of the group to analyse
  # $selectionmechanism "NetworkLeverageCentrality", "NetworkOutgoingIncoming",
  #                     or anything else (including "") for plain degree
  # returns             array of page names without duplicates (keys are
  #                     whatever array_unique() leaves, not re-indexed);
  #                     empty array when the group has no pages
  #
  # XXX none of those methods leverage PTVs so far

  # The group name is quoted and anchored so that "Foo" selects "Foo.*" only
  # (and not "BarFoo.*"), and the "e" modifier is gone because current PCRE
  # rejects it. NOTE(review): assumes ListPages() hands "/"-delimited
  # patterns to PCRE unchanged -- confirm against the PmWiki core.
  $pages = ListPages("/^" . preg_quote($groupname, "/") . "\\./");
  if (!is_array($pages) || count($pages) == 0) return array();

  //note that this could be replaced by proper ListPages call
  $pages = CleanPageList($pages,$groupname);

  $b = GenerateNetworkFromList($pages);

  switch ($selectionmechanism) {
    case "NetworkLeverageCentrality":
      $keypages = NetworkLeverageCentrality($b,0.05); # value threshold
      break;
    case "NetworkOutgoingIncoming":
      # argument order is (network, incoming threshold, outgoing threshold)
      $keypages = NetworkOutgoingIncoming($b,4,5); # incoming > 4 or outgoing > 5
      break;
    default:
      $keypages = NetworkDegree($b,8); # degree threshold
  }
  return array_unique($keypages);
}
|
||
|
|
||
|
|
||
|
function GenerateNetworkFromList($pagelist){
  # Build the link network of the pages in $pagelist.
  #
  # $pagelist  array of page names; only links between pages of this list
  #            are taken into account
  # returns    array keyed by page name; each node may hold
  #              "name"        the page name (set once the node has a link)
  #              "outgoing"    number of links from the page to listed pages
  #              "incoming"    number of links from listed pages to the page
  #              "degree"      outgoing + incoming
  #              "neighboors"  names at the other end of each counted link
  #                            (one entry per link, so duplicates can occur)
  #              "startrecall", "recall", "startprepare"
  #                            value of the page text variable, when truthy
  #            A page with neither links nor PTVs gets no node at all, and a
  #            page with PTVs only has none of the link-related keys.
  $b = array();
  if (!is_array($pagelist)) return $b;

  $relatedPTVs = array( "startrecall", "recall", "startprepare" );

  # name => true lookup, so that membership tests do not rescan the list
  $inlist = array();
  foreach ($pagelist as $name) {
    $inlist["$name"] = true;
  }

  foreach ($pagelist as $page) {
    $content = ReadPage($page,READPAGE_CURRENT);

    # check if it has PTVs : if so, add the value
    foreach ($relatedPTVs as $PTV){
      $PTV_value = PageTextVar($page,$PTV);
      if ($PTV_value) {
        $b["$page"]["$PTV"] = $PTV_value;
      }
    }

    # a page without recorded link targets contributes no edge
    if (!isset($content["targets"]) || $content["targets"] === "") continue;

    foreach (explode(",",$content["targets"]) as $link) {
      if (!isset($inlist[$link])) continue; # link leaves the list

      $b["$page"]["name"] = $page;
      $b["$link"]["name"] = $link;

      $b["$page"]["outgoing"] = isset($b["$page"]["outgoing"]) ? $b["$page"]["outgoing"] + 1 : 1;
      $b["$link"]["incoming"] = isset($b["$link"]["incoming"]) ? $b["$link"]["incoming"] + 1 : 1;
      $b["$page"]["degree"]   = isset($b["$page"]["degree"])   ? $b["$page"]["degree"] + 1   : 1;
      $b["$link"]["degree"]   = isset($b["$link"]["degree"])   ? $b["$link"]["degree"] + 1   : 1;

      $b["$page"]["neighboors"][] = $link;
      $b["$link"]["neighboors"][] = $page;
    }
  }
  return $b;
}
|
||
|
|
||
|
function CleanPageList($pagelist,$groupname){
  # Remove the housekeeping pages of a group from a list of page names.
  #
  # $pagelist   array of page names
  # $groupname  group whose "<group>.<group>", "<group>.Template",
  #             "<group>.GroupHeader" and "<group>.GroupFooter" pages are
  #             dropped (rather arbitrary, not necessarily a good choice)
  # returns     the remaining names with their original keys (the array is
  #             not re-indexed); empty array if $pagelist is not an array
  if (!is_array($pagelist)) return array();

  $excluded = array(
    "$groupname.$groupname",
    "$groupname.Template",
    "$groupname.GroupHeader",
    "$groupname.GroupFooter"
  );

  foreach ($excluded as $unwanted) {
    $position = array_search($unwanted, $pagelist, true);
    # array_search() returns false when the page is absent; using that as a
    # key would be read as 0 and drop the first page of the list.
    if ($position !== false) {
      unset($pagelist[$position]);
    }
  }
  return $pagelist;
}
|
||
|
|
||
|
function NetworkOutgoingIncoming($b,$IncomingThreshold=0,$OutgoingThreshold=0){
  # Select the nodes of network $b whose incoming link count is above
  # $IncomingThreshold or whose outgoing link count is above
  # $OutgoingThreshold.
  #
  # returns  list of node names; a node passing both tests is listed twice
  #          (incoming hit first, then outgoing hit)

  # initialised up front so an array comes back even if nothing qualifies
  $selected = array();

  # checked in this order for every node
  $limits = array(
    "incoming" => $IncomingThreshold,
    "outgoing" => $OutgoingThreshold
  );

  foreach ($b as $node) {
    foreach ($limits as $direction => $limit) {
      if (isset($node[$direction]) && $node[$direction] > $limit) {
        $selected[] = $node["name"];
      }
    }
  }
  return $selected;
}
|
||
|
|
||
|
function NetworkDegree($b, $DegreeThreshold=0){
  # Select the nodes of network $b whose "degree" is strictly above
  # $DegreeThreshold. Nodes without a "degree" entry are ignored.
  #
  # returns  list of node names, in the order the nodes appear in $b

  # initialised up front so an array comes back even if nothing qualifies
  $selected = array();

  foreach ($b as $node) {
    if (!isset($node["degree"])) {
      continue;
    }
    if (!($node["degree"] > $DegreeThreshold)) {
      continue;
    }
    $selected[] = $node["name"];
  }
  return $selected;
}
|
||
|
|
||
|
function NetworkLeverageCentrality($b,$Threshold=0){
  # Select the nodes of network $b whose leverage score is strictly above
  # $Threshold. Only nodes that have a "neighboors" list are scored.
  #
  # Score of a node with degree k and neighbours of degree k_j:
  #   total = sum over neighbours of (k - k_j) / (k + k_j)
  #   mean  = (sum over neighbours of k_j) / k
  #   score = (total / mean) / k        (0 when k or mean is not positive)
  # NOTE(review): the extra division by the mean neighbour degree does not
  # look like the usual leverage-centrality formula -- confirm it is intended.
  #
  # returns  list of node names, in the order the nodes appear in $b

  # initialised up front so an array comes back even if nothing qualifies
  $selected = array();

  foreach ($b as $node) {
    if (!isset($node["neighboors"])) {
      continue;
    }

    $ownDegree = $node["degree"];
    $ratioTotal = 0;
    $neighbourDegreeTotal = 0;
    foreach ($node["neighboors"] as $neighbourName) {
      $neighbourDegree = $b["$neighbourName"]["degree"];
      $ratioTotal += ($ownDegree - $neighbourDegree) / ($ownDegree + $neighbourDegree);
      $neighbourDegreeTotal += $neighbourDegree;
    }

    $score = 0;
    if ($ownDegree > 0) {
      $meanNeighbourDegree = $neighbourDegreeTotal / $ownDegree;
      if ($meanNeighbourDegree > 0) {
        $score = ($ratioTotal / $meanNeighbourDegree) / $ownDegree;
      }
    }

    if ($score > $Threshold) {
      $selected[] = $node["name"];
    }
  }
  return $selected;
}
|
||
|
|
||
|
function GroupDegreeDistribution($groupname){
  # Return the degree distribution of a group's internal link network.
  #
  # $groupname  name of the group to analyse
  # returns     array degree => number of pages having that degree, sorted
  #             by degree and holding every degree from 1 up to the highest
  #             one observed (at least 1..10), absent degrees counting 0;
  #             empty array when the group yields no network

  # The group name is quoted and anchored so that "Foo" selects "Foo.*" only,
  # and the "e" modifier is gone because current PCRE rejects it.
  # NOTE(review): assumes ListPages() hands "/"-delimited patterns to PCRE
  # unchanged -- confirm against the PmWiki core.
  $pages = ListPages("/^" . preg_quote($groupname, "/") . "\\./");
  if (!is_array($pages)) return array();

  //note that this could be replaced by proper ListPages call
  $pages = CleanPageList($pages,$groupname);

  $b = GenerateNetworkFromList($pages);
  if (empty($b)) return array();

  # baseline so that degrees 1..10 are always reported
  $distribution = array_fill(1,10,0);

  foreach ($b as $node){
    # nodes that only carry PTVs have no link counters
    if (!isset($node["degree"])) continue;
    $degree = $node["degree"];
    if (isset($distribution[$degree]))
      $distribution[$degree]++;
    else
      $distribution[$degree]=1;
  }

  # fill the gaps between the baseline and the highest observed degree
  $last_key = max(array_keys($distribution));
  for ($i=1;$i<$last_key;$i++)
    if (!(isset($distribution[$i])))
      $distribution[$i]=0;

  ksort($distribution);

  # The counts are returned as they are: passing them through array_unique()
  # would drop every degree whose count equals that of a lower degree.
  return $distribution;
}
|
||
|
|
||
|
?>
|