You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
misc/cookbook/groupkeypages.php

163 lines
4.6 KiB

<?php
# Return the "key pages" of a group.
# TODO:
# - look at http://phpir.com/pagerank-in-php
# - look at http://faculty.ucr.edu/~hanneman/nettext/C10_Centrality.html#Betweenness
# - check whether isset() is really necessary; it seems to have been avoided in Coeditions
# Return the names of the "key pages" of a wiki group.
#
# $groupname           name of the group whose pages are analysed
# $selectionmechanism  "NetworkLeverageCentrality", "NetworkOutgoingIncoming",
#                      or anything else (including "") for plain degree selection
#
# Returns the selected page names without duplicates. array_unique() keeps the
# original keys, so the numeric indexes of the result may have gaps.
# ListPages() is defined outside this file (presumably the PmWiki core).
function GroupKeyPages($groupname,$selectionmechanism=""){
    // The group name is quoted and anchored so that only pages of exactly this
    // group match ("Foo" must not pick up "XFoo.Page"). The former "e" modifier
    // was dropped: it only ever applied to preg_replace() and PHP 7+ refuses
    // to compile a pattern that carries it.
    $pages = ListPages('/^' . preg_quote($groupname, '/') . '\./');
    //note that this could be replaced by proper ListPages call
    $pages = CleanPageList($pages,$groupname);
    $b = GenerateNetworkFromList($pages);
    if (!is_array($b)) {
        // no links found at all: treat as an empty network
        $b = array();
    }
    switch ($selectionmechanism) {
        case "NetworkLeverageCentrality":
            $keypages = NetworkLeverageCentrality($b,0.05); # value threshold
            break;
        case "NetworkOutgoingIncoming":
            # arguments are (incoming threshold, outgoing threshold), in that order
            $keypages = NetworkOutgoingIncoming($b,4,5);
            break;
        default:
            $keypages = NetworkDegree($b,8); # degree threshold
    }
    return array_unique($keypages);
}
# Build the link network between the pages of $pagelist.
#
# $pagelist  list of page names; only links whose target is also in this list
#            are counted
#
# Returns an array keyed by page name. A node only exists once it takes part in
# at least one counted link, and then holds:
#   "name"        the page name
#   "outgoing"    number of counted links leaving the page (only if > 0)
#   "incoming"    number of counted links reaching the page (only if > 0)
#   "degree"      outgoing + incoming
#   "neighboors"  page names at the other end of each counted link (repeats kept)
# An empty array is returned when no link qualifies.
# ReadPage() is defined outside this file (presumably the PmWiki core); its
# "targets" entry is read as a comma-separated list of link targets.
function GenerateNetworkFromList($pagelist){
    $b = array();
    if (!is_array($pagelist)) {
        return $b;
    }
    // Flip once so that membership tests are hash lookups instead of a linear
    // array_search() for every single link.
    $known = array_flip($pagelist);
    foreach ($pagelist as $page) {
        $content = ReadPage($page,0);
        if (!is_array($content) || !isset($content["targets"])) {
            // unreadable page or page without link information
            continue;
        }
        foreach (explode(",",$content["targets"]) as $link) {
            if (!isset($known[$link])) {
                // target outside the analysed list (or the empty string)
                continue;
            }
            if (!isset($b[$page])) {
                $b[$page] = array("name" => $page);
            }
            if (!isset($b[$link])) {
                $b[$link] = array("name" => $link);
            }
            $b[$page]["outgoing"] = isset($b[$page]["outgoing"]) ? $b[$page]["outgoing"] + 1 : 1;
            $b[$link]["incoming"] = isset($b[$link]["incoming"]) ? $b[$link]["incoming"] + 1 : 1;
            // each link adds one to the degree of both of its ends
            $b[$page]["degree"] = isset($b[$page]["degree"]) ? $b[$page]["degree"] + 1 : 1;
            $b[$link]["degree"] = isset($b[$link]["degree"]) ? $b[$link]["degree"] + 1 : 1;
            $b[$page]["neighboors"][] = $link;
            $b[$link]["neighboors"][] = $page;
        }
    }
    return $b;
}
# Remove the group's housekeeping pages from a page list.
#
# $pagelist   list of page names
# $groupname  group whose "<group>.<group>", "<group>.Template",
#             "<group>.GroupHeader" and "<group>.GroupFooter" pages are dropped
#
# Returns the remaining page names with their original keys preserved.
# The choice of excluded pages is rather arbitrary, not necessarily a good one.
function CleanPageList($pagelist,$groupname){
    $excluded = array(
        "$groupname.$groupname",
        "$groupname.Template",
        "$groupname.GroupHeader",
        "$groupname.GroupFooter",
    );
    // array_diff() leaves the list alone when a name is absent. The previous
    // unset($pagelist[array_search(...)]) used false as the key in that case,
    // which is key 0, and so deleted the first page of the list.
    return array_diff($pagelist, $excluded);
}
# Select pages by their incoming and outgoing link counts.
#
# $b                  network: array of nodes, each an array with "name" and
#                     optionally "incoming" / "outgoing" counters
# $IncomingThreshold  a node is selected when its "incoming" count is strictly greater
# $OutgoingThreshold  a node is selected when its "outgoing" count is strictly greater
#
# Returns the list of selected names. A node passing both tests is listed
# twice; an empty array is returned when nothing passes.
function NetworkOutgoingIncoming($b,$IncomingThreshold=0,$OutgoingThreshold=0){
    // checked in this order for every node: incoming first, then outgoing
    $limits = array(
        "incoming" => $IncomingThreshold,
        "outgoing" => $OutgoingThreshold,
    );
    $selected = array();
    foreach ($b as $node) {
        foreach ($limits as $direction => $limit) {
            if (isset($node[$direction]) && $node[$direction] > $limit) {
                $selected[] = $node["name"];
            }
        }
    }
    return $selected;
}
# Select pages whose degree is strictly greater than a threshold.
#
# $b                network: array of nodes, each an array with "name" and
#                   optionally a "degree" counter
# $DegreeThreshold  lower bound (exclusive) on the degree
#
# Returns the list of selected names, empty when the threshold is too high.
function NetworkDegree($b, $DegreeThreshold=0){
    $selected = array();
    foreach ($b as $node) {
        if (!isset($node["degree"])) {
            continue;
        }
        if (!($node["degree"] > $DegreeThreshold)) {
            continue;
        }
        $selected[] = $node["name"];
    }
    return $selected;
}
# Select pages by a leverage-centrality style score.
#
# $b          network keyed by page name; each node is an array with "name",
#             "degree" and "neighboors" (list of neighbour page names)
# $Threshold  a node is selected when its score is strictly greater
#
# For every node that has a "neighboors" entry, the ratio
# (own degree - neighbour degree) / (own degree + neighbour degree) is summed
# over its neighbours. That sum is divided by the mean neighbour degree
# (neighbour degrees summed, divided by the node's own degree) and then by the
# node's own degree. The score stays 0 when the own degree or that mean is not
# positive.
#
# Returns the list of selected names, empty when the threshold is too high.
function NetworkLeverageCentrality($b,$Threshold=0){
    $selected = array();
    foreach ($b as $node) {
        if (!isset($node["neighboors"])) {
            continue;
        }
        $own = $node["degree"];
        $ratioTotal = 0;
        $peerDegreeTotal = 0;
        foreach ($node["neighboors"] as $peer) {
            $peerDegree = $b["$peer"]["degree"];
            $ratioTotal += ($own - $peerDegree) / ($own + $peerDegree);
            $peerDegreeTotal += $peerDegree;
        }
        $score = 0;
        if ($own > 0) {
            $meanPeerDegree = $peerDegreeTotal / $own;
            if ($meanPeerDegree > 0) {
                $score = ($ratioTotal / $meanPeerDegree) / $own;
            }
        }
        if ($score > $Threshold) {
            $selected[] = $node["name"];
        }
    }
    return $selected;
}
# Return the degree distribution of a group's link network.
#
# $groupname  name of the group whose pages are analysed
#
# Returns an array mapping degree => number of pages with that degree, sorted
# by degree. Degrees 1..10 are always present, and any gap up to the highest
# observed degree is filled with 0.
# ListPages() is defined outside this file (presumably the PmWiki core).
function GroupDegreeDistribution($groupname){
    // Quoted and anchored so only pages of exactly this group match; the "e"
    // modifier was dropped because PHP 7+ refuses patterns that carry it.
    $pages = ListPages('/^' . preg_quote($groupname, '/') . '\./');
    //note that this could be replaced by proper ListPages call
    $pages = CleanPageList($pages,$groupname);
    $b = GenerateNetworkFromList($pages);
    if (!is_array($b)) {
        // no links found at all: treat as an empty network
        $b = array();
    }
    $distribution = array_fill(1,10,0);
    foreach ($b as $node){
        if (!isset($node["degree"])) {
            continue;
        }
        $degree = $node["degree"];
        if (isset($distribution[$degree]))
            $distribution[$degree]++;
        else
            $distribution[$degree]=1;
    }
    // The unused average degree / incoming / outgoing figures were removed.
    // The incoming and outgoing averages were bound to be equal: every counted
    // link adds exactly one to each total.

    // Fill the gaps so the result is a contiguous histogram from 1 up to the
    // highest observed degree.
    $maxdegree = max(array_keys($distribution));
    for ($i = 1; $i < $maxdegree; $i++) {
        if (!isset($distribution[$i])) {
            $distribution[$i] = 0;
        }
    }
    ksort($distribution);
    // Returned as is: array_unique() used to drop every degree whose count
    // equalled an earlier one, including all but one of the zero entries.
    return $distribution;
}
?>