<?php
# Return the "key pages" of a wiki group (PmWiki recipe).
#
# TODO
#  - explicit importance via PTVs
#     - existing PTVs startrecall, recall, startprepare, ... (cf MemoryHandling)
#     - dedicated PTV
#  - http://phpir.com/pagerank-in-php
#  - http://faculty.ucr.edu/~hanneman/nettext/C10_Centrality.html#Betweenness
#  - check if isset() really is necessary, seems it was avoided in Coeditions
#  - transform as an extension of http://www.pmwiki.org/wiki/PmWiki/CustomPagelistSortOrder
#    (would produce much better integration)

/**
 * Return the names of the "key pages" of a group.
 *
 * The pages of the group are listed, the housekeeping pages are removed
 * (see CleanPageList), the link network between the remaining pages is built
 * (see GenerateNetworkFromList) and the pages scoring above a hard-coded
 * threshold for the chosen mechanism are returned.
 *
 * None of the mechanisms uses the page text variables stored in the network yet.
 *
 * @param string $groupname          Name of the group to analyse.
 * @param string $selectionmechanism "NetworkLeverageCentrality" or
 *                                   "NetworkOutgoingIncoming"; any other value
 *                                   selects the degree-based default.
 * @return array Page names without duplicates (keys are not re-indexed);
 *               empty when the network is empty or nothing passes the threshold.
 */
function GroupKeyPages($groupname, $selectionmechanism = "")
{
    // The pattern used to carry an /e modifier. It has no meaning for matching
    // and PHP 7+ refuses to compile patterns that use it, so it is dropped.
    // NOTE(review): assumes ListPages treats a "/.../" argument as a PCRE
    // pattern - confirm. This could also be replaced by a proper ListPages call.
    $pages = ListPages("/$groupname\./");
    $pages = CleanPageList($pages, $groupname);
    $b = GenerateNetworkFromList($pages);

    switch ($selectionmechanism) {
        case "NetworkLeverageCentrality":
            $keypages = NetworkLeverageCentrality($b, 0.05); # value threshold
            break;
        case "NetworkOutgoingIncoming":
            $keypages = NetworkOutgoingIncoming($b, 4, 5);   # incoming, then outgoing threshold
            break;
        default:
            $keypages = NetworkDegree($b, 8);                # degree threshold
    }

    return array_unique($keypages);
}

/**
 * Build the link network between the pages of a list.
 *
 * Only links whose target is itself in $pagelist are counted. Each such link
 * from $page to $link adds 1 to $page's "outgoing" and "degree", adds 1 to
 * $link's "incoming" and "degree", and records each page in the other's
 * "neighboors" list. Non-empty "startrecall", "recall" and "startprepare"
 * page text variables are stored on the page's node as well.
 *
 * A node that only holds page text variables has no "name", "degree",
 * "incoming", "outgoing" or "neighboors" entry; consumers must use isset().
 *
 * @param array $pagelist Page names (strings).
 * @return array Nodes indexed by page name.
 */
function GenerateNetworkFromList($pagelist)
{
    $b = array();
    $relatedPTVs = array("startrecall", "recall", "startprepare");

    // Page name => original key, so membership tests are a hash lookup
    // instead of a linear array_search() for every link.
    $known = array_flip($pagelist);

    foreach ($pagelist as $page) {
        $content = ReadPage($page, READPAGE_CURRENT);

        // Keep the page text variables that have a value.
        foreach ($relatedPTVs as $PTV) {
            $PTV_value = PageTextVar($page, $PTV);
            if ($PTV_value) {
                $b[$page][$PTV] = $PTV_value;
            }
        }

        // A page that could not be read, or that has no link targets,
        // contributes no edges.
        if (!is_array($content) || empty($content["targets"])) {
            continue;
        }

        // "targets" is split on commas, one linked page name per item.
        foreach (explode(",", $content["targets"]) as $link) {
            if (!isset($known[$link])) {
                continue; // link leaves the page list
            }

            $b[$page]["outgoing"] = isset($b[$page]["outgoing"]) ? $b[$page]["outgoing"] + 1 : 1;
            $b[$page]["name"] = $page;

            $b[$link]["incoming"] = isset($b[$link]["incoming"]) ? $b[$link]["incoming"] + 1 : 1;
            $b[$link]["name"] = $link;

            $b[$page]["degree"] = isset($b[$page]["degree"]) ? $b[$page]["degree"] + 1 : 1;
            $b[$link]["degree"] = isset($b[$link]["degree"]) ? $b[$link]["degree"] + 1 : 1;

            $b[$page]["neighboors"][] = $link;
            $b[$link]["neighboors"][] = $page;
        }
    }

    return $b;
}

/**
 * Remove the housekeeping pages of a group from a page list.
 *
 * The excluded pages are "<group>.<group>", "<group>.Template",
 * "<group>.GroupHeader" and "<group>.GroupFooter" (a rather arbitrary choice,
 * not necessarily a good one).
 *
 * Pages that are absent from the list are simply ignored. The earlier
 * unset($list[array_search(...)]) form removed the FIRST page of the list
 * whenever an excluded page was missing, because array_search() returns
 * false, which is cast to key 0.
 *
 * @param array  $pagelist  Page names.
 * @param string $groupname Group the pages belong to.
 * @return array The remaining page names, original keys preserved.
 */
function CleanPageList($pagelist, $groupname)
{
    $excluded = array(
        "$groupname.$groupname",
        "$groupname.Template",
        "$groupname.GroupHeader",
        "$groupname.GroupFooter",
    );

    return array_diff($pagelist, $excluded);
}

/**
 * Select the pages whose incoming or outgoing link count exceeds a threshold.
 *
 * A page that passes both tests is listed twice.
 *
 * @param array $b                 Network as built by GenerateNetworkFromList.
 * @param int   $IncomingThreshold A page is kept when "incoming" is strictly greater.
 * @param int   $OutgoingThreshold A page is kept when "outgoing" is strictly greater.
 * @return array Page names (possibly with duplicates).
 */
function NetworkOutgoingIncoming($b, $IncomingThreshold = 0, $OutgoingThreshold = 0)
{
    // Initialised so that an empty array is returned when no page passes.
    $keypages = array();

    foreach ($b as $bs) {
        if (isset($bs["incoming"]) && $bs["incoming"] > $IncomingThreshold) {
            $keypages[] = $bs["name"];
        }
        if (isset($bs["outgoing"]) && $bs["outgoing"] > $OutgoingThreshold) {
            $keypages[] = $bs["name"];
        }
    }

    return $keypages;
}

/**
 * Select the pages whose degree exceeds a threshold.
 *
 * @param array $b               Network as built by GenerateNetworkFromList.
 * @param int   $DegreeThreshold A page is kept when "degree" is strictly greater.
 * @return array Page names.
 */
function NetworkDegree($b, $DegreeThreshold = 0)
{
    // Initialised so that an empty array is returned when no page passes.
    $keypages = array();

    foreach ($b as $bs) {
        if (isset($bs["degree"]) && $bs["degree"] > $DegreeThreshold) {
            $keypages[] = $bs["name"];
        }
    }

    return $keypages;
}

/**
 * Select the pages whose leverage score exceeds a threshold.
 *
 * For a node of degree k with neighbours of degree k_j the score is
 *     ( sum_j (k - k_j) / (k + k_j) ) / ( (sum_j k_j) / k ) / k
 * and stays 0 when the degree or the neighbour average is not positive.
 * Nodes without a "neighboors" entry are ignored.
 *
 * @param array $b         Network as built by GenerateNetworkFromList.
 * @param float $Threshold A page is kept when its score is strictly greater.
 * @return array Page names.
 */
function NetworkLeverageCentrality($b, $Threshold = 0)
{
    // Initialised so that an empty array is returned when no page passes.
    $keypages = array();

    foreach ($b as $bs) {
        if (!isset($bs["neighboors"])) {
            continue;
        }

        $degree = isset($bs["degree"]) ? $bs["degree"] : 0;
        $leverage = 0;
        $sum = 0;
        $neighbourDegrees = 0;

        foreach ($bs["neighboors"] as $neighboor) {
            $other = isset($b[$neighboor]["degree"]) ? $b[$neighboor]["degree"] : 0;
            // Both degrees are at least 1 in a network produced by
            // GenerateNetworkFromList; the guard only protects hand-made input.
            if ($degree + $other != 0) {
                $sum += ($degree - $other) / ($degree + $other);
            }
            $neighbourDegrees += $other;
        }

        if ($degree > 0) {
            $avg = $neighbourDegrees / $degree;
            if ($avg > 0) {
                $leverage = ($sum / $avg) / $degree;
            }
        }

        if ($leverage > $Threshold) {
            $keypages[] = $bs["name"];
        }
    }

    return $keypages;
}

/**
 * Return the degree distribution of the link network of a group.
 *
 * Nodes without a "degree" entry (pages that only carry page text variables)
 * are not counted.
 *
 * The result is no longer passed through array_unique(): that call removed
 * every bucket whose count had already been seen, which collapsed the
 * histogram and discarded the zero-filled buckets.
 *
 * The average degree / incoming / outgoing sums that used to be accumulated
 * here were never returned and have been removed. The incoming and outgoing
 * averages were necessarily equal, since every link counted by
 * GenerateNetworkFromList adds one "outgoing" and one "incoming".
 *
 * @param string $groupname Name of the group to analyse.
 * @return array degree => number of pages having that degree, for every degree
 *               from 1 to max(10, highest degree found), sorted by degree;
 *               an empty array when the network is empty.
 */
function GroupDegreeDistribution($groupname)
{
    // See GroupKeyPages: the /e modifier is dropped because PHP 7+ rejects it.
    $pages = ListPages("/$groupname\./");
    $pages = CleanPageList($pages, $groupname);
    $b = GenerateNetworkFromList($pages);

    if (empty($b)) {
        return array();
    }

    $distribution = array_fill(1, 10, 0);

    foreach ($b as $node) {
        if (!isset($node["degree"])) {
            continue;
        }
        $degree = $node["degree"];
        if (isset($distribution[$degree])) {
            $distribution[$degree]++;
        } else {
            $distribution[$degree] = 1;
        }
    }

    // Fill the gaps so that every degree up to the highest one has a bucket.
    $highest = max(array_keys($distribution));
    for ($i = 1; $i < $highest; $i++) {
        if (!isset($distribution[$i])) {
            $distribution[$i] = 0;
        }
    }

    ksort($distribution);

    return $distribution;
}