#!/bin/sh
# revertingpimlinks.sh - build the link -> wiki pages index consumed by the
# Greasemonkey script described at
# http://fabien.benetou.fr/Tools/Greasemonkey#RevertedPIMLinks
#
# Provenance: recovered from the patch "added wiki parsing script for GM
# script" (commit a9a13f249c84baf91d67b955031fcdf241bf5d38, Fabien Benetou,
# Tue, 14 Jun 2011 20:01:19 +0200), which created
# link_extractor/revertingpimlinks.sh with mode 100755.
#
# Steps, all relative to the current directory:
#   1. render every page of every wiki group with `pmwiki` and store the
#      external links found, one "URL PAGE" pair per line, in links_from_<group>
#   2. write the sorted list of distinct URLs to sorted_global.txt
#   3. write "URL page1 page2 ..." lines to indexed_links_uniqued
#   4. drop lines containing a double quote -> indexed_links_uniqued_cleaned
#   5. turn each line into a user_pref(...) statement -> user.js
#   6. bzip2 user.js and move user.js.bz2 into the publication directory
#
# Configuration (environment variables; defaults are the values that used to
# be hard-coded):
#   WIKI_DIR         directory holding the page files          (../wiki.d)
#   PUB_DIR          directory receiving user.js.bz2           (../pub)
#   EXCLUDED_GROUPS  ERE; groups whose name matches are skipped (PmWiki|Site)
#
# Exit status: 0 on success, non-zero when the wiki directory is missing, no
# link file could be produced, or indexing/formatting/compression/publication
# failed.
#
# TODO
#   pages to avoid
#   periodically call this very script (server cron, client cron)

: "${WIKI_DIR:=../wiki.d}"
: "${PUB_DIR:=../pub}"
: "${EXCLUDED_GROUPS=PmWiki|Site}"

# Print a progress message. printf is used instead of echo because some of
# the messages contain backslashes, which `echo` treats differently from one
# /bin/sh to another.
say() {
  printf '%s\n' "$*"
}

# Print the name of every wiki group, one per line.
# A group is the part of a page file name before its first dot. Groups whose
# name matches EXCLUDED_GROUPS are left out (an empty pattern keeps them all).
list_groups() {
  for lg_path in "$WIKI_DIR"/*.*; do
    [ -e "$lg_path" ] || continue   # an unmatched glob stays literal
    lg_name=${lg_path##*/}
    printf '%s\n' "${lg_name%%.*}"
  done | LC_ALL=C sort -u | {
    if [ -n "$EXCLUDED_GROUPS" ]; then
      grep -v -E -- "$EXCLUDED_GROUPS"
    else
      cat
    fi
  }
}

# Filter: rendered HTML on stdin -> "URL PAGE" lines on stdout.
#   $1 - name of the page the HTML was rendered from
# Every occurrence of "http" starts a new candidate line; candidates pointing
# at the excluded hosts or at an .ico" resource are dropped (fixed-string
# matches, so the dots are literal). A candidate is cut at the first single
# quote and at the first </a>, so only URLs in single-quoted attributes or
# followed by </a> come out bare; double-quoted ones keep their tail and are
# filtered out later by the '"' clean-up in main.
extract_links() {
  awk '{ gsub(/http/, "\nhttp"); print }' \
    | grep http \
    | grep -v -F -e 'benetou.fr' -e 'seedea.org' -e '127.0.0.1' -e '.ico"' \
    | sed -e "s/'.*//" -e 's/<\/a>.*//' \
    | awk -v page="$1" '{ print $0 " " page }'
}

# Render every page file of one group and write the links found to
# links_from_<group> in the current directory.
#   $1 - group name
collect_group_links() {
  cg_group=$1
  for cg_path in "$WIKI_DIR/$cg_group".*; do
    [ -e "$cg_path" ] || continue
    cg_page=${cg_path##*/}
    # stdin is detached so pmwiki cannot swallow the caller's input
    pmwiki n="$cg_page" nolog=true </dev/null | extract_links "$cg_page"
  done > "links_from_$cg_group"
}

# Print the distinct URLs (text before the first space) found in the given
# "URL PAGE" files, keeping only those that look like http(s)://<word char>.
# Sorting is byte-wise (LC_ALL=C) so the result does not depend on the locale.
list_unique_urls() {
  sed -e 's/ .*//' -- "$@" \
    | LC_ALL=C sort -u \
    | grep -E 'https?://[A-Za-z0-9_]'
}

# Print one "URL page1 page2 ..." line per URL.
#   $1   - file listing the URLs to index, one per line
#   $2.. - "URL PAGE" files to look the URLs up in
# A line belongs to a URL only when its first field is exactly that URL: the
# URL is never interpreted as a regular expression and never matched as a
# substring of a longer URL. The page is the text after the last space.
# Pages are unique and byte-wise sorted. Everything is done in one pass
# instead of one grep per URL.
build_index() {
  awk -F'[ ]' '
    FILENAME == ARGV[1] { wanted[$0] = 1; next }
    NF >= 2 && ($1 in wanted) { print $1 " " $NF }
  ' "$@" | LC_ALL=C sort -u | awk -F'[ ]' '
    $1 != url { if (NR > 1) print row; url = $1; row = $1 }
    { row = row " " $2 }
    END { if (NR > 0) print row }
  '
}

# Filter: "URL pages..." lines on stdin -> user_pref(...) statements on
# stdout. Lines without a space are passed through unchanged.
format_user_js() {
  sed 's|\([^ ][^ ]*\) \(.*\)|user_pref("greasemonkey.scriptvals.Utopiah/reverted PIM links.rPIMlinks \1", "\2");|'
}

# Run the whole extraction / indexing / publication sequence.
main() {
  if [ ! -d "$WIKI_DIR" ]; then
    printf '%s: wiki directory not found: %s\n' "${0##*/}" "$WIKI_DIR" >&2
    return 1
  fi

  say 'extract all the links from the wiki by group'
  list_groups | while IFS= read -r group; do
    collect_group_links "$group"
  done

  # NOTE: like the original, this picks up every links_from_* file present,
  # including files left behind by earlier runs.
  set -- links_from_*
  if [ ! -e "$1" ]; then
    printf '%s: no links_from_* file was produced\n' "${0##*/}" >&2
    return 1
  fi

  say 'get all the links > sorted_global.txt'
  list_unique_urls "$@" > sorted_global.txt

  say 'for every link check in which page it is mentionned and append it without duplicates > indexed_links_uniqued'
  build_index sorted_global.txt "$@" > indexed_links_uniqued || return 1

  say 'clean from improper URL (e.g. " present) sed "s/\"/\\\"/g"'
  # grep exits 1 when every line is rejected; that is not an error here
  grep -v '"' indexed_links_uniqued > indexed_links_uniqued_cleaned

  say 'format as User.js and make it available'
  format_user_js < indexed_links_uniqued_cleaned > user.js || return 1

  say 'compress for faster transfert'
  bzip2 -k -f user.js || return 1   # compress by a factor 10

  say 'make the script available via http://fabien.benetou.fr/pub/user.js.bz2'
  mv -- user.js.bz2 "$PUB_DIR"/ || return 1
}

main "$@"