#!/usr/bin/env bash
#
# http://fabien.benetou.fr/Tools/Greasemonkey#RevertedPIMLinks
#
# TODO
#   properly declare
#     - path
#     - groups to avoid
#     - pages to avoid

# Run timestamp (seconds since the epoch); written to user.js as the IRCdate entry.
DATE=$(date +%s)

#cd /home/utopiah/web/benetou.fr/fabien/link_extractor

#######################################
# List the log files to scan, one path per line.
# Considers every <dir>/<name>.log below the current directory and skips
# paths containing "twitter" or "identica", "#" (channels) and "http"
# (mistakes that would break the rest of the script).
# Arguments: none
# Outputs:   the selected paths on stdout; nothing when no log exists
# Returns:   0
#######################################
list_user_logs() {
  local log
  for log in */*.log; do
    # An unmatched glob is left as the literal pattern: skip it rather than
    # reporting a file that does not exist.
    [[ -e $log || -L $log ]] || continue
    case $log in
      *twitter* | *identica* | *'#'* | *http*) continue ;;
    esac
    printf '%s\n' "$log"
  done
  return 0
}

# Kept as a plain whitespace-separated string because the rest of the script
# expands it unquoted ($USERS); paths containing whitespace are not supported.
USERS=$(list_user_logs)

echo 'extract all the links from the logs'
# skipped here since the logs are small enough not to be pre-processed (rendered when it's a wiki)

#######################################
# Print every distinct http(s) link found in the given log files.
# Lines containing "<Utopiah>" are ignored. A link is "http://" or "https://"
# followed by an alphanumeric character and runs up to the next whitespace,
# so a bare "http" or "http://" is never reported.
# Arguments: $@ - log files to scan
# Outputs:   sorted, de-duplicated links on stdout, one per line
# Returns:   0
#######################################
extract_links() {
  # Without file operands grep would block reading stdin.
  (( $# > 0 )) || return 0
  # -h drops the "file:" prefix, -o emits each link on its own line.
  grep -h -- http "$@" \
    | grep -v -- '<Utopiah>' \
    | grep -oE 'https?://[[:alnum:]][^[:space:]]*' \
    | sort -u
  return 0
}

echo 'get all the links > sorted_global.txt'
# USERS is a whitespace-separated list of paths: word splitting is intended.
# shellcheck disable=SC2086
extract_links $USERS > sorted_global.txt

echo 'for every link check in which page it is mentionned and append it without duplicates > indexed_links_uniqued'

#######################################
# For every link, list the log files that mention it.
# Each output record is a single line: "<link> Discussion:<file> <file> ...".
# The link is matched case-insensitively as a fixed string, so URL characters
# such as "." or "[" are not interpreted as regex syntax.
# NOTE(review): all matching files are joined on one line. Previously the
# list spilled over several lines and only the first file survived the later
# "grep http" filter - confirm the consumer accepts several files per link.
# Arguments: $1 - file holding one link per line
#            $2.. - log files to search
# Outputs:   index records on stdout
# Returns:   0 on success, 1 when the link file cannot be read
#######################################
index_links() {
  local links_file=$1 url pages
  shift
  if [[ ! -r $links_file ]]; then
    printf 'index_links: cannot read %s\n' "$links_file" >&2
    return 1
  fi
  while IFS= read -r url || [[ -n $url ]]; do
    [[ -n $url ]] || continue
    pages=''
    if (( $# > 0 )); then
      # -l prints each matching file once, whatever the number of operands.
      pages=$(grep -F -i -l -- "$url" "$@" | sort -u | paste -s -d ' ' -)
    fi
    printf '%s Discussion:%s\n' "$url" "$pages"
  done < "$links_file"
  return 0
}

# USERS is a whitespace-separated list of paths: word splitting is intended.
# shellcheck disable=SC2086
index_links sorted_global.txt $USERS > indexed_links_uniqued \
  || echo 'warning: link index could not be built' >&2

echo 'clean from improper URL (e.g. " present) sed "s/\"/\\\"/g"'

#######################################
# Drop index records containing a double quote (they would break the quoted
# strings generated for user.js) and remove duplicate records.
# Arguments: $1 - index file to clean
# Outputs:   sorted, de-duplicated records on stdout
#######################################
clean_links() {
  grep -v -- '"' "$1" | sort -u
}

clean_links indexed_links_uniqued > indexed_links_uniqued_cleaned

echo 'format as User.js and make it available'

#######################################
# Render cleaned index records as user.js preference lines.
# Every record containing "http" is split at its first space into a key
# suffix (the link) and a value (the rest of the line); a final IRCdate
# entry carries the given timestamp.
# Arguments: $1 - cleaned index file
#            $2 - timestamp stored in the IRCdate entry
# Outputs:   user_pref(...) lines on stdout
#######################################
format_user_js() {
  local cleaned=$1 stamp=$2
  # \{1,\} is the portable spelling of the GNU-only \+ repetition.
  grep -- http "$cleaned" \
    | sed 's/\([^ ]\{1,\}\) \(.*\)/user_pref("greasemonkey.scriptvals.Utopiah\/reverted PIM links.rPIMlinks \1", "\2");/'
  printf 'user_pref("greasemonkey.scriptvals.Utopiah/reverted PIM links.rPIMlinks IRCdate", "%s");\n' "$stamp"
}

format_user_js indexed_links_uniqued_cleaned "$DATE" > user.js

echo 'compress for faster transfert'

#######################################
# Compress a file with bzip2, keeping the original (-k) and overwriting any
# previous archive (-f).
# Arguments: $1 - file to compress
# Returns:   1 when the file does not exist, otherwise bzip2's exit status
#######################################
compress_user_js() {
  local target=$1
  if [[ ! -f $target ]]; then
    printf 'compress_user_js: %s not found\n' "$target" >&2
    return 1
  fi
  bzip2 -k -f -- "$target" #compress by a factor 10
}

compress_user_js user.js || echo 'warning: user.js was not compressed' >&2

echo 'make the script available via http://cloud.benetou.fr/discussions/user.js.bz2'
#mv user.js.bz2 ../pub/
echo '(note that since this is not merged with the existing user.js from user.js it will required another restart)'

#echo 'periodically call this very script'
#server cron added
#client cron not added