You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.9 KiB
48 lines
1.9 KiB
14 years ago
|
#!/bin/sh
# http://fabien.benetou.fr/Tools/Greasemonkey#RevertedPIMLinks
# TODO: properly declare
#   - the path
#   - the groups to avoid
#   - the pages to avoid

# Root directory of the wiki. It must end with a slash because the path below
# is built by plain concatenation.
WIKI=/home/utopiah/web/benetou.fr/fabien/
# Start time in seconds since the Unix epoch; written to user.js as the
# "rPIMlinks date" preference.
DATE=$(date +%s)

# Work from the extractor directory: every later path (../wiki.d/,
# links_from_*, sorted_global.txt, user.js, ...) is relative to it.
# Abort if it cannot be entered, so that nothing is read from or written to
# an unrelated directory.
cd "${WIKI}link_extractor" || exit 1

# Print the name of every wiki group to scan, one per line. A group name is
# the part of a ../wiki.d/ file name before its first dot; names containing
# "PmWiki" or "Site" are left out.
wiki_groups() {
  for page_file in ../wiki.d/*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$page_file" ] || continue
    page_name=${page_file##*/}
    printf '%s\n' "${page_name%%.*}"
  done | sort -u | grep -v -e PmWiki -e Site
}

# For every group, write links_from_<Group> with one "<url> <Group.Page>"
# line per link found in the output of `pmwiki n=<Group.Page> nolog=true`.
# Links mentioning benetou.fr, seedea.org, 127.0.0.1 or ending in .ico" are
# dropped.
extract_links_by_group() {
  for GROUP in $(wiki_groups); do
    for page_file in ../wiki.d/"$GROUP".*; do
      [ -e "$page_file" ] || continue
      PAGE=${page_file##*/}
      # 1. put every occurrence of "http" at the start of its own line
      #    (\n in the replacement relies on GNU sed, as the script always did)
      # 2. keep only the lines that hold a link
      # 3. drop own/local hosts and favicons; dots are escaped so that they
      #    only match a literal dot
      # 4. cut the line at the first single quote and at the closing </a>,
      #    then append the page name
      pmwiki "n=$PAGE" nolog=true \
        | sed 's/http/\nhttp/g' \
        | grep http \
        | grep -v -e 'benetou\.fr' -e 'seedea\.org' -e '127\.0\.0\.1' -e '\.ico"' \
        | sed -e "s/'.*//" -e 's/<\/a>.*//' -e "s/\$/ $PAGE/"
    done > "links_from_$GROUP"
  done
}

echo 'extract all the links from the wiki by group'
extract_links_by_group

# Write sorted_global.txt: the sorted, de-duplicated list of URLs found in
# all links_from_* files. The URL is everything before the first space of a
# line; only entries containing http:// or https:// followed by a word
# character (letter, digit or underscore) are kept.
collect_unique_links() {
  sed 's/ .*//' links_from_* \
    | sort -u \
    | grep -E 'https?://[[:alnum:]_]' > sorted_global.txt
}

echo 'get all the links > sorted_global.txt'
collect_unique_links

# Write indexed_links_uniqued: one "<url> <page> <page> ..." line per URL of
# sorted_global.txt, listing without duplicates every page whose
# links_from_* entry contains that URL.
index_links() {
  # IFS= and -r keep each URL exactly as stored (no trimming, no backslash
  # processing).
  while IFS= read -r line; do
    printf '%s ' "$line"
    # -F matches the URL as a literal string rather than as a regular
    # expression (URLs contain '.', '?', '[' ...); -i ignores case; -- stops
    # option parsing. The page name is the last space-separated field of a
    # matching line; xargs joins the unique page names onto one line.
    grep -i -F -- "$line" links_from_* \
      | sed 's/.* //' \
      | sort -u \
      | xargs
  # The output redirection comes first so that the index is emptied even if
  # sorted_global.txt cannot be opened. The file now starts directly with the
  # first entry instead of a blank line.
  done > indexed_links_uniqued < sorted_global.txt
}

echo 'for every link check in which page it is mentionned and append it without duplicates > indexed_links_uniqued'
index_links

# Write indexed_links_uniqued_cleaned: a copy of indexed_links_uniqued
# without the entries that contain a double quote (they cannot be placed
# inside the double-quoted strings of user.js) and without empty lines
# (they would end up as blank lines in user.js).
clean_links() {
  grep -v -e '"' -e '^$' indexed_links_uniqued > indexed_links_uniqued_cleaned
}

# printf '%s\n' prints the message verbatim; how echo treats the backslashes
# in it differs between sh implementations.
printf '%s\n' 'clean from improper URL (e.g. " present) sed "s/\"/\\\"/g"'
clean_links

# Write user.js from indexed_links_uniqued_cleaned. Every "<url> <pages>"
# line becomes
#   user_pref("greasemonkey.scriptvals.Utopiah/reverted PIM links.rPIMlinks <url>", "<pages>");
# and a final "rPIMlinks date" preference records $DATE, which is set at the
# start of the script.
write_user_js() {
  {
    # [^ ][^ ]* is the portable spelling of the GNU-only [^ ]\+ .
    sed 's/\([^ ][^ ]*\) \(.*\)/user_pref("greasemonkey.scriptvals.Utopiah\/reverted PIM links.rPIMlinks \1", "\2");/' \
      indexed_links_uniqued_cleaned
    printf 'user_pref("greasemonkey.scriptvals.Utopiah/reverted PIM links.rPIMlinks date", "%s");\n' "$DATE"
  } > user.js
}

echo 'format as User.js and make it available'
write_user_js

# replaced by rsync
#echo 'compress for faster transfer'
#bzip2 -k -f user.js #compress by a factor 10

# replaced by rsync
# echo 'make the script available via http://fabien.benetou.fr/pub/user.js.bz2'
#mv user.js.bz2 ../pub/

#echo 'periodically call this very script'
#server cron added

#client cron not added