parent
0366b4ab4b
commit
c961ea41ef
@ -0,0 +1,14 @@ |
|||||||
|
#! /usr/bin/env bash |
||||||
|
|
||||||
|
# see http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea/Oimp/Ubiquitousvocabulary |
||||||
|
|
||||||
|
# Glob selecting every file inside every directory whose name starts with '#'
# under the freenode ChannelLogger tree. Kept as a quoted pattern so that it is
# only expanded at the point where the file list is built.
LOGS='web/thelab/stigmergylive/logs/ChannelLogger/freenode/#*/*'

#######################################
# Build the vocabulary used on lines containing "<Utopiah".
# Reads log lines on stdin, keeps only the lines containing "<Utopiah",
# splits them into runs of ASCII letters, drops words of 3 letters or fewer
# and prints each remaining word once, sorted.
# Because the matched marker is part of the line, "Utopiah" itself is listed.
# Arguments: none (input comes from stdin)
# Outputs:   one word per line on stdout
#######################################
extract_vocabulary() {
  # tr -cs turns every run of non-letters into a single newline (portable,
  # unlike a "\n" in a sed replacement); awk then removes the short words
  # and the empty line that a leading separator leaves behind, instead of
  # merely blanking them.
  grep -- "<Utopiah" \
    | tr -cs 'a-zA-Z' '\n' \
    | awk 'length($0) > 3' \
    | sort \
    | uniq
}

# Word splitting and pathname expansion of the pattern are intended here;
# the resulting file names are not split again, so odd names stay intact.
# shellcheck disable=SC2206
log_files=( $LOGS )

if [[ -e "${log_files[0]}" ]]; then
  cat -- "${log_files[@]}" | extract_vocabulary
else
  # An unmatched glob stays literal; report it rather than letting cat fail
  # on a file literally named after the pattern.
  printf 'no log files match %s\n' "$LOGS" >&2
fi
||||||
|
|
||||||
|
# cleaning by removing low frequency words |
||||||
|
# ... | sed -e "s/[^a-zA-Z]/ /g" | sed -e "s/ /\n/g" | sed -e "s/^.\{1,3\}$//" |sort | uniq -c | awk '$1 > 3' | wc -l
||||||
|
|
||||||
|
# 15 most commonly used words
||||||
|
# ... | sed -e "s/[^a-zA-Z]/ /g" | sed -e "s/ /\n/g" | sed -e "s/^.\{1,3\}$//" |sort | uniq -c | sort -n | tail -15 |
||||||
|
|
Loading…
Reference in new issue