You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
17 lines
748 B
17 lines
748 B
#!/usr/bin/env bash
#
# Print the distinct vocabulary used by one IRC nick in the channel logs:
# every run of ASCII letters that is at least 4 letters long, taken from the
# log lines containing "<NICK", one word per line, de-duplicated.
#
# Usage:   run with no argument for the default nick (Utopiah), or pass the
#          nick as the first argument.
# Input:   the files matched by $LOGS (relative to the current directory).
# Output:  sorted unique words on stdout; diagnostics on stderr.
# Exit:    1 when no log file matches $LOGS.
#
# see related ideas
# http://fabien.benetou.fr/innovativ.it/www/HistoricalArchives/Seedea/Oimp/Ubiquitousvocabulary
# http://fabien.benetou.fr/Cookbook/Cognition#LearningNewLanguage
# and most_used_commands

# -e / pipefail are deliberately left off: grep exiting 1 because the nick
# never spoke is a normal outcome here, not an error.
set -u

# Without nullglob an unmatched pattern would be passed on literally.
shopt -s nullglob

# Glob pattern for the log files: one directory per channel ("#name"), any
# file inside it. Quoted so that it is expanded only where intended.
LOGS='web/thelab/stigmergylive/logs/ChannelLogger/freenode/#*/*'

#######################################
# Turn raw log lines into a sorted list of unique words.
# Arguments: $1 - nick to keep (optional, default: Utopiah). Lines are kept
#                 when they contain the literal text "<NICK", so the nick
#                 itself always shows up in the result.
# Inputs:    log lines on stdin
# Outputs:   one word per line on stdout, sorted, without duplicates and
#            without empty lines
#######################################
extract_vocabulary() {
  local nick=${1:-Utopiah}

  # LC_ALL=C keeps grep and tr byte-oriented, so "a-zA-Z" means exactly the
  # ASCII letters whatever the caller's locale or the encoding of the logs.
  # tr -cs maps every run of non-letters to a single newline (one word per
  # line); awk then drops the empty lines and the words shorter than 4
  # letters instead of leaving blank lines behind.
  LC_ALL=C grep -F -- "<${nick}" \
    | LC_ALL=C tr -cs 'a-zA-Z' '\n' \
    | awk 'length($0) >= 4' \
    | sort -u
}

#######################################
# Expand $LOGS and feed every matching file to extract_vocabulary.
# Globals:   LOGS (read)
# Arguments: $1 - nick (optional, default: Utopiah)
# Returns:   1 if no file matches $LOGS, else the status of the pipeline
#######################################
main() {
  local nick=${1:-Utopiah}
  local -a log_files
  local IFS=''   # no word splitting below, only pathname expansion

  # Unquoted on purpose: this is where the pattern is turned into file names.
  # shellcheck disable=SC2206
  log_files=( $LOGS )

  if (( ${#log_files[@]} == 0 )); then
    printf '%s: no log files match %s\n' "${0##*/}" "$LOGS" >&2
    return 1
  fi

  cat -- "${log_files[@]}" | extract_vocabulary "$nick"
}

# Variations on the same word stream (replace the final `sort -u` in
# extract_vocabulary with the tail of the pipeline shown):
#
# cleaning by removing low frequency words (seen 3 times or fewer) and
# counting what is left
#   ... | sort | uniq -c | awk '$1 > 3' | wc -l
#
# most commonly used words (the original note said 10; the command keeps 15)
#   ... | sort | uniq -c | sort -n | tail -15

main "$@"