aboutsummaryrefslogtreecommitdiffstats
path: root/awklib/eg/prog/wordfreq.awk
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 12:41:09 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 12:41:09 +0300
commit 8c042f99cc7465c86351d21331a129111b75345d (patch)
tree 9656e653be0e42e5469cec77635c20356de152c2 /awklib/eg/prog/wordfreq.awk
parent 8ceb5f934787eb7be5fb452fb39179df66119954 (diff)
download egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.gz
egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.bz2
egawk-8c042f99cc7465c86351d21331a129111b75345d.zip
Move to gawk-3.0.0.
Diffstat (limited to 'awklib/eg/prog/wordfreq.awk')
-rw-r--r-- awklib/eg/prog/wordfreq.awk 13
1 files changed, 13 insertions, 0 deletions
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
new file mode 100644
index 00000000..b67fed47
--- /dev/null
+++ b/awklib/eg/prog/wordfreq.awk
@@ -0,0 +1,13 @@
+# Print list of word frequencies: one "word<TAB>count" line per distinct word
+{
+    $0 = tolower($0)                        # remove case distinctions; assigning $0 re-splits the fields
+    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)  # remove punctuation; POSIX classes keep non-ASCII letters too
+    for (i = 1; i <= NF; i++)               # each remaining whitespace-separated field is one word
+        freq[$i]++                          # freq[] is reported by the END rule
+}
+END {                      # report every counted word, highest count first
+    sort = "sort -k 2nr"   # POSIX key syntax; the obsolete "+1" form is taken as a file name by current sort
+    for (word in freq)     # array traversal order is unspecified, so sort(1) imposes the ordering
+        printf "%s\t%d\n", word, freq[word] | sort
+    close(sort)            # close the pipe so sort sees end-of-input and writes its result
+}