diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:41:09 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:41:09 +0300 |
commit | 8c042f99cc7465c86351d21331a129111b75345d (patch) | |
tree | 9656e653be0e42e5469cec77635c20356de152c2 /awklib/eg | |
parent | 8ceb5f934787eb7be5fb452fb39179df66119954 (diff) | |
download | egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.gz egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.bz2 egawk-8c042f99cc7465c86351d21331a129111b75345d.zip |
Move to gawk-3.0.0.
Diffstat (limited to 'awklib/eg')
35 files changed, 1669 insertions, 0 deletions
diff --git a/awklib/eg/data/BBS-list b/awklib/eg/data/BBS-list new file mode 100644 index 00000000..1007417f --- /dev/null +++ b/awklib/eg/data/BBS-list @@ -0,0 +1,11 @@ +aardvark 555-5553 1200/300 B +alpo-net 555-3412 2400/1200/300 A +barfly 555-7685 1200/300 A +bites 555-1675 2400/1200/300 A +camelot 555-0542 300 C +core 555-2912 1200/300 C +fooey 555-1234 2400/1200/300 B +foot 555-6699 1200/300 B +macfoo 555-6480 1200/300 A +sdace 555-3430 2400/1200/300 A +sabafoo 555-2127 1200/300 C diff --git a/awklib/eg/data/inventory-shipped b/awklib/eg/data/inventory-shipped new file mode 100644 index 00000000..6788a0ef --- /dev/null +++ b/awklib/eg/data/inventory-shipped @@ -0,0 +1,17 @@ +Jan 13 25 15 115 +Feb 15 32 24 226 +Mar 15 24 34 228 +Apr 31 52 63 420 +May 16 34 29 208 +Jun 31 42 75 492 +Jul 24 34 67 436 +Aug 15 34 47 316 +Sep 13 55 37 277 +Oct 29 54 68 525 +Nov 20 87 82 577 +Dec 17 35 61 401 + +Jan 21 36 64 620 +Feb 26 58 80 652 +Mar 24 75 70 495 +Apr 21 70 74 514 diff --git a/awklib/eg/lib/assert.awk b/awklib/eg/lib/assert.awk new file mode 100644 index 00000000..914aa632 --- /dev/null +++ b/awklib/eg/lib/assert.awk @@ -0,0 +1,18 @@ +# assert --- assert that a condition is true. Otherwise exit. +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May, 1993 + +function assert(condition, string) +{ + if (! 
condition) { + printf("%s:%d: assertion failed: %s\n", + FILENAME, FNR, string) > "/dev/stderr" + _assert_exit = 1 + exit 1 + } +} + +END { + if (_assert_exit) + exit 1 +} diff --git a/awklib/eg/lib/ctime.awk b/awklib/eg/lib/ctime.awk new file mode 100644 index 00000000..0a50d262 --- /dev/null +++ b/awklib/eg/lib/ctime.awk @@ -0,0 +1,11 @@ +# ctime.awk +# +# awk version of C ctime(3) function + +function ctime(ts, format) +{ + format = "%a %b %d %H:%M:%S %Z %Y" + if (ts == 0) + ts = systime() # use current time as default + return strftime(format, ts) +} diff --git a/awklib/eg/lib/ftrans.awk b/awklib/eg/lib/ftrans.awk new file mode 100644 index 00000000..0d6e8108 --- /dev/null +++ b/awklib/eg/lib/ftrans.awk @@ -0,0 +1,15 @@ +# ftrans.awk --- handle data file transitions +# +# user supplies beginfile() and endfile() functions +# +# Arnold Robbins, arnold@gnu.ai.mit.edu. November 1992 +# Public Domain + +FNR == 1 { + if (_filename_ != "") + endfile(_filename_) + _filename_ = FILENAME + beginfile(FILENAME) +} + +END { endfile(_filename_) } diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk new file mode 100644 index 00000000..70a1ec0f --- /dev/null +++ b/awklib/eg/lib/getopt.awk @@ -0,0 +1,82 @@ +# getopt --- do C library getopt(3) function in awk +# +# arnold@gnu.ai.mit.edu +# Public domain +# +# Initial version: March, 1991 +# Revised: May, 1993 + +# External variables: +# Optind -- index of ARGV for first non-option argument +# Optarg -- string value of argument to current option +# Opterr -- if non-zero, print our own diagnostic +# Optopt -- current option letter + +# Returns +# -1 at end of options +# ? 
for unrecognized option +# <c> a character representing the current option + +# Private Data +# _opti index in multi-flag option, e.g., -abc +function getopt(argc, argv, options, optl, thisopt, i) +{ + optl = length(options) + if (optl == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", + thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" + } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} +BEGIN { + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) { + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + } +} diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk new file mode 100644 index 00000000..500dfcef --- /dev/null +++ b/awklib/eg/lib/gettime.awk @@ -0,0 +1,61 @@ +# gettimeofday --- get the time of day in a usable format +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain, May 1993 +# +# Returns a string in the format of output of date(1) +# Populates the array argument time with individual values: +# time["second"] -- seconds (0 - 59) +# time["minute"] -- minutes (0 - 59) +# time["hour"] -- hours (0 - 23) +# time["althour"] -- 
hours (0 - 12) +# time["monthday"] -- day of month (1 - 31) +# time["month"] -- month of year (1 - 12) +# time["monthname"] -- name of the month +# time["shortmonth"] -- short name of the month +# time["year"] -- year within century (0 - 99) +# time["fullyear"] -- year with century (19xx or 20xx) +# time["weekday"] -- day of week (Sunday = 0) +# time["altweekday"] -- day of week (Monday = 0) +# time["weeknum"] -- week number, Sunday first day +# time["altweeknum"] -- week number, Monday first day +# time["dayname"] -- name of weekday +# time["shortdayname"] -- short name of weekday +# time["yearday"] -- day of year (0 - 365) +# time["timezone"] -- abbreviation of timezone name +# time["ampm"] -- AM or PM designation + +function gettimeofday(time, ret, now, i) +{ + # get time once, avoids unnecessary system calls + now = systime() + + # return date(1)-style output + ret = strftime("%a %b %d %H:%M:%S %Z %Y", now) + + # clear out target array + for (i in time) + delete time[i] + + # fill in values, force numeric values to be + # numeric by adding 0 + time["second"] = strftime("%S", now) + 0 + time["minute"] = strftime("%M", now) + 0 + time["hour"] = strftime("%H", now) + 0 + time["althour"] = strftime("%I", now) + 0 + time["monthday"] = strftime("%d", now) + 0 + time["month"] = strftime("%m", now) + 0 + time["monthname"] = strftime("%B", now) + time["shortmonth"] = strftime("%b", now) + time["year"] = strftime("%y", now) + 0 + time["fullyear"] = strftime("%Y", now) + 0 + time["weekday"] = strftime("%w", now) + 0 + time["altweekday"] = strftime("%u", now) + 0 + time["dayname"] = strftime("%A", now) + time["shortdayname"] = strftime("%a", now) + time["yearday"] = strftime("%j", now) + 0 + time["timezone"] = strftime("%Z", now) + time["ampm"] = strftime("%p", now) + time["weeknum"] = strftime("%U", now) + 0 + time["altweeknum"] = strftime("%W", now) + 0 + + return ret +} diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c new file mode 100644 index 
00000000..9742c592 --- /dev/null +++ b/awklib/eg/lib/grcat.c @@ -0,0 +1,34 @@ +/* + * grcat.c + * + * Generate a printable version of the group database + * + * Arnold Robbins, arnold@gnu.ai.mit.edu + * May 1993 + * Public Domain + */ + +#include <stdio.h> +#include <grp.h> + +int +main(argc, argv) +int argc; +char **argv; +{ + struct group *g; + int i; + + while ((g = getgrent()) != NULL) { + printf("%s:%s:%d:", g->gr_name, g->gr_passwd, + g->gr_gid); + for (i = 0; g->gr_mem[i] != NULL; i++) { + printf("%s", g->gr_mem[i]); + if (g->gr_mem[i+1] != NULL) + putchar(','); + } + putchar('\n'); + } + endgrent(); + exit(0); +} diff --git a/awklib/eg/lib/groupawk.in b/awklib/eg/lib/groupawk.in new file mode 100644 index 00000000..a8103a04 --- /dev/null +++ b/awklib/eg/lib/groupawk.in @@ -0,0 +1,80 @@ +# group.awk --- functions for dealing with the group file +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN \ +{ + # Change to suit your system + _gr_awklib = "/usr/local/libexec/awk/" +} +function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i) +{ + if (_gr_inited) + return + + oldfs = FS + oldrs = RS + olddol0 = $0 + FS = ":" + RS = "\n" + + grcat = _gr_awklib "grcat" + while ((grcat | getline) > 0) { + if ($1 in _gr_byname) + _gr_byname[$1] = _gr_byname[$1] "," $4 + else + _gr_byname[$1] = $0 + if ($3 in _gr_bygid) + _gr_bygid[$3] = _gr_bygid[$3] "," $4 + else + _gr_bygid[$3] = $0 + + n = split($4, a, "[ \t]*,[ \t]*") + for (i = 1; i <= n; i++) + if (a[i] in _gr_groupsbyuser) + _gr_groupsbyuser[a[i]] = \ + _gr_groupsbyuser[a[i]] " " $1 + else + _gr_groupsbyuser[a[i]] = $1 + + _gr_bycount[++_gr_count] = $0 + } + close(grcat) + _gr_count = 0 + _gr_inited++ + FS = oldfs + RS = oldrs + $0 = olddol0 +} +function getgrnam(group) +{ + _gr_init() + if (group in _gr_byname) + return _gr_byname[group] + return "" +} +function getgrgid(gid) +{ + _gr_init() + if (gid in _gr_bygid) + return _gr_bygid[gid] + return "" +} +function getgruser(user) +{ + 
_gr_init() + if (user in _gr_groupsbyuser) + return _gr_groupsbyuser[user] + return "" +} +function getgrent() +{ + _gr_init() + if (++gr_count in _gr_bycount) + return _gr_bycount[_gr_count] + return "" +} +function endgrent() +{ + _gr_count = 0 +} diff --git a/awklib/eg/lib/join.awk b/awklib/eg/lib/join.awk new file mode 100644 index 00000000..e6b81656 --- /dev/null +++ b/awklib/eg/lib/join.awk @@ -0,0 +1,15 @@ +# join.awk --- join an array into a string +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +function join(array, start, end, sep, result, i) +{ + if (sep == "") + sep = " " + else if (sep == SUBSEP) # magic value + sep = "" + result = array[start] + for (i = start + 1; i <= end; i++) + result = result sep array[i] + return result +} diff --git a/awklib/eg/lib/mktime.awk b/awklib/eg/lib/mktime.awk new file mode 100644 index 00000000..60c5b60d --- /dev/null +++ b/awklib/eg/lib/mktime.awk @@ -0,0 +1,106 @@ +# mktime.awk --- convert a canonical date representation +# into a timestamp +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN \ +{ + # Initialize table of month lengths + _tm_months[0,1] = _tm_months[1,1] = 31 + _tm_months[0,2] = 28; _tm_months[1,2] = 29 + _tm_months[0,3] = _tm_months[1,3] = 31 + _tm_months[0,4] = _tm_months[1,4] = 30 + _tm_months[0,5] = _tm_months[1,5] = 31 + _tm_months[0,6] = _tm_months[1,6] = 30 + _tm_months[0,7] = _tm_months[1,7] = 31 + _tm_months[0,8] = _tm_months[1,8] = 31 + _tm_months[0,9] = _tm_months[1,9] = 30 + _tm_months[0,10] = _tm_months[1,10] = 31 + _tm_months[0,11] = _tm_months[1,11] = 30 + _tm_months[0,12] = _tm_months[1,12] = 31 +} +# decide if a year is a leap year +function _tm_isleap(year, ret) +{ + ret = (year % 4 == 0 && year % 100 != 0) || + (year % 400 == 0) + + return ret +} +# convert a date into seconds +function _tm_addup(a, total, yearsecs, daysecs, + hoursecs, i, j) +{ + hoursecs = 60 * 60 + daysecs = 24 * hoursecs + yearsecs = 365 * daysecs + + total = (a[1] 
- 1970) * yearsecs + + # extra day for leap years + for (i = 1970; i < a[1]; i++) + if (_tm_isleap(i)) + total += daysecs + + j = _tm_isleap(a[1]) + for (i = 1; i < a[2]; i++) + total += _tm_months[j, i] * daysecs + + total += (a[3] - 1) * daysecs + total += a[4] * hoursecs + total += a[5] * 60 + total += a[6] + + return total +} +# mktime --- convert a date into seconds, +# compensate for time zone + +function mktime(str, res1, res2, a, b, i, j, t, diff) +{ + i = split(str, a, " ") # don't rely on FS + + if (i != 6) + return -1 + + # force numeric + for (j in a) + a[j] += 0 + + # validate + if (a[1] < 1970 || + a[2] < 1 || a[2] > 12 || + a[3] < 1 || a[3] > 31 || + a[4] < 0 || a[4] > 23 || + a[5] < 0 || a[5] > 59 || + a[6] < 0 || a[6] > 61 ) + return -1 + + res1 = _tm_addup(a) + t = strftime("%Y %m %d %H %M %S", res1) + + if (_tm_debug) + printf("(%s) -> (%s)\n", str, t) > "/dev/stderr" + + split(t, b, " ") + res2 = _tm_addup(b) + + diff = res1 - res2 + + if (_tm_debug) + printf("diff = %d seconds\n", diff) > "/dev/stderr" + + res1 += diff + + return res1 +} +BEGIN { + if (_tm_test) { + printf "Enter date as yyyy mm dd hh mm ss: " + getline _tm_test_date + + t = mktime(_tm_test_date) + r = strftime("%Y %m %d %H %M %S", t) + printf "Got back (%s)\n", r + } +} diff --git a/awklib/eg/lib/nextfile.awk b/awklib/eg/lib/nextfile.awk new file mode 100644 index 00000000..0f729a87 --- /dev/null +++ b/awklib/eg/lib/nextfile.awk @@ -0,0 +1,15 @@ +# nextfile --- skip remaining records in current file +# correctly handle successive occurrences of the same file +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May, 1993 + +# this should be read in before the "main" awk program + +function nextfile() { _abandon_ = FILENAME; next } + +_abandon_ == FILENAME { + if (FNR == 1) + _abandon_ = "" + else + next +} diff --git a/awklib/eg/lib/ord.awk b/awklib/eg/lib/ord.awk new file mode 100644 index 00000000..7e62cb88 --- /dev/null +++ b/awklib/eg/lib/ord.awk @@ -0,0 +1,54 @@ +# 
ord.awk --- do ord and chr +# +# Global identifiers: +# _ord_: numerical values indexed by characters +# _ord_init: function to initialize _ord_ +# +# Arnold Robbins +# arnold@gnu.ai.mit.edu +# Public Domain +# 16 January, 1992 +# 20 July, 1992, revised + +BEGIN { _ord_init() } +function _ord_init( low, high, i, t) +{ + low = sprintf("%c", 7) # BEL is ascii 7 + if (low == "\a") { # regular ascii + low = 0 + high = 127 + } else if (sprintf("%c", 128 + 7) == "\a") { + # ascii, mark parity + low = 128 + high = 255 + } else { # ebcdic(!) + low = 0 + high = 255 + } + + for (i = low; i <= high; i++) { + t = sprintf("%c", i) + _ord_[t] = i + } +} +function ord(str, c) +{ + # only first character is of interest + c = substr(str, 1, 1) + return _ord_[c] +} +function chr(c) +{ + # force c to be numeric by adding 0 + return sprintf("%c", c + 0) +} +#### test code #### +# BEGIN \ +# { +# for (;;) { +# printf("enter a character: ") +# if (getline var <= 0) +# break +# printf("ord(%s) = %d\n", var, ord(var)) +# } +# } diff --git a/awklib/eg/lib/passwdawk.in b/awklib/eg/lib/passwdawk.in new file mode 100644 index 00000000..7b64f60d --- /dev/null +++ b/awklib/eg/lib/passwdawk.in @@ -0,0 +1,56 @@ +# passwd.awk --- access password file information +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN { + # tailor this to suit your system + _pw_awklib = "/usr/local/libexec/awk/" +} + +function _pw_init( oldfs, oldrs, olddol0, pwcat) +{ + if (_pw_inited) + return + oldfs = FS + oldrs = RS + olddol0 = $0 + FS = ":" + RS = "\n" + pwcat = _pw_awklib "pwcat" + while ((pwcat | getline) > 0) { + _pw_byname[$1] = $0 + _pw_byuid[$3] = $0 + _pw_bycount[++_pw_total] = $0 + } + close(pwcat) + _pw_count = 0 + _pw_inited = 1 + FS = oldfs + RS = oldrs + $0 = olddol0 +} +function getpwnam(name) +{ + _pw_init() + if (name in _pw_byname) + return _pw_byname[name] + return "" +} +function getpwuid(uid) +{ + _pw_init() + if (uid in _pw_byuid) + return _pw_byuid[uid] + return "" +} 
+function getpwent() +{ + _pw_init() + if (_pw_count < _pw_total) + return _pw_bycount[++_pw_count] + return "" +} +function endpwent() +{ + _pw_count = 0 +} diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c new file mode 100644 index 00000000..ecd25861 --- /dev/null +++ b/awklib/eg/lib/pwcat.c @@ -0,0 +1,29 @@ +/* + * pwcat.c + * + * Generate a printable version of the password database + * + * Arnold Robbins + * arnold@gnu.ai.mit.edu + * May 1993 + * Public Domain + */ + +#include <stdio.h> +#include <pwd.h> + +int +main(argc, argv) +int argc; +char **argv; +{ + struct passwd *p; + + while ((p = getpwent()) != NULL) + printf("%s:%s:%d:%d:%s:%s:%s\n", + p->pw_name, p->pw_passwd, p->pw_uid, + p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell); + + endpwent(); + exit(0); +} diff --git a/awklib/eg/misc/arraymax.awk b/awklib/eg/misc/arraymax.awk new file mode 100644 index 00000000..20dd1768 --- /dev/null +++ b/awklib/eg/misc/arraymax.awk @@ -0,0 +1,10 @@ +{ + if ($1 > max) + max = $1 + arr[$1] = $0 +} + +END { + for (x = 1; x <= max; x++) + print arr[x] +} diff --git a/awklib/eg/misc/arraymax.data b/awklib/eg/misc/arraymax.data new file mode 100644 index 00000000..dbee328c --- /dev/null +++ b/awklib/eg/misc/arraymax.data @@ -0,0 +1,5 @@ +5 I am the Five man +2 Who are you? The new number two! +4 . . . And four on the floor +1 Who is number one? +3 I three you. diff --git a/awklib/eg/misc/findpat.data b/awklib/eg/misc/findpat.data new file mode 100644 index 00000000..9f72969e --- /dev/null +++ b/awklib/eg/misc/findpat.data @@ -0,0 +1,7 @@ +FIND ru+n +My program runs +but not very quickly +FIND Melvin +JF+KM +This line is property of Reality Engineering Co. +Melvin was here. 
diff --git a/awklib/eg/misc/findpat.sh b/awklib/eg/misc/findpat.sh new file mode 100644 index 00000000..39710324 --- /dev/null +++ b/awklib/eg/misc/findpat.sh @@ -0,0 +1,10 @@ +awk '{ + if ($1 == "FIND") + regex = $2 + else { + where = match($0, regex) + if (where != 0) + print "Match of", regex, "found at", \ + where, "in", $0 + } +}' diff --git a/awklib/eg/prog/alarm.awk b/awklib/eg/prog/alarm.awk new file mode 100644 index 00000000..fa42dce0 --- /dev/null +++ b/awklib/eg/prog/alarm.awk @@ -0,0 +1,81 @@ +# alarm --- set an alarm +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# usage: alarm time [ "message" [ count [ delay ] ] ] + +BEGIN \ +{ + # Initial argument sanity checking + usage1 = "usage: alarm time ['message' [count [delay]]]" + usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1]) + + if (ARGC < 2) { + print usage > "/dev/stderr" + exit 1 + } else if (ARGC == 5) { + delay = ARGV[4] + 0 + count = ARGV[3] + 0 + message = ARGV[2] + } else if (ARGC == 4) { + count = ARGV[3] + 0 + message = ARGV[2] + } else if (ARGC == 3) { + message = ARGV[2] + } else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) { + print usage1 > "/dev/stderr" + print usage2 > "/dev/stderr" + exit 1 + } + + # set defaults for once we reach the desired time + if (delay == 0) + delay = 180 # 3 minutes + if (count == 0) + count = 5 + if (message == "") + message = sprintf("\aIt is now %s!\a", ARGV[1]) + else if (index(message, "\a") == 0) + message = "\a" message "\a" + # split up dest time + split(ARGV[1], atime, ":") + hour = atime[1] + 0 # force numeric + minute = atime[2] + 0 # force numeric + + # get current broken down time + gettimeofday(now) + + # if time given is 12-hour hours and it's after that + # hour, e.g., `alarm 5:30' at 9 a.m. 
means 5:30 p.m., + # then add 12 to real hour + if (hour < 12 && now["hour"] > hour) + hour += 12 + + # set target time in seconds since midnight + target = (hour * 60 * 60) + (minute * 60) + + # get current time in seconds since midnight + current = (now["hour"] * 60 * 60) + \ + (now["minute"] * 60) + now["second"] + + # how long to sleep for + naptime = target - current + if (naptime <= 0) { + print "time is in the past!" > "/dev/stderr" + exit 1 + } + # zzzzzz..... go away if interrupted + if (system(sprintf("sleep %d", naptime)) != 0) + exit 1 + + # time to notify! + command = sprintf("sleep %d", delay) + for (i = 1; i <= count; i++) { + print message + # if sleep command interrupted, go away + if (system(command) != 0) + break + } + + exit 0 +} diff --git a/awklib/eg/prog/awksed.awk b/awklib/eg/prog/awksed.awk new file mode 100644 index 00000000..cd96ddeb --- /dev/null +++ b/awklib/eg/prog/awksed.awk @@ -0,0 +1,31 @@ +# awksed.awk --- do s/foo/bar/g using just print +# Thanks to Michael Brennan for the idea + +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# August 1995 + +function usage() +{ + print "usage: awksed pat repl [files...]" > "/dev/stderr" + exit 1 +} + +BEGIN { + # validate arguments + if (ARGC < 3) + usage() + + RS = ARGV[1] + ORS = ARGV[2] + + # don't use arguments as files + ARGV[1] = ARGV[2] = "" +} + +# look ma, no hands! 
+{ + if (RT == "") + printf "%s", $0 + else + print +} diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk new file mode 100644 index 00000000..c69e6492 --- /dev/null +++ b/awklib/eg/prog/cut.awk @@ -0,0 +1,136 @@ +# cut.awk --- implement cut in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Options: +# -f list Cut fields +# -d c Field delimiter character +# -c list Cut characters +# +# -s Suppress lines without the delimiter character + +function usage( e1, e2) +{ + e1 = "usage: cut [-f list] [-d c] [-s] [files...]" + e2 = "usage: cut [-c list] [files...]" + print e1 > "/dev/stderr" + print e2 > "/dev/stderr" + exit 1 +} +BEGIN \ +{ + FS = "\t" # default + OFS = FS + while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) { + if (c == "f") { + by_fields = 1 + fieldlist = Optarg + } else if (c == "c") { + by_chars = 1 + fieldlist = Optarg + OFS = "" + } else if (c == "d") { + if (length(Optarg) > 1) { + printf("Using first character of %s" \ + " for delimiter\n", Optarg) > "/dev/stderr" + Optarg = substr(Optarg, 1, 1) + } + FS = Optarg + OFS = FS + if (FS == " ") # defeat awk semantics + FS = "[ ]" + } else if (c == "s") + suppress++ + else + usage() + } + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + if (by_fields && by_chars) + usage() + + if (by_fields == 0 && by_chars == 0) + by_fields = 1 # default + + if (fieldlist == "") { + print "cut: needs list for -c or -f" > "/dev/stderr" + exit 1 + } + + if (by_fields) + set_fieldlist() + else + set_charlist() +} +function set_fieldlist( n, m, i, j, k, f, g) +{ + n = split(fieldlist, f, ",") + j = 1 # index in flist + for (i = 1; i <= n; i++) { + if (index(f[i], "-") != 0) { # a range + m = split(f[i], g, "-") + if (m != 2 || g[1] >= g[2]) { + printf("bad field list: %s\n", + f[i]) > "/dev/stderr" + exit 1 + } + for (k = g[1]; k <= g[2]; k++) + flist[j++] = k + } else + flist[j++] = f[i] + } + nfields = j - 1 +} +function set_charlist( field, i, j, f, g, t, + filler, last, len) +{ + 
field = 1 # count total fields + n = split(fieldlist, f, ",") + j = 1 # index in flist + for (i = 1; i <= n; i++) { + if (index(f[i], "-") != 0) { # range + m = split(f[i], g, "-") + if (m != 2 || g[1] >= g[2]) { + printf(bad character list: %s\n", + f[i]) > "/dev/stderr" + exit 1 + } + len = g[2] - g[1] + 1 + if (g[1] > 1) # compute length of filler + filler = g[1] - last - 1 + else + filler = 0 + if (filler) + t[field++] = filler + t[field++] = len # length of field + last = g[2] + flist[j++] = field - 1 + } else { + if (f[i] > 1) + filler = f[i] - last - 1 + else + filler = 0 + if (filler) + t[field++] = filler + t[field++] = 1 + last = f[i] + flist[j++] = field - 1 + } + } + FIELDWIDTHS = join(t, 1, field - 1) + nfields = j - 1 +} +{ + if (by_fields && suppress && $0 !~ FS) + next + + for (i = 1; i <= nfields; i++) { + if ($flist[i] != "") { + printf "%s", $flist[i] + if (i < nfields && $flist[i+1] != "") + printf "%s", OFS + } + } + print "" +} diff --git a/awklib/eg/prog/dupword.awk b/awklib/eg/prog/dupword.awk new file mode 100644 index 00000000..8ae0fdc7 --- /dev/null +++ b/awklib/eg/prog/dupword.awk @@ -0,0 +1,16 @@ +# dupword --- find duplicate words in text +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# December 1991 + +{ + $0 = tolower($0) + gsub(/[^A-Za-z0-9 \t]/, ""); + if ($1 == prev) + printf("%s:%d: duplicate %s\n", + FILENAME, FNR, $1) + for (i = 2; i <= NF; i++) + if ($i == $(i-1)) + printf("%s:%d: duplicate %s\n", + FILENAME, FNR, $i) + prev = $NF +} diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk new file mode 100644 index 00000000..5a5ec988 --- /dev/null +++ b/awklib/eg/prog/egrep.awk @@ -0,0 +1,96 @@ +# egrep.awk --- simulate egrep in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Options: +# -c count of lines +# -s silent - use exit value +# -v invert test, success if no match +# -i ignore case +# -l print filenames only +# -e argument is pattern + +BEGIN { + while ((c = getopt(ARGC, 
ARGV, "ce:svil")) != -1) { + if (c == "c") + count_only++ + else if (c == "s") + no_print++ + else if (c == "v") + invert++ + else if (c == "i") + IGNORECASE = 1 + else if (c == "l") + filenames_only++ + else if (c == "e") + pattern = Optarg + else + usage() + } + if (pattern == "") + pattern = ARGV[Optind++] + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + if (Optind >= ARGC) { + ARGV[1] = "-" + ARGC = 2 + } else if (ARGC - Optind > 1) + do_filenames++ + +# if (IGNORECASE) +# pattern = tolower(pattern) +} +#{ +# if (IGNORECASE) +# $0 = tolower($0) +#} +function beginfile(junk) +{ + fcount = 0 +} +function endfile(file) +{ + if (! no_print && count_only) + if (do_filenames) + print file ":" fcount + else + print fcount + + total += fcount +} +{ + matches = ($0 ~ pattern) + if (invert) + matches = ! matches + + fcount += matches # 1 or 0 + + if (! matches) + next + + if (no_print && ! count_only) + nextfile + + if (filenames_only && ! count_only) { + print FILENAME + nextfile + } + + if (do_filenames && ! count_only) + print FILENAME ":" $0 + else if (! 
count_only) + print +} +END \ +{ + if (total == 0) + exit 1 + exit 0 +} +function usage( e) +{ + e = "Usage: egrep [-csvil] [-e pat] [files ...]" + print e > "/dev/stderr" + exit 1 +} diff --git a/awklib/eg/prog/extract.awk b/awklib/eg/prog/extract.awk new file mode 100644 index 00000000..a9f5b80f --- /dev/null +++ b/awklib/eg/prog/extract.awk @@ -0,0 +1,72 @@ +# extract.awk --- extract files and run programs +# from texinfo files +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN { IGNORECASE = 1 } + +/^@c(omment)?[ \t]+system/ \ +{ + if (NF < 3) { + e = (FILENAME ":" FNR) + e = (e ": badly formed `system' line") + print e > "/dev/stderr" + next + } + $1 = "" + $2 = "" + stat = system($0) + if (stat != 0) { + e = (FILENAME ":" FNR) + e = (e ": warning: system returned " stat) + print e > "/dev/stderr" + } +} +/^@c(omment)?[ \t]+file/ \ +{ + if (NF != 3) { + e = (FILENAME ":" FNR ": badly formed `file' line") + print e > "/dev/stderr" + next + } + if ($3 != curfile) { + if (curfile != "") + close(curfile) + curfile = $3 + } + + for (;;) { + if ((getline line) <= 0) + unexpected_eof() + if (line ~ /^@c(omment)?[ \t]+endfile/) + break + else if (line ~ /^@(end[ \t]+)?group/) + continue + if (index(line, "@") == 0) { + print line > curfile + continue + } + n = split(line, a, "@") + # if a[1] == "", means leading @, + # don't add one back in. 
+ for (i = 2; i <= n; i++) { + if (a[i] == "") { # was an @@ + a[i] = "@" + if (a[i+1] == "") + i++ + } + } + print join(a, 1, n, SUBSEP) > curfile + } +} +function unexpected_eof() +{ + printf("%s:%d: unexpected EOF or error\n", \ + FILENAME, FNR) > "/dev/stderr" + exit 1 +} + +END { + if (curfile) + close(curfile) +} diff --git a/awklib/eg/prog/histsort.awk b/awklib/eg/prog/histsort.awk new file mode 100644 index 00000000..c2c9d1a7 --- /dev/null +++ b/awklib/eg/prog/histsort.awk @@ -0,0 +1,14 @@ +# histsort.awk --- compact a shell history file +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Thanks to Byron Rakitzis for the general idea +{ + if (data[$0]++ == 0) + lines[++count] = $0 +} + +END { + for (i = 1; i <= count; i++) + print lines[i] +} diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk new file mode 100644 index 00000000..b29ef61a --- /dev/null +++ b/awklib/eg/prog/id.awk @@ -0,0 +1,69 @@ +# id.awk --- implement id in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# output is: +# uid=12(foo) euid=34(bar) gid=3(baz) \ +# egid=5(blat) groups=9(nine),2(two),1(one) + +BEGIN \ +{ + if ((getline < "/dev/user") < 0) { + err = "id: no /dev/user support - cannot run" + print err > "/dev/stderr" + exit 1 + } + close("/dev/user") + + uid = $1 + euid = $2 + gid = $3 + egid = $4 + + printf("uid=%d", uid) + pw = getpwuid(uid) + if (pw != "") { + split(pw, a, ":") + printf("(%s)", a[1]) + } + + if (euid != uid) { + printf(" euid=%d", euid) + pw = getpwuid(euid) + if (pw != "") { + split(pw, a, ":") + printf("(%s)", a[1]) + } + } + + printf(" gid=%d", gid) + pw = getgrgid(gid) + if (pw != "") { + split(pw, a, ":") + printf("(%s)", a[1]) + } + + if (egid != gid) { + printf(" egid=%d", egid) + pw = getgrgid(egid) + if (pw != "") { + split(pw, a, ":") + printf("(%s)", a[1]) + } + } + + if (NF > 4) { + printf(" groups="); + for (i = 5; i <= NF; i++) { + printf("%d", $i) + pw = getgrgid($i) + if (pw != "") { + 
split(pw, a, ":") + printf("(%s)", a[1]) + } + if (i < NF) + printf(",") + } + } + print "" +} diff --git a/awklib/eg/prog/igawk.sh b/awklib/eg/prog/igawk.sh new file mode 100644 index 00000000..a9fff180 --- /dev/null +++ b/awklib/eg/prog/igawk.sh @@ -0,0 +1,130 @@ +#! /bin/sh + +# igawk --- like gawk but do @include processing +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# July 1993 + +if [ "$1" = debug ] +then + set -x + shift +else + # cleanup on exit, hangup, interrupt, quit, termination + trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15 +fi + +while [ $# -ne 0 ] # loop over arguments +do + case $1 in + --) shift; break;; + + -W) shift + set -- -W"$@" + continue;; + + -[vF]) opts="$opts $1 '$2'" + shift;; + + -[vF]*) opts="$opts '$1'" ;; + + -f) echo @include "$2" >> /tmp/ig.s.$$ + shift;; + + -f*) f=`echo "$1" | sed 's/-f//'` + echo @include "$f" >> /tmp/ig.s.$$ ;; + + -?file=*) # -Wfile or --file + f=`echo "$1" | sed 's/-.file=//'` + echo @include "$f" >> /tmp/ig.s.$$ ;; + + -?file) # get arg, $2 + echo @include "$2" >> /tmp/ig.s.$$ + shift;; + + -?source=*) # -Wsource or --source + t=`echo "$1" | sed 's/-.source=//'` + echo "$t" >> /tmp/ig.s.$$ ;; + + -?source) # get arg, $2 + echo "$2" >> /tmp/ig.s.$$ + shift;; + + -?version) + echo igawk: version 1.0 1>&2 + gawk --version + exit 0 ;; + + -[W-]*) opts="$opts '$1'" ;; + + *) break;; + esac + shift +done + +if [ ! -s /tmp/ig.s.$$ ] +then + if [ -z "$1" ] + then + echo igawk: no program! 
1>&2 + exit 1 + else + echo "$1" > /tmp/ig.s.$$ + shift + fi +fi + +# at this point, /tmp/ig.s.$$ has the program +gawk -- ' +# process @include directives + +function pathto(file, i, t, junk) +{ + if (index(file, "/") != 0) + return file + + for (i = 1; i <= ndirs; i++) { + t = (pathlist[i] "/" file) + if ((getline junk < t) > 0) { + # found it + close(t) + return t + } + } + return "" +} +BEGIN { + path = ENVIRON["AWKPATH"] + ndirs = split(path, pathlist, ":") + for (i = 1; i <= ndirs; i++) { + if (pathlist[i] == "") + pathlist[i] = "." + } + stackptr = 0 + input[stackptr] = ARGV[1] # ARGV[1] is first file + + for (; stackptr >= 0; stackptr--) { + while ((getline < input[stackptr]) > 0) { + if (tolower($1) != "@include") { + print + continue + } + fpath = pathto($2) + if (fpath == "") { + printf("igawk:%s:%d: cannot find %s\n", \ + input[stackptr], FNR, $2) > "/dev/stderr" + continue + } + if (! (fpath in processed)) { + processed[fpath] = input[stackptr] + input[++stackptr] = fpath + } else + print $2, "included in", input[stackptr], \ + "already included in", \ + processed[fpath] > "/dev/stderr" + } + close(input[stackptr]) + } +}' /tmp/ig.s.$$ > /tmp/ig.e.$$ +eval gawk -f /tmp/ig.e.$$ $opts -- "$@" + +exit $? diff --git a/awklib/eg/prog/labels.awk b/awklib/eg/prog/labels.awk new file mode 100644 index 00000000..55815d20 --- /dev/null +++ b/awklib/eg/prog/labels.awk @@ -0,0 +1,53 @@ +# labels.awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# June 1992 + +# Program to print labels. Each label is 5 lines of data +# that may have blank lines. The label sheets have 2 +# blank lines at the top and 2 at the bottom. 
+ +# RS = "" selects paragraph mode: each blank-line separated label is one record +BEGIN { RS = "" ; MAXLINES = 100 } + +# printpage --- print the saved lines as two columns of five-line labels +# (line[i+j] on the left, line[i+j+5] on the right), then blank out +# line[] for the next page +function printpage( i, j) +{ + if (Nlines <= 0) + return + + printf "\n\n" # header + + for (i = 1; i <= Nlines; i += 10) { + if (i == 21 || i == 61) + print "" + for (j = 0; j < 5; j++) { + if (i + j > MAXLINES) + break + printf " %-41s %s\n", line[i+j], line[i+j+5] + } + print "" + } + + printf "\n\n" # footer + + for (i in line) + line[i] = "" +} + +# main rule +{ + if (Count >= 20) { + printpage() + Count = 0 + Nlines = 0 + } + n = split($0, a, "\n") + for (i = 1; i <= n; i++) + line[++Nlines] = a[i] + # pad a short label with empty lines up to five + for (; i <= 5; i++) + line[++Nlines] = "" + Count++ +} + +END \ +{ + printpage() +} diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk new file mode 100644 index 00000000..e48653b4 --- /dev/null +++ b/awklib/eg/prog/split.awk @@ -0,0 +1,54 @@ +# split.awk --- do split in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# usage: split [-num] [file] [outname] + +BEGIN \ +{ + outfile = "x" # default + count = 1000 + if (ARGC > 4) + usage() + + i = 1 + if (ARGV[i] ~ /^-[0-9]+$/) { + count = -ARGV[i] + ARGV[i] = "" + i++ + } + # test argv in case reading from stdin instead of file + if (i in ARGV) + i++ # skip data file name + if (i in ARGV) { + outfile = ARGV[i] + ARGV[i] = "" + } + + s1 = s2 = "a" + out = (outfile s1 s2) +} +# Start a new output file after every count lines. The two-letter +# suffix s1 s2 runs from aa up to zz; running past zz is an error. +# NOTE(review): chr() and ord() are not defined in this file; +# presumably they come from a companion library --- confirm. +{ + if (++tcount > count) { + close(out) + if (s2 == "z") { + if (s1 == "z") { + printf("split: %s is too large to split\n", \ + FILENAME) > "/dev/stderr" + exit 1 + } + s1 = chr(ord(s1) + 1) + s2 = "a" + } else + s2 = chr(ord(s2) + 1) + out = (outfile s1 s2) + tcount = 1 + } + print > out +} +function usage( e) +{ + e = "usage: split [-num] [file] [outname]" + print e > "/dev/stderr" + exit 1 +} diff --git a/awklib/eg/prog/tee.awk b/awklib/eg/prog/tee.awk new file mode 100644 index 00000000..895e4398 --- /dev/null +++ b/awklib/eg/prog/tee.awk @@ -0,0 +1,38 @@ +# tee.awk --- tee in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 +# Revised
December 1995 + +# copy[] keeps the output file names; ARGV is then reduced to "-" so +# that only standard input is read +BEGIN \ +{ + for (i = 1; i < ARGC; i++) + copy[i] = ARGV[i] + + if (ARGV[1] == "-a") { + append = 1 + delete ARGV[1] + delete copy[1] + ARGC-- + } + if (ARGC < 2) { + print "usage: tee [-a] file ..." > "/dev/stderr" + exit 1 + } + ARGV[1] = "-" + ARGC = 2 +} +{ + # moving the if outside the loop makes it run faster + if (append) + for (i in copy) + print >> copy[i] + else + for (i in copy) + print > copy[i] + print +} +END \ +{ + for (i in copy) + close(copy[i]) +} diff --git a/awklib/eg/prog/translate.awk b/awklib/eg/prog/translate.awk new file mode 100644 index 00000000..6e9aa5a5 --- /dev/null +++ b/awklib/eg/prog/translate.awk @@ -0,0 +1,46 @@ +# translate --- do tr like stuff +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# August 1989 + +# bugs: does not handle things like: tr A-Z a-z, it has +# to be spelled out. However, if `to' is shorter than `from', +# the last character in `to' is used for the rest of `from'. + +# stranslate --- return target with each character found in from +# replaced by the character at the same position in to. +# Everything after target is a local variable. The target is mapped +# one character at a time, so a replacement is never translated a +# second time and regexp metacharacters in from are taken literally. +function stranslate(from, to, target, lf, lt, ltarget, t_ar, i, c, result) +{ + lf = length(from) + lt = length(to) + ltarget = length(target) + for (i = 1; i <= lt; i++) + t_ar[substr(from, i, 1)] = substr(to, i, 1) + if (lt < lf) + for (; i <= lf; i++) + t_ar[substr(from, i, 1)] = substr(to, lt, 1) + for (i = 1; i <= ltarget; i++) { + c = substr(target, i, 1) + if (c in t_ar) + c = t_ar[c] + result = result c + } + return result +} + +# translate --- apply stranslate to the current record and store it back in $0 +function translate(from, to) +{ + return $0 = stranslate(from, to, $0) +} + +# main program +BEGIN { + if (ARGC < 3) { + print "usage: translate from to" > "/dev/stderr" + exit 1 + } + FROM = ARGV[1] + TO = ARGV[2] + ARGC = 2 + ARGV[1] = "-" +} + +{ + translate(FROM, TO) + print +} diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk new file mode 100644 index 00000000..5f63ef0f --- /dev/null +++ b/awklib/eg/prog/uniq.awk @@ -0,0 +1,116 @@ +# uniq.awk --- do uniq in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +function usage( e) +{ + e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" + print e > 
"/dev/stderr" + exit 1 +} + +# -c count lines. overrides -d and -u +# -d only repeated lines +# -u only non-repeated lines +# -n skip n fields +# +n skip n characters, skip fields first + +BEGIN \ +{ + count = 1 + outputfile = "/dev/stdout" + opts = "udc0:1:2:3:4:5:6:7:8:9:" + while ((c = getopt(ARGC, ARGV, opts)) != -1) { + if (c == "u") + non_repeated_only++ + else if (c == "d") + repeated_only++ + else if (c == "c") + do_count++ + else if (index("0123456789", c) != 0) { + # getopt requires args to options + # this messes us up for things like -5 + if (Optarg ~ /^[0-9]+$/) + fcount = (c Optarg) + 0 + else { + fcount = c + 0 + Optind-- + } + } else + usage() + } + + if (ARGV[Optind] ~ /^\+[0-9]+$/) { + charcount = substr(ARGV[Optind], 2) + 0 + Optind++ + } + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + + # neither -d nor -u given: print both kinds of line + if (repeated_only == 0 && non_repeated_only == 0) + repeated_only = non_repeated_only = 1 + + # two operands left: the second one is the output file + if (ARGC - Optind == 2) { + outputfile = ARGV[ARGC - 1] + ARGV[ARGC - 1] = "" + } +} +# are_equal --- compare the saved line in last with $0, after skipping +# fcount leading fields and then charcount leading characters. +# NOTE(review): join() is not defined in this file; presumably it comes +# from a companion library --- confirm. +function are_equal( n, m, clast, cline, alast, aline) +{ + if (fcount == 0 && charcount == 0) + return (last == $0) + + if (fcount > 0) { + n = split(last, alast) + m = split($0, aline) + clast = join(alast, fcount+1, n) + cline = join(aline, fcount+1, m) + } else { + clast = last + cline = $0 + } + if (charcount) { + clast = substr(clast, charcount + 1) + cline = substr(cline, charcount + 1) + } + + return (clast == cline) +} +# the first record only primes last; there is nothing to compare it with yet +NR == 1 { + last = $0 + next +} + +{ + equal = are_equal() + + if (do_count) { # overrides -d and -u + if (equal) + count++ + else { + printf("%4d %s\n", count, last) > outputfile + last = $0 + count = 1 # reset + } + next + } + + if (equal) + count++ + else { + if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > outputfile + last = $0 + count = 1 + } +} + +END { + # no input at all: there is no pending line to flush + if (NR == 0) + exit + if (do_count) + printf("%4d %s\n", count, last) > outputfile + else if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > 
outputfile +} diff --git a/awklib/eg/prog/wc.awk b/awklib/eg/prog/wc.awk new file mode 100644 index 00000000..e9898159 --- /dev/null +++ b/awklib/eg/prog/wc.awk @@ -0,0 +1,68 @@ +# wc.awk --- count lines, words, characters +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Options: +# -l only count lines +# -w only count words +# -c only count characters +# +# Default is to count lines, words, characters + +BEGIN { + # let getopt print a message about + # invalid options. we ignore them + while ((c = getopt(ARGC, ARGV, "lwc")) != -1) { + if (c == "l") + do_lines = 1 + else if (c == "w") + do_words = 1 + else if (c == "c") + do_chars = 1 + } + for (i = 1; i < Optind; i++) + ARGV[i] = "" + + # if no options, do all + if (! do_lines && ! do_words && ! do_chars) + do_lines = do_words = do_chars = 1 + + # i is now the index of the first file operand, so ARGC - i is + # the number of files; a total is wanted for more than one + print_total = (ARGC - i > 1) +} +# beginfile --- reset the per-file counters and remember the file name +function beginfile(file) +{ + chars = lines = words = 0 + fname = FILENAME +} + +# endfile --- add the per-file counts to the totals and print the +# selected counts followed by the file name +function endfile(file) +{ + tchars += chars + tlines += lines + twords += words + if (do_lines) + printf "\t%d", lines + if (do_words) + printf "\t%d", words + if (do_chars) + printf "\t%d", chars + printf "\t%s\n", fname +} +# do per line +{ + chars += length($0) + 1 # get newline + lines++ + words += NF +} + +END { + if (print_total) { + if (do_lines) + printf "\t%d", tlines + if (do_words) + printf "\t%d", twords + if (do_chars) + printf "\t%d", tchars + print "\ttotal" + } +} diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk new file mode 100644 index 00000000..b67fed47 --- /dev/null +++ b/awklib/eg/prog/wordfreq.awk @@ -0,0 +1,13 @@ +# Print list of word frequencies +{ + $0 = tolower($0) # remove case distinctions + gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation + for (i = 1; i <= NF; i++) + freq[$i]++ +} +END { + # sort on the count (field 2), numerically, largest first; + # -k replaces the obsolete +1 key syntax + sort = "sort -k 2nr" + for (word in freq) + printf "%s\t%d\n", word, freq[word] | sort + close(sort) +} |