about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-05-15 22:28:21 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2014-05-15 22:28:21 +0300
commit 8b086817a7907d54dbe813f0dd05626b86e56cd1 (patch)
tree 7e34a110a6e70581ffbebb3cbad8a1ff55a8f025
parent b34ea22faeecc99f81f4d897d5c4cc815eab2ddb (diff)
download egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.tar.gz
egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.tar.bz2
egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.zip
Edits through Chapter 11, fix displays for docbook.
-rw-r--r-- awklib/eg/lib/getopt.awk 2
-rw-r--r-- awklib/eg/lib/gettime.awk 2
-rw-r--r-- awklib/eg/lib/grcat.c 2
-rw-r--r-- awklib/eg/lib/pwcat.c 2
-rw-r--r-- awklib/eg/prog/cut.awk 2
-rw-r--r-- awklib/eg/prog/egrep.awk 4
-rw-r--r-- awklib/eg/prog/id.awk 37
-rw-r--r-- awklib/eg/prog/split.awk 5
-rw-r--r-- doc/ChangeLog 4
-rw-r--r-- doc/gawk.info 1210
-rw-r--r-- doc/gawk.texi 310
-rw-r--r-- doc/gawktexi.in 310
12 files changed, 964 insertions, 926 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index 4283a7e1..db957ceb 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -70,7 +70,7 @@ BEGIN {
# test program
if (_getopt_test) {
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk
index 4cb56330..3da9c8ab 100644
--- a/awklib/eg/lib/gettime.awk
+++ b/awklib/eg/lib/gettime.awk
@@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c
index ff2913a1..7d6b6a74 100644
--- a/awklib/eg/lib/grcat.c
+++ b/awklib/eg/lib/grcat.c
@@ -1,7 +1,7 @@
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
/*
* Arnold Robbins, arnold@skeeve.com, May 1993
diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c
index 910e0329..934ef34e 100644
--- a/awklib/eg/lib/pwcat.c
+++ b/awklib/eg/lib/pwcat.c
@@ -1,7 +1,7 @@
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
/*
* Arnold Robbins, arnold@skeeve.com, May 1993
diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk
index 1399411e..09ba1f7c 100644
--- a/awklib/eg/prog/cut.awk
+++ b/awklib/eg/prog/cut.awk
@@ -43,7 +43,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
}
diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk
index 56d199c8..86b3cfda 100644
--- a/awklib/eg/prog/egrep.awk
+++ b/awklib/eg/prog/egrep.awk
@@ -90,9 +90,7 @@ function endfile(file)
}
END \
{
- if (total == 0)
- exit 1
- exit 0
+ exit (total == 0)
}
function usage( e)
{
diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk
index 8b60a245..cf744447 100644
--- a/awklib/eg/prog/id.awk
+++ b/awklib/eg/prog/id.awk
@@ -5,6 +5,7 @@
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
# output is:
# uid=12(foo) euid=34(bar) gid=3(baz) \
@@ -19,34 +20,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -55,13 +48,17 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
print ""
}
+
+function pr_first_field(str, a)
+{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+}
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk
index c907530b..bcc73ae6 100644
--- a/awklib/eg/prog/split.awk
+++ b/awklib/eg/prog/split.awk
@@ -4,8 +4,9 @@
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN {
outfile = "x" # default
@@ -14,7 +15,7 @@ BEGIN {
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) {
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) {
count = -ARGV[i]
ARGV[i] = ""
i++
diff --git a/doc/ChangeLog b/doc/ChangeLog
index ba4aa094..c0c382d3 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2014-05-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Fix displays for docbook, edits through Chapter 11.
+
2014-05-14 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix real preface for docbook.
diff --git a/doc/gawk.info b/doc/gawk.info
index d725b73e..a9fcc117 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -2276,8 +2276,8 @@ There are two ways to run `awk'--with an explicit program or with one
or more program files. Here are templates for both of them; items
enclosed in [...] in these templates are optional:
- awk [OPTIONS] -f progfile [`--'] FILE ...
- awk [OPTIONS] [`--'] 'PROGRAM' FILE ...
+ `awk' [OPTIONS] `-f' PROGFILE [`--'] FILE ...
+ `awk' [OPTIONS] [`--'] `'PROGRAM'' FILE ...
Besides traditional one-letter POSIX-style options, `gawk' also
supports GNU long options.
@@ -8933,10 +8933,10 @@ which (but not both) may be omitted. The purpose of the "action" is to
tell `awk' what to do once a match for the pattern is found. Thus, in
outline, an `awk' program generally looks like this:
- [PATTERN] { ACTION }
- PATTERN [{ ACTION }]
+ [PATTERN] `{ ACTION }'
+ PATTERN [`{ ACTION }']
...
- function NAME(ARGS) { ... }
+ `function NAME(ARGS) { ... }'
...
An action consists of one or more `awk' "statements", enclosed in
@@ -9024,7 +9024,7 @@ File: gawk.info, Node: If Statement, Next: While Statement, Up: Statements
The `if'-`else' statement is `awk''s decision-making statement. It
looks like this:
- if (CONDITION) THEN-BODY [else ELSE-BODY]
+ `if (CONDITION) THEN-BODY' [`else ELSE-BODY']
The CONDITION is an expression that controls what the rest of the
statement does. If the CONDITION is true, THEN-BODY is executed;
@@ -9507,7 +9507,7 @@ The `exit' statement causes `awk' to immediately stop executing the
current rule and to stop processing input; any remaining input is
ignored. The `exit' statement is written as follows:
- exit [RETURN CODE]
+ `exit' [RETURN CODE]
When an `exit' statement is executed from a `BEGIN' rule, the
program stops processing everything immediately. No input records are
@@ -12786,10 +12786,10 @@ starting to execute any of it.
The definition of a function named NAME looks like this:
- function NAME([PARAMETER-LIST])
- {
+ `function' NAME`('[PARAMETER-LIST]`)'
+ `{'
BODY-OF-FUNCTION
- }
+ `}'
Here, NAME is the name of the function to define. A valid function
name is like a valid variable name: a sequence of letters, digits, and
@@ -13232,7 +13232,7 @@ control to the calling part of the `awk' program. It can also be used
to return a value for use in the rest of the `awk' program. It looks
like this:
- return [EXPRESSION]
+ `return' [EXPRESSION]
The EXPRESSION part is optional. Due most likely to an oversight,
POSIX does not define what the return value is if you omit the
@@ -14250,7 +14250,7 @@ current time formatted in the same way as the `date' utility:
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -14510,8 +14510,8 @@ File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Functio
Normally, if you give `awk' a data file that isn't readable, it stops
with a fatal error. There are times when you might want to just ignore
-such files and keep going. You can do this by prepending the following
-program to your `awk' program:
+such files and keep going.(1) You can do this by prepending the
+following program to your `awk' program:
# readable.awk --- library file to skip over unreadable files
@@ -14531,10 +14531,16 @@ program to your `awk' program:
element from `ARGV' with `delete' skips the file (since it's no longer
in the list). See also *note ARGC and ARGV::.
+ ---------- Footnotes ----------
+
+ (1) The `BEGINFILE' special pattern (*note BEGINFILE/ENDFILE::)
+provides an alternative mechanism for dealing with files that can't be
+opened. However, the code here provides a portable solution.
+

File: gawk.info, Node: Empty Files, Next: Ignoring Assigns, Prev: File Checking, Up: Data File Management
-10.3.4 Checking For Zero-length Files
+10.3.4 Checking for Zero-length Files
-------------------------------------
All known `awk' implementations silently skip over zero-length files.
@@ -14879,7 +14885,7 @@ is in `ARGV[0]':
# test program
if (_getopt_test) {
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -14892,17 +14898,17 @@ is in `ARGV[0]':
result of two sample runs of the test program:
$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
- -| c = <a>, optarg = <>
- -| c = <c>, optarg = <>
- -| c = <b>, optarg = <ARG>
+ -| c = <a>, Optarg = <>
+ -| c = <c>, Optarg = <>
+ -| c = <b>, Optarg = <ARG>
-| non-option arguments:
-| ARGV[3] = <bax>
-| ARGV[4] = <-x>
$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc
- -| c = <a>, optarg = <>
+ -| c = <a>, Optarg = <>
error--> x -- invalid option
- -| c = <?>, optarg = <>
+ -| c = <?>, Optarg = <>
-| non-option arguments:
-| ARGV[4] = <xyz>
-| ARGV[5] = <abc>
@@ -14961,7 +14967,7 @@ that "cats" the password database:
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
#include <stdio.h>
#include <pwd.h>
@@ -15186,7 +15192,7 @@ group database, is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
#include <stdio.h>
#include <grp.h>
@@ -15222,9 +15228,10 @@ Group Password
used; it is usually empty or set to `*'.
Group ID Number
- The group's numeric group ID number; this number must be unique
- within the file. (On some systems it's a C `long', and not an
- `int'. Thus we cast it to `long' for all cases.)
+ The group's numeric group ID number; the association of name to
+ number must be unique within the file. (On some systems it's a C
+ `long', and not an `int'. Thus we cast it to `long' for all
+ cases.)
Group Member List
A comma-separated list of user names. These users are members of
@@ -15333,10 +15340,7 @@ following:
For this reason, `_gr_init()' looks to see if a group name or group
ID number is already seen. If it is, then the user names are simply
-concatenated onto the previous list of users. (There is actually a
-subtle problem with the code just presented. Suppose that the first
-time there were no names. This code adds the names with a leading
-comma. It also doesn't check that there is a `$4'.)
+concatenated onto the previous list of users.(1)
Finally, `_gr_init()' closes the pipeline to `grcat', restores `FS'
(and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes
@@ -15401,6 +15405,12 @@ very simple, relying on `awk''s associative arrays to do work.
The `id' program in *note Id Program::, uses these functions.
+ ---------- Footnotes ----------
+
+ (1) There is actually a subtle problem with the code just presented.
+Suppose that the first time there were no names. This code adds the
+names with a leading comma. It also doesn't check that there is a `$4'.
+

File: gawk.info, Node: Walking Arrays, Prev: Group Functions, Up: Library Functions
@@ -15637,7 +15647,7 @@ by characters, the output field separator is set to the null string:
if (FS == " ") # defeat awk semantics
FS = "[ ]"
} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
}
@@ -15806,7 +15816,7 @@ The `egrep' utility searches files for patterns. It uses regular
expressions that are almost identical to those available in `awk'
(*note Regexp::). You invoke it as follows:
- egrep [ OPTIONS ] 'PATTERN' FILES ...
+ `egrep' [OPTIONS] `'PATTERN'' FILES ...
The PATTERN is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
@@ -15950,6 +15960,11 @@ know the total number of lines that matched the pattern:
total += fcount
}
+ The `BEGINFILE' and `ENDFILE' special patterns (*note
+BEGINFILE/ENDFILE::) could be used, but then the program would be
+`gawk'-specific. Additionally, this example was written before `gawk'
+acquired `BEGINFILE' and `ENDFILE'.
+
The following rule does most of the work of matching lines. The
variable `matches' is true if the line matched the pattern. If the user
wants lines that did not match, the sense of `matches' is inverted
@@ -15997,9 +16012,7 @@ there are no matches, the exit status is one; otherwise it is zero:
END \
{
- if (total == 0)
- exit 1
- exit 0
+ exit (total == 0)
}
The `usage()' function prints a usage message in case of invalid
@@ -16041,7 +16054,7 @@ different from the real ones. If possible, `id' also supplies the
corresponding user and group names. The output might look like this:
$ id
- -| uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+ -| uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
This information is part of what is provided by `gawk''s `PROCINFO'
array (*note Built-in Variables::). However, the `id' utility provides
@@ -16074,34 +16087,26 @@ and the group numbers:
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -16110,10 +16115,8 @@ and the group numbers:
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
@@ -16121,6 +16124,12 @@ and the group numbers:
print ""
}
+ function pr_first_field(str, a)
+ {
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ }
+
The test in the `for' loop is worth noting. Any supplementary
groups in the `PROCINFO' array have the indices `"group1"' through
`"groupN"' for some N, i.e., the total number of supplementary groups.
@@ -16135,6 +16144,10 @@ the last group in the array and the loop exits.
then the condition is false the first time it's tested, and the loop
body never executes.
+ The `pr_first_field()' function simply isolates out some code that
+is used repeatedly, making the whole program slightly shorter and
+cleaner.
+

File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, Up: Clones
@@ -16144,7 +16157,7 @@ File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, U
The `split' program splits large text files into smaller pieces. Usage
is as follows:(1)
- split [-COUNT] file [ PREFIX ]
+ `split' [`-COUNT'] [FILE] [PREFIX]
By default, the output files are named `xaa', `xab', and so on. Each
file has 1000 lines in it, with the likely exception of the last file.
@@ -16168,7 +16181,7 @@ output file names:
# split.awk --- do split in awk
#
# Requires ord() and chr() library functions
- # usage: split [-num] [file] [outname]
+ # usage: split [-count] [file] [outname]
BEGIN {
outfile = "x" # default
@@ -16177,7 +16190,7 @@ output file names:
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) {
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) {
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -16253,7 +16266,7 @@ The `tee' program is known as a "pipe fitting." `tee' copies its
standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
- tee [-a] file ...
+ `tee' [`-a'] FILE ...
The `-a' option tells `tee' to append to the named files, instead of
truncating them and starting over.
@@ -16342,7 +16355,7 @@ and by default removes duplicate lines. In other words, it only prints
unique lines--hence the name. `uniq' has a number of options. The
usage is as follows:
- uniq [-udc [-N]] [+N] [ INPUT FILE [ OUTPUT FILE ]]
+ `uniq' [`-udc' [`-N']] [`+N'] [INPUTFILE [OUTPUTFILE]]
The options for `uniq' are:
@@ -16365,11 +16378,11 @@ usage is as follows:
Skip N characters before comparing lines. Any fields specified
with `-N' are skipped first.
-`INPUT FILE'
+`INPUTFILE'
Data is read from the input file named on the command line,
instead of from the standard input.
-`OUTPUT FILE'
+`OUTPUTFILE'
The generated output is sent to the named output file, instead of
to the standard output.
@@ -16559,7 +16572,7 @@ File: gawk.info, Node: Wc Program, Prev: Uniq Program, Up: Clones
The `wc' (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
- wc [-lwc] [ FILES ... ]
+ `wc' [`-lwc'] [FILES ...]
If no files are specified on the command line, `wc' reads its
standard input. If there are multiple files, it also prints total
@@ -16925,11 +16938,11 @@ there are more characters in the "from" list than in the "to" list, the
last character of the "to" list is used for the remaining characters in
the "from" list.
- Some time ago, a user proposed that a transliteration function should
-be added to `gawk'. The following program was written to prove that
-character transliteration could be done with a user-level function.
-This program is not as complete as the system `tr' utility but it does
-most of the job.
+ Once upon a time, a user proposed that a transliteration function
+should be added to `gawk'. The following program was written to prove
+that character transliteration could be done with a user-level
+function. This program is not as complete as the system `tr' utility
+but it does most of the job.
The `translate' program demonstrates one of the few weaknesses of
standard `awk': dealing with individual characters is very painful,
@@ -17010,8 +17023,8 @@ record:
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the `gawk' authors)
started to consider adding a built-in function. However, shortly after
-writing this program, we learned that the System V Release 4 `awk' had
-added the `toupper()' and `tolower()' functions (*note String
+writing this program, we learned that Brian Kernighan had added the
+`toupper()' and `tolower()' functions to his `awk' (*note String
Functions::). These functions handle the vast majority of the cases
where character transliteration is necessary, and so we chose to simply
add those functions to `gawk' as well and then leave well enough alone.
@@ -17023,10 +17036,10 @@ program.
---------- Footnotes ----------
- (1) On some older systems, including Solaris, `tr' may require that
-the lists be written as range expressions enclosed in square brackets
-(`[a-z]') and quoted, to prevent the shell from attempting a file name
-expansion. This is not a feature.
+ (1) On some older systems, including Solaris, the system version of
+`tr' may require that the lists be written as range expressions
+enclosed in square brackets (`[a-z]') and quoted, to prevent the shell
+from attempting a file name expansion. This is not a feature.
(2) This program was written before `gawk' acquired the ability to
split each character in a string into separate array elements.
@@ -17146,7 +17159,7 @@ File: gawk.info, Node: Word Sorting, Next: History Sorting, Prev: Labels Prog
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to
+certain words, in which case he or she might wish to find synonyms to
substitute for words that appear too often. This node develops a
program for counting words and presenting the frequency information in
a useful format.
@@ -17209,6 +17222,10 @@ script. Here is the new version of the program:
printf "%s\t%d\n", word, freq[word]
}
+ The regexp `/[^[:alnum:]_[:blank:]]/' might have been written
+`/[[:punct:]]/', but then underscores would also be removed, and we
+want to keep them.
+
Assuming we have saved this program in a file named `wordfreq.awk',
and that the data is in `file1', the following pipeline:
@@ -17286,8 +17303,7 @@ information. For example, using the following `print' statement in the
print data[lines[i]], lines[i]
- This works because `data[$0]' is incremented each time a line is
-seen.
+This works because `data[$0]' is incremented each time a line is seen.

File: gawk.info, Node: Extract Program, Next: Simple Sed, Prev: History Sorting, Up: Miscellaneous Programs
@@ -17418,8 +17434,9 @@ elements (`@@' in the original file), we have to add a single `@'
symbol back in.(1)
When the processing of the array is finished, `join()' is called
-with the value of `SUBSEP', to rejoin the pieces back into a single
-line. That line is then printed to the output file:
+with the value of `SUBSEP' (*note Multidimensional::), to rejoin the
+pieces back into a single line. That line is then printed to the
+output file:
/^@c(omment)?[ \t]+file/ \
{
@@ -17488,7 +17505,7 @@ closing the open file:
---------- Footnotes ----------
(1) This program was written before `gawk' had the `gensub()'
-function. Consider how you might use it to simplify the code.
+function. Consider how you might use it to simplify the code.

File: gawk.info, Node: Simple Sed, Next: Igawk Program, Prev: Extract Program, Up: Miscellaneous Programs
@@ -17827,12 +17844,12 @@ which represents the current directory:
pathlist[i] = "."
}
- The stack is initialized with `ARGV[1]', which will be `/dev/stdin'.
-The main loop comes next. Input lines are read in succession. Lines
-that do not start with `@include' are printed verbatim. If the line
-does start with `@include', the file name is in `$2'. `pathto()' is
-called to generate the full path. If it cannot, then the program
-prints an error message and continues.
+ The stack is initialized with `ARGV[1]', which will be
+`"/dev/stdin"'. The main loop comes next. Input lines are read in
+succession. Lines that do not start with `@include' are printed
+verbatim. If the line does start with `@include', the file name is in
+`$2'. `pathto()' is called to generate the full path. If it cannot,
+then the program prints an error message and continues.
The next thing to check is if the file is included already. The
`processed' array is indexed by the full file name of each included
@@ -17909,7 +17926,7 @@ supplied.
The `eval' command is a shell construct that reruns the shell's
parsing process. This keeps things properly quoted.
- This version of `igawk' represents my fifth version of this program.
+ This version of `igawk' represents the fifth version of this program.
There are four key simplifications that make the program work better:
* Using `@include' even for the files named with `-f' makes building
@@ -18083,7 +18100,9 @@ supplies the following copyright terms:
X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O}'
- We leave it to you to determine what the program does.
+ We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this Info file.)

File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top
@@ -20413,7 +20432,7 @@ categories, as follows:
Program::) demonstrates:
gawk> dump
- -| # BEGIN
+ -| # BEGIN
-|
-| [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
-| [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
@@ -30122,7 +30141,7 @@ Index
* Menu:
* ! (exclamation point), ! operator: Boolean Ops. (line 67)
-* ! (exclamation point), ! operator <1>: Egrep Program. (line 170)
+* ! (exclamation point), ! operator <1>: Egrep Program. (line 175)
* ! (exclamation point), ! operator <2>: Ranges. (line 48)
* ! (exclamation point), ! operator: Precedence. (line 52)
* ! (exclamation point), != operator <1>: Precedence. (line 65)
@@ -30358,7 +30377,7 @@ Index
(line 38)
* \ (backslash), as field separator: Command Line Field Separator.
(line 27)
-* \ (backslash), continuing lines and <1>: Egrep Program. (line 220)
+* \ (backslash), continuing lines and <1>: Egrep Program. (line 223)
* \ (backslash), continuing lines and: Statements/Lines. (line 19)
* \ (backslash), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -30386,7 +30405,7 @@ Index
* _ (underscore), in names of private variables: Library Names.
(line 29)
* _ (underscore), translatable string: Programmer i18n. (line 69)
-* _gr_init() user-defined function: Group Functions. (line 82)
+* _gr_init() user-defined function: Group Functions. (line 83)
* _ord_init() user-defined function: Ordinal Functions. (line 16)
* _pw_init() user-defined function: Passwd Functions. (line 105)
* accessing fields: Fields. (line 6)
@@ -30632,7 +30651,7 @@ Index
(line 38)
* backslash (\), as field separator: Command Line Field Separator.
(line 27)
-* backslash (\), continuing lines and <1>: Egrep Program. (line 220)
+* backslash (\), continuing lines and <1>: Egrep Program. (line 223)
* backslash (\), continuing lines and: Statements/Lines. (line 19)
* backslash (\), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -31288,7 +31307,7 @@ Index
* END pattern, and profiling: Profiling. (line 62)
* END pattern, assert() user-defined function and: Assert Function.
(line 75)
-* END pattern, backslash continuation and: Egrep Program. (line 220)
+* END pattern, backslash continuation and: Egrep Program. (line 223)
* END pattern, Boolean patterns and: Expression Patterns. (line 70)
* END pattern, exit statement and: Exit Statement. (line 12)
* END pattern, next/nextfile statements and <1>: Next Statement.
@@ -31300,8 +31319,8 @@ Index
* ENDFILE pattern: BEGINFILE/ENDFILE. (line 6)
* ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70)
* endfile() user-defined function: Filetrans Function. (line 62)
-* endgrent() function (C library): Group Functions. (line 215)
-* endgrent() user-defined function: Group Functions. (line 218)
+* endgrent() function (C library): Group Functions. (line 213)
+* endgrent() user-defined function: Group Functions. (line 216)
* endpwent() function (C library): Passwd Functions. (line 210)
* endpwent() user-defined function: Passwd Functions. (line 213)
* ENVIRON array: Auto-set. (line 60)
@@ -31334,7 +31353,7 @@ Index
* evaluation order, concatenation: Concatenation. (line 41)
* evaluation order, functions: Calling Built-in. (line 30)
* examining fields: Fields. (line 6)
-* exclamation point (!), ! operator <1>: Egrep Program. (line 170)
+* exclamation point (!), ! operator <1>: Egrep Program. (line 175)
* exclamation point (!), ! operator <2>: Precedence. (line 52)
* exclamation point (!), ! operator: Boolean Ops. (line 67)
* exclamation point (!), != operator <1>: Precedence. (line 65)
@@ -31730,15 +31749,15 @@ Index
* getaddrinfo() function (C library): TCP/IP Networking. (line 38)
* getgrent() function (C library): Group Functions. (line 6)
* getgrent() user-defined function: Group Functions. (line 6)
-* getgrgid() function (C library): Group Functions. (line 186)
-* getgrgid() user-defined function: Group Functions. (line 189)
-* getgrnam() function (C library): Group Functions. (line 175)
-* getgrnam() user-defined function: Group Functions. (line 180)
-* getgruser() function (C library): Group Functions. (line 195)
-* getgruser() function, user-defined: Group Functions. (line 198)
+* getgrgid() function (C library): Group Functions. (line 184)
+* getgrgid() user-defined function: Group Functions. (line 187)
+* getgrnam() function (C library): Group Functions. (line 173)
+* getgrnam() user-defined function: Group Functions. (line 178)
+* getgruser() function (C library): Group Functions. (line 193)
+* getgruser() function, user-defined: Group Functions. (line 196)
* getline command: Reading Files. (line 20)
* getline command, _gr_init() user-defined function: Group Functions.
- (line 82)
+ (line 83)
* getline command, _pw_init() function: Passwd Functions. (line 154)
* getline command, coprocesses, using from <1>: Close Files And Pipes.
(line 6)
@@ -31939,6 +31958,7 @@ Index
* Java programming language: Glossary. (line 380)
* jawk: Other Versions. (line 112)
* Jedi knights: Undocumented. (line 6)
+* Johansen, Chris: Signature Program. (line 25)
* join() user-defined function: Join Function. (line 18)
* Kahrs, Ju"rgen <1>: Contributors. (line 70)
* Kahrs, Ju"rgen: Acknowledgments. (line 60)
@@ -33237,494 +33257,496 @@ Node: Other Features105547
Node: When106475
Node: Invoking Gawk108623
Node: Command Line110086
-Node: Options110869
-Ref: Options-Footnote-1126681
-Node: Other Arguments126706
-Node: Naming Standard Input129368
-Node: Environment Variables130462
-Node: AWKPATH Variable131020
-Ref: AWKPATH Variable-Footnote-1133798
-Ref: AWKPATH Variable-Footnote-2133843
-Node: AWKLIBPATH Variable134103
-Node: Other Environment Variables134862
-Node: Exit Status138517
-Node: Include Files139192
-Node: Loading Shared Libraries142770
-Node: Obsolete144153
-Node: Undocumented144850
-Node: Regexp145092
-Node: Regexp Usage146481
-Node: Escape Sequences148514
-Node: Regexp Operators154181
-Ref: Regexp Operators-Footnote-1161661
-Ref: Regexp Operators-Footnote-2161808
-Node: Bracket Expressions161906
-Ref: table-char-classes163796
-Node: GNU Regexp Operators166319
-Node: Case-sensitivity170042
-Ref: Case-sensitivity-Footnote-1172934
-Ref: Case-sensitivity-Footnote-2173169
-Node: Leftmost Longest173277
-Node: Computed Regexps174478
-Node: Reading Files177827
-Node: Records179829
-Node: awk split records180564
-Node: gawk split records185422
-Ref: gawk split records-Footnote-1189943
-Node: Fields189980
-Ref: Fields-Footnote-1192944
-Node: Nonconstant Fields193030
-Ref: Nonconstant Fields-Footnote-1195260
-Node: Changing Fields195462
-Node: Field Separators201416
-Node: Default Field Splitting204118
-Node: Regexp Field Splitting205235
-Node: Single Character Fields208576
-Node: Command Line Field Separator209635
-Node: Full Line Fields212977
-Ref: Full Line Fields-Footnote-1213485
-Node: Field Splitting Summary213531
-Ref: Field Splitting Summary-Footnote-1216630
-Node: Constant Size216731
-Node: Splitting By Content221338
-Ref: Splitting By Content-Footnote-1225088
-Node: Multiple Line225128
-Ref: Multiple Line-Footnote-1230984
-Node: Getline231163
-Node: Plain Getline233379
-Node: Getline/Variable235474
-Node: Getline/File236621
-Node: Getline/Variable/File238005
-Ref: Getline/Variable/File-Footnote-1239604
-Node: Getline/Pipe239691
-Node: Getline/Variable/Pipe242390
-Node: Getline/Coprocess243497
-Node: Getline/Variable/Coprocess244749
-Node: Getline Notes245486
-Node: Getline Summary248290
-Ref: table-getline-variants248698
-Node: Read Timeout249610
-Ref: Read Timeout-Footnote-1253437
-Node: Command line directories253495
-Node: Printing254377
-Node: Print256008
-Node: Print Examples257349
-Node: Output Separators260128
-Node: OFMT262144
-Node: Printf263502
-Node: Basic Printf264408
-Node: Control Letters265947
-Node: Format Modifiers269801
-Node: Printf Examples275828
-Node: Redirection278535
-Node: Special Files285507
-Node: Special FD286040
-Ref: Special FD-Footnote-1289664
-Node: Special Network289738
-Node: Special Caveats290588
-Node: Close Files And Pipes291384
-Ref: Close Files And Pipes-Footnote-1298522
-Ref: Close Files And Pipes-Footnote-2298670
-Node: Expressions298820
-Node: Values299952
-Node: Constants300628
-Node: Scalar Constants301308
-Ref: Scalar Constants-Footnote-1302167
-Node: Nondecimal-numbers302417
-Node: Regexp Constants305417
-Node: Using Constant Regexps305892
-Node: Variables308962
-Node: Using Variables309617
-Node: Assignment Options311341
-Node: Conversion313216
-Ref: table-locale-affects318652
-Ref: Conversion-Footnote-1319276
-Node: All Operators319385
-Node: Arithmetic Ops320015
-Node: Concatenation322520
-Ref: Concatenation-Footnote-1325316
-Node: Assignment Ops325436
-Ref: table-assign-ops330419
-Node: Increment Ops331736
-Node: Truth Values and Conditions335174
-Node: Truth Values336257
-Node: Typing and Comparison337306
-Node: Variable Typing338099
-Ref: Variable Typing-Footnote-1341999
-Node: Comparison Operators342121
-Ref: table-relational-ops342531
-Node: POSIX String Comparison346079
-Ref: POSIX String Comparison-Footnote-1347163
-Node: Boolean Ops347301
-Ref: Boolean Ops-Footnote-1351371
-Node: Conditional Exp351462
-Node: Function Calls353189
-Node: Precedence356947
-Node: Locales360616
-Node: Patterns and Actions362219
-Node: Pattern Overview363273
-Node: Regexp Patterns364950
-Node: Expression Patterns365493
-Node: Ranges369274
-Node: BEGIN/END372380
-Node: Using BEGIN/END373142
-Ref: Using BEGIN/END-Footnote-1375878
-Node: I/O And BEGIN/END375984
-Node: BEGINFILE/ENDFILE378269
-Node: Empty381205
-Node: Using Shell Variables381522
-Node: Action Overview383805
-Node: Statements386150
-Node: If Statement388004
-Node: While Statement389503
-Node: Do Statement391547
-Node: For Statement392703
-Node: Switch Statement395855
-Node: Break Statement397958
-Node: Continue Statement400013
-Node: Next Statement401806
-Node: Nextfile Statement404196
-Node: Exit Statement406851
-Node: Built-in Variables409253
-Node: User-modified410349
-Ref: User-modified-Footnote-1418034
-Node: Auto-set418096
-Ref: Auto-set-Footnote-1430661
-Ref: Auto-set-Footnote-2430866
-Node: ARGC and ARGV430922
-Node: Arrays434776
-Node: Array Basics436274
-Node: Array Intro437100
-Ref: figure-array-elements439073
-Node: Reference to Elements441480
-Node: Assigning Elements443753
-Node: Array Example444244
-Node: Scanning an Array445976
-Node: Controlling Scanning448991
-Ref: Controlling Scanning-Footnote-1454164
-Node: Delete454480
-Ref: Delete-Footnote-1457245
-Node: Numeric Array Subscripts457302
-Node: Uninitialized Subscripts459485
-Node: Multidimensional461110
-Node: Multiscanning464203
-Node: Arrays of Arrays465792
-Node: Functions470432
-Node: Built-in471251
-Node: Calling Built-in472329
-Node: Numeric Functions474317
-Ref: Numeric Functions-Footnote-1478151
-Ref: Numeric Functions-Footnote-2478508
-Ref: Numeric Functions-Footnote-3478556
-Node: String Functions478825
-Ref: String Functions-Footnote-1501836
-Ref: String Functions-Footnote-2501965
-Ref: String Functions-Footnote-3502213
-Node: Gory Details502300
-Ref: table-sub-escapes503969
-Ref: table-sub-posix-92505323
-Ref: table-sub-proposed506674
-Ref: table-posix-sub508028
-Ref: table-gensub-escapes509573
-Ref: Gory Details-Footnote-1510749
-Ref: Gory Details-Footnote-2510800
-Node: I/O Functions510951
-Ref: I/O Functions-Footnote-1518074
-Node: Time Functions518221
-Ref: Time Functions-Footnote-1528685
-Ref: Time Functions-Footnote-2528753
-Ref: Time Functions-Footnote-3528911
-Ref: Time Functions-Footnote-4529022
-Ref: Time Functions-Footnote-5529134
-Ref: Time Functions-Footnote-6529361
-Node: Bitwise Functions529627
-Ref: table-bitwise-ops530189
-Ref: Bitwise Functions-Footnote-1534434
-Node: Type Functions534618
-Node: I18N Functions535760
-Node: User-defined537405
-Node: Definition Syntax538209
-Ref: Definition Syntax-Footnote-1543124
-Node: Function Example543193
-Ref: Function Example-Footnote-1545837
-Node: Function Caveats545859
-Node: Calling A Function546377
-Node: Variable Scope547332
-Node: Pass By Value/Reference550320
-Node: Return Statement553828
-Node: Dynamic Typing556810
-Node: Indirect Calls557739
-Node: Library Functions567426
-Ref: Library Functions-Footnote-1570939
-Ref: Library Functions-Footnote-2571082
-Node: Library Names571253
-Ref: Library Names-Footnote-1574726
-Ref: Library Names-Footnote-2574946
-Node: General Functions575032
-Node: Strtonum Function576060
-Node: Assert Function578990
-Node: Round Function582316
-Node: Cliff Random Function583857
-Node: Ordinal Functions584873
-Ref: Ordinal Functions-Footnote-1587950
-Ref: Ordinal Functions-Footnote-2588202
-Node: Join Function588413
-Ref: Join Function-Footnote-1590184
-Node: Getlocaltime Function590384
-Node: Readfile Function594125
-Node: Data File Management595964
-Node: Filetrans Function596596
-Node: Rewind Function600665
-Node: File Checking602052
-Node: Empty Files603146
-Node: Ignoring Assigns605376
-Node: Getopt Function606930
-Ref: Getopt Function-Footnote-1618233
-Node: Passwd Functions618436
-Ref: Passwd Functions-Footnote-1627414
-Node: Group Functions627502
-Node: Walking Arrays635586
-Node: Sample Programs637722
-Node: Running Examples638396
-Node: Clones639124
-Node: Cut Program640348
-Node: Egrep Program650199
-Ref: Egrep Program-Footnote-1657972
-Node: Id Program658082
-Node: Split Program661731
-Ref: Split Program-Footnote-1665250
-Node: Tee Program665378
-Node: Uniq Program668181
-Node: Wc Program675610
-Ref: Wc Program-Footnote-1679876
-Ref: Wc Program-Footnote-2680076
-Node: Miscellaneous Programs680168
-Node: Dupword Program681356
-Node: Alarm Program683387
-Node: Translate Program688194
-Ref: Translate Program-Footnote-1692581
-Ref: Translate Program-Footnote-2692829
-Node: Labels Program692963
-Ref: Labels Program-Footnote-1696334
-Node: Word Sorting696418
-Node: History Sorting700302
-Node: Extract Program702141
-Ref: Extract Program-Footnote-1709644
-Node: Simple Sed709772
-Node: Igawk Program712834
-Ref: Igawk Program-Footnote-1728005
-Ref: Igawk Program-Footnote-2728206
-Node: Anagram Program728344
-Node: Signature Program731412
-Node: Advanced Features732512
-Node: Nondecimal Data734398
-Node: Array Sorting735981
-Node: Controlling Array Traversal736678
-Node: Array Sorting Functions744962
-Ref: Array Sorting Functions-Footnote-1748831
-Node: Two-way I/O749025
-Ref: Two-way I/O-Footnote-1754457
-Node: TCP/IP Networking754539
-Node: Profiling757383
-Node: Internationalization764886
-Node: I18N and L10N766311
-Node: Explaining gettext766997
-Ref: Explaining gettext-Footnote-1772065
-Ref: Explaining gettext-Footnote-2772249
-Node: Programmer i18n772414
-Node: Translator i18n776641
-Node: String Extraction777435
-Ref: String Extraction-Footnote-1778396
-Node: Printf Ordering778482
-Ref: Printf Ordering-Footnote-1781264
-Node: I18N Portability781328
-Ref: I18N Portability-Footnote-1783777
-Node: I18N Example783840
-Ref: I18N Example-Footnote-1786478
-Node: Gawk I18N786550
-Node: Debugger787171
-Node: Debugging788142
-Node: Debugging Concepts788575
-Node: Debugging Terms790431
-Node: Awk Debugging793028
-Node: Sample Debugging Session793920
-Node: Debugger Invocation794440
-Node: Finding The Bug795773
-Node: List of Debugger Commands802260
-Node: Breakpoint Control803594
-Node: Debugger Execution Control807258
-Node: Viewing And Changing Data810618
-Node: Execution Stack813974
-Node: Debugger Info815441
-Node: Miscellaneous Debugger Commands819435
-Node: Readline Support824613
-Node: Limitations825444
-Node: Arbitrary Precision Arithmetic827696
-Ref: Arbitrary Precision Arithmetic-Footnote-1829345
-Node: General Arithmetic829493
-Node: Floating Point Issues831213
-Node: String Conversion Precision832094
-Ref: String Conversion Precision-Footnote-1833799
-Node: Unexpected Results833908
-Node: POSIX Floating Point Problems836061
-Ref: POSIX Floating Point Problems-Footnote-1839886
-Node: Integer Programming839924
-Node: Floating-point Programming841663
-Ref: Floating-point Programming-Footnote-1847994
-Ref: Floating-point Programming-Footnote-2848264
-Node: Floating-point Representation848528
-Node: Floating-point Context849693
-Ref: table-ieee-formats850532
-Node: Rounding Mode851916
-Ref: table-rounding-modes852395
-Ref: Rounding Mode-Footnote-1855410
-Node: Gawk and MPFR855589
-Node: Arbitrary Precision Floats856998
-Ref: Arbitrary Precision Floats-Footnote-1859441
-Node: Setting Precision859757
-Ref: table-predefined-precision-strings860443
-Node: Setting Rounding Mode862588
-Ref: table-gawk-rounding-modes862992
-Node: Floating-point Constants864179
-Node: Changing Precision865608
-Ref: Changing Precision-Footnote-1867005
-Node: Exact Arithmetic867179
-Node: Arbitrary Precision Integers870317
-Ref: Arbitrary Precision Integers-Footnote-1873332
-Node: Dynamic Extensions873479
-Node: Extension Intro874937
-Node: Plugin License876202
-Node: Extension Mechanism Outline876887
-Ref: load-extension877304
-Ref: load-new-function878782
-Ref: call-new-function879777
-Node: Extension API Description881792
-Node: Extension API Functions Introduction883079
-Node: General Data Types888006
-Ref: General Data Types-Footnote-1893701
-Node: Requesting Values894000
-Ref: table-value-types-returned894737
-Node: Memory Allocation Functions895691
-Ref: Memory Allocation Functions-Footnote-1898437
-Node: Constructor Functions898533
-Node: Registration Functions900291
-Node: Extension Functions900976
-Node: Exit Callback Functions903278
-Node: Extension Version String904527
-Node: Input Parsers905177
-Node: Output Wrappers914934
-Node: Two-way processors919444
-Node: Printing Messages921652
-Ref: Printing Messages-Footnote-1922729
-Node: Updating `ERRNO'922881
-Node: Accessing Parameters923620
-Node: Symbol Table Access924850
-Node: Symbol table by name925364
-Node: Symbol table by cookie927340
-Ref: Symbol table by cookie-Footnote-1931472
-Node: Cached values931535
-Ref: Cached values-Footnote-1935025
-Node: Array Manipulation935116
-Ref: Array Manipulation-Footnote-1936214
-Node: Array Data Types936253
-Ref: Array Data Types-Footnote-1938956
-Node: Array Functions939048
-Node: Flattening Arrays942884
-Node: Creating Arrays949736
-Node: Extension API Variables954461
-Node: Extension Versioning955097
-Node: Extension API Informational Variables956998
-Node: Extension API Boilerplate958084
-Node: Finding Extensions961888
-Node: Extension Example962448
-Node: Internal File Description963178
-Node: Internal File Ops967269
-Ref: Internal File Ops-Footnote-1978778
-Node: Using Internal File Ops978918
-Ref: Using Internal File Ops-Footnote-1981265
-Node: Extension Samples981531
-Node: Extension Sample File Functions983055
-Node: Extension Sample Fnmatch991542
-Node: Extension Sample Fork993311
-Node: Extension Sample Inplace994524
-Node: Extension Sample Ord996302
-Node: Extension Sample Readdir997138
-Node: Extension Sample Revout998670
-Node: Extension Sample Rev2way999263
-Node: Extension Sample Read write array999953
-Node: Extension Sample Readfile1001836
-Node: Extension Sample API Tests1002936
-Node: Extension Sample Time1003461
-Node: gawkextlib1004825
-Node: Language History1007606
-Node: V7/SVR3.11009199
-Node: SVR41011519
-Node: POSIX1012961
-Node: BTL1014347
-Node: POSIX/GNU1015081
-Node: Feature History1020680
-Node: Common Extensions1033656
-Node: Ranges and Locales1034968
-Ref: Ranges and Locales-Footnote-11039585
-Ref: Ranges and Locales-Footnote-21039612
-Ref: Ranges and Locales-Footnote-31039846
-Node: Contributors1040067
-Node: Installation1045448
-Node: Gawk Distribution1046342
-Node: Getting1046826
-Node: Extracting1047652
-Node: Distribution contents1049344
-Node: Unix Installation1055065
-Node: Quick Installation1055682
-Node: Additional Configuration Options1058128
-Node: Configuration Philosophy1059864
-Node: Non-Unix Installation1062218
-Node: PC Installation1062676
-Node: PC Binary Installation1063987
-Node: PC Compiling1065835
-Node: PC Testing1068795
-Node: PC Using1069971
-Node: Cygwin1074139
-Node: MSYS1074948
-Node: VMS Installation1075462
-Node: VMS Compilation1076258
-Ref: VMS Compilation-Footnote-11077510
-Node: VMS Dynamic Extensions1077568
-Node: VMS Installation Details1078941
-Node: VMS Running1081192
-Node: VMS GNV1084026
-Node: VMS Old Gawk1084749
-Node: Bugs1085219
-Node: Other Versions1089137
-Node: Notes1095221
-Node: Compatibility Mode1096021
-Node: Additions1096804
-Node: Accessing The Source1097731
-Node: Adding Code1099171
-Node: New Ports1105216
-Node: Derived Files1109351
-Ref: Derived Files-Footnote-11114672
-Ref: Derived Files-Footnote-21114706
-Ref: Derived Files-Footnote-31115306
-Node: Future Extensions1115404
-Node: Implementation Limitations1115987
-Node: Extension Design1117235
-Node: Old Extension Problems1118389
-Ref: Old Extension Problems-Footnote-11119897
-Node: Extension New Mechanism Goals1119954
-Ref: Extension New Mechanism Goals-Footnote-11123319
-Node: Extension Other Design Decisions1123505
-Node: Extension Future Growth1125611
-Node: Old Extension Mechanism1126447
-Node: Basic Concepts1128187
-Node: Basic High Level1128868
-Ref: figure-general-flow1129140
-Ref: figure-process-flow1129739
-Ref: Basic High Level-Footnote-11132968
-Node: Basic Data Typing1133153
-Node: Glossary1136508
-Node: Copying1161739
-Node: GNU Free Documentation License1199295
-Node: Index1224431
+Node: Options110877
+Ref: Options-Footnote-1126689
+Node: Other Arguments126714
+Node: Naming Standard Input129376
+Node: Environment Variables130470
+Node: AWKPATH Variable131028
+Ref: AWKPATH Variable-Footnote-1133806
+Ref: AWKPATH Variable-Footnote-2133851
+Node: AWKLIBPATH Variable134111
+Node: Other Environment Variables134870
+Node: Exit Status138525
+Node: Include Files139200
+Node: Loading Shared Libraries142778
+Node: Obsolete144161
+Node: Undocumented144858
+Node: Regexp145100
+Node: Regexp Usage146489
+Node: Escape Sequences148522
+Node: Regexp Operators154189
+Ref: Regexp Operators-Footnote-1161669
+Ref: Regexp Operators-Footnote-2161816
+Node: Bracket Expressions161914
+Ref: table-char-classes163804
+Node: GNU Regexp Operators166327
+Node: Case-sensitivity170050
+Ref: Case-sensitivity-Footnote-1172942
+Ref: Case-sensitivity-Footnote-2173177
+Node: Leftmost Longest173285
+Node: Computed Regexps174486
+Node: Reading Files177835
+Node: Records179837
+Node: awk split records180572
+Node: gawk split records185430
+Ref: gawk split records-Footnote-1189951
+Node: Fields189988
+Ref: Fields-Footnote-1192952
+Node: Nonconstant Fields193038
+Ref: Nonconstant Fields-Footnote-1195268
+Node: Changing Fields195470
+Node: Field Separators201424
+Node: Default Field Splitting204126
+Node: Regexp Field Splitting205243
+Node: Single Character Fields208584
+Node: Command Line Field Separator209643
+Node: Full Line Fields212985
+Ref: Full Line Fields-Footnote-1213493
+Node: Field Splitting Summary213539
+Ref: Field Splitting Summary-Footnote-1216638
+Node: Constant Size216739
+Node: Splitting By Content221346
+Ref: Splitting By Content-Footnote-1225096
+Node: Multiple Line225136
+Ref: Multiple Line-Footnote-1230992
+Node: Getline231171
+Node: Plain Getline233387
+Node: Getline/Variable235482
+Node: Getline/File236629
+Node: Getline/Variable/File238013
+Ref: Getline/Variable/File-Footnote-1239612
+Node: Getline/Pipe239699
+Node: Getline/Variable/Pipe242398
+Node: Getline/Coprocess243505
+Node: Getline/Variable/Coprocess244757
+Node: Getline Notes245494
+Node: Getline Summary248298
+Ref: table-getline-variants248706
+Node: Read Timeout249618
+Ref: Read Timeout-Footnote-1253445
+Node: Command line directories253503
+Node: Printing254385
+Node: Print256016
+Node: Print Examples257357
+Node: Output Separators260136
+Node: OFMT262152
+Node: Printf263510
+Node: Basic Printf264416
+Node: Control Letters265955
+Node: Format Modifiers269809
+Node: Printf Examples275836
+Node: Redirection278543
+Node: Special Files285515
+Node: Special FD286048
+Ref: Special FD-Footnote-1289672
+Node: Special Network289746
+Node: Special Caveats290596
+Node: Close Files And Pipes291392
+Ref: Close Files And Pipes-Footnote-1298530
+Ref: Close Files And Pipes-Footnote-2298678
+Node: Expressions298828
+Node: Values299960
+Node: Constants300636
+Node: Scalar Constants301316
+Ref: Scalar Constants-Footnote-1302175
+Node: Nondecimal-numbers302425
+Node: Regexp Constants305425
+Node: Using Constant Regexps305900
+Node: Variables308970
+Node: Using Variables309625
+Node: Assignment Options311349
+Node: Conversion313224
+Ref: table-locale-affects318660
+Ref: Conversion-Footnote-1319284
+Node: All Operators319393
+Node: Arithmetic Ops320023
+Node: Concatenation322528
+Ref: Concatenation-Footnote-1325324
+Node: Assignment Ops325444
+Ref: table-assign-ops330427
+Node: Increment Ops331744
+Node: Truth Values and Conditions335182
+Node: Truth Values336265
+Node: Typing and Comparison337314
+Node: Variable Typing338107
+Ref: Variable Typing-Footnote-1342007
+Node: Comparison Operators342129
+Ref: table-relational-ops342539
+Node: POSIX String Comparison346087
+Ref: POSIX String Comparison-Footnote-1347171
+Node: Boolean Ops347309
+Ref: Boolean Ops-Footnote-1351379
+Node: Conditional Exp351470
+Node: Function Calls353197
+Node: Precedence356955
+Node: Locales360624
+Node: Patterns and Actions362227
+Node: Pattern Overview363281
+Node: Regexp Patterns364958
+Node: Expression Patterns365501
+Node: Ranges369282
+Node: BEGIN/END372388
+Node: Using BEGIN/END373150
+Ref: Using BEGIN/END-Footnote-1375886
+Node: I/O And BEGIN/END375992
+Node: BEGINFILE/ENDFILE378277
+Node: Empty381213
+Node: Using Shell Variables381530
+Node: Action Overview383813
+Node: Statements386164
+Node: If Statement388018
+Node: While Statement389521
+Node: Do Statement391565
+Node: For Statement392721
+Node: Switch Statement395873
+Node: Break Statement397976
+Node: Continue Statement400031
+Node: Next Statement401824
+Node: Nextfile Statement404214
+Node: Exit Statement406869
+Node: Built-in Variables409273
+Node: User-modified410369
+Ref: User-modified-Footnote-1418054
+Node: Auto-set418116
+Ref: Auto-set-Footnote-1430681
+Ref: Auto-set-Footnote-2430886
+Node: ARGC and ARGV430942
+Node: Arrays434796
+Node: Array Basics436294
+Node: Array Intro437120
+Ref: figure-array-elements439093
+Node: Reference to Elements441500
+Node: Assigning Elements443773
+Node: Array Example444264
+Node: Scanning an Array445996
+Node: Controlling Scanning449011
+Ref: Controlling Scanning-Footnote-1454184
+Node: Delete454500
+Ref: Delete-Footnote-1457265
+Node: Numeric Array Subscripts457322
+Node: Uninitialized Subscripts459505
+Node: Multidimensional461130
+Node: Multiscanning464223
+Node: Arrays of Arrays465812
+Node: Functions470452
+Node: Built-in471271
+Node: Calling Built-in472349
+Node: Numeric Functions474337
+Ref: Numeric Functions-Footnote-1478171
+Ref: Numeric Functions-Footnote-2478528
+Ref: Numeric Functions-Footnote-3478576
+Node: String Functions478845
+Ref: String Functions-Footnote-1501856
+Ref: String Functions-Footnote-2501985
+Ref: String Functions-Footnote-3502233
+Node: Gory Details502320
+Ref: table-sub-escapes503989
+Ref: table-sub-posix-92505343
+Ref: table-sub-proposed506694
+Ref: table-posix-sub508048
+Ref: table-gensub-escapes509593
+Ref: Gory Details-Footnote-1510769
+Ref: Gory Details-Footnote-2510820
+Node: I/O Functions510971
+Ref: I/O Functions-Footnote-1518094
+Node: Time Functions518241
+Ref: Time Functions-Footnote-1528705
+Ref: Time Functions-Footnote-2528773
+Ref: Time Functions-Footnote-3528931
+Ref: Time Functions-Footnote-4529042
+Ref: Time Functions-Footnote-5529154
+Ref: Time Functions-Footnote-6529381
+Node: Bitwise Functions529647
+Ref: table-bitwise-ops530209
+Ref: Bitwise Functions-Footnote-1534454
+Node: Type Functions534638
+Node: I18N Functions535780
+Node: User-defined537425
+Node: Definition Syntax538229
+Ref: Definition Syntax-Footnote-1543154
+Node: Function Example543223
+Ref: Function Example-Footnote-1545867
+Node: Function Caveats545889
+Node: Calling A Function546407
+Node: Variable Scope547362
+Node: Pass By Value/Reference550350
+Node: Return Statement553858
+Node: Dynamic Typing556842
+Node: Indirect Calls557771
+Node: Library Functions567458
+Ref: Library Functions-Footnote-1570971
+Ref: Library Functions-Footnote-2571114
+Node: Library Names571285
+Ref: Library Names-Footnote-1574758
+Ref: Library Names-Footnote-2574978
+Node: General Functions575064
+Node: Strtonum Function576092
+Node: Assert Function579022
+Node: Round Function582348
+Node: Cliff Random Function583889
+Node: Ordinal Functions584905
+Ref: Ordinal Functions-Footnote-1587982
+Ref: Ordinal Functions-Footnote-2588234
+Node: Join Function588445
+Ref: Join Function-Footnote-1590216
+Node: Getlocaltime Function590416
+Node: Readfile Function594152
+Node: Data File Management595991
+Node: Filetrans Function596623
+Node: Rewind Function600692
+Node: File Checking602079
+Ref: File Checking-Footnote-1603211
+Node: Empty Files603412
+Node: Ignoring Assigns605642
+Node: Getopt Function607196
+Ref: Getopt Function-Footnote-1618499
+Node: Passwd Functions618702
+Ref: Passwd Functions-Footnote-1627681
+Node: Group Functions627769
+Ref: Group Functions-Footnote-1635711
+Node: Walking Arrays635924
+Node: Sample Programs638060
+Node: Running Examples638734
+Node: Clones639462
+Node: Cut Program640686
+Node: Egrep Program650539
+Ref: Egrep Program-Footnote-1658510
+Node: Id Program658620
+Node: Split Program662284
+Ref: Split Program-Footnote-1665822
+Node: Tee Program665950
+Node: Uniq Program668757
+Node: Wc Program676187
+Ref: Wc Program-Footnote-1680455
+Ref: Wc Program-Footnote-2680655
+Node: Miscellaneous Programs680747
+Node: Dupword Program681935
+Node: Alarm Program683966
+Node: Translate Program688773
+Ref: Translate Program-Footnote-1693164
+Ref: Translate Program-Footnote-2693434
+Node: Labels Program693568
+Ref: Labels Program-Footnote-1696939
+Node: Word Sorting697023
+Node: History Sorting701066
+Node: Extract Program702902
+Ref: Extract Program-Footnote-1710432
+Node: Simple Sed710561
+Node: Igawk Program713623
+Ref: Igawk Program-Footnote-1728798
+Ref: Igawk Program-Footnote-2728999
+Node: Anagram Program729137
+Node: Signature Program732205
+Node: Advanced Features733452
+Node: Nondecimal Data735338
+Node: Array Sorting736921
+Node: Controlling Array Traversal737618
+Node: Array Sorting Functions745902
+Ref: Array Sorting Functions-Footnote-1749771
+Node: Two-way I/O749965
+Ref: Two-way I/O-Footnote-1755397
+Node: TCP/IP Networking755479
+Node: Profiling758323
+Node: Internationalization765826
+Node: I18N and L10N767251
+Node: Explaining gettext767937
+Ref: Explaining gettext-Footnote-1773005
+Ref: Explaining gettext-Footnote-2773189
+Node: Programmer i18n773354
+Node: Translator i18n777581
+Node: String Extraction778375
+Ref: String Extraction-Footnote-1779336
+Node: Printf Ordering779422
+Ref: Printf Ordering-Footnote-1782204
+Node: I18N Portability782268
+Ref: I18N Portability-Footnote-1784717
+Node: I18N Example784780
+Ref: I18N Example-Footnote-1787418
+Node: Gawk I18N787490
+Node: Debugger788111
+Node: Debugging789082
+Node: Debugging Concepts789515
+Node: Debugging Terms791371
+Node: Awk Debugging793968
+Node: Sample Debugging Session794860
+Node: Debugger Invocation795380
+Node: Finding The Bug796713
+Node: List of Debugger Commands803200
+Node: Breakpoint Control804534
+Node: Debugger Execution Control808198
+Node: Viewing And Changing Data811558
+Node: Execution Stack814914
+Node: Debugger Info816381
+Node: Miscellaneous Debugger Commands820375
+Node: Readline Support825559
+Node: Limitations826390
+Node: Arbitrary Precision Arithmetic828642
+Ref: Arbitrary Precision Arithmetic-Footnote-1830291
+Node: General Arithmetic830439
+Node: Floating Point Issues832159
+Node: String Conversion Precision833040
+Ref: String Conversion Precision-Footnote-1834745
+Node: Unexpected Results834854
+Node: POSIX Floating Point Problems837007
+Ref: POSIX Floating Point Problems-Footnote-1840832
+Node: Integer Programming840870
+Node: Floating-point Programming842609
+Ref: Floating-point Programming-Footnote-1848940
+Ref: Floating-point Programming-Footnote-2849210
+Node: Floating-point Representation849474
+Node: Floating-point Context850639
+Ref: table-ieee-formats851478
+Node: Rounding Mode852862
+Ref: table-rounding-modes853341
+Ref: Rounding Mode-Footnote-1856356
+Node: Gawk and MPFR856535
+Node: Arbitrary Precision Floats857944
+Ref: Arbitrary Precision Floats-Footnote-1860387
+Node: Setting Precision860703
+Ref: table-predefined-precision-strings861389
+Node: Setting Rounding Mode863534
+Ref: table-gawk-rounding-modes863938
+Node: Floating-point Constants865125
+Node: Changing Precision866554
+Ref: Changing Precision-Footnote-1867951
+Node: Exact Arithmetic868125
+Node: Arbitrary Precision Integers871263
+Ref: Arbitrary Precision Integers-Footnote-1874278
+Node: Dynamic Extensions874425
+Node: Extension Intro875883
+Node: Plugin License877148
+Node: Extension Mechanism Outline877833
+Ref: load-extension878250
+Ref: load-new-function879728
+Ref: call-new-function880723
+Node: Extension API Description882738
+Node: Extension API Functions Introduction884025
+Node: General Data Types888952
+Ref: General Data Types-Footnote-1894647
+Node: Requesting Values894946
+Ref: table-value-types-returned895683
+Node: Memory Allocation Functions896637
+Ref: Memory Allocation Functions-Footnote-1899383
+Node: Constructor Functions899479
+Node: Registration Functions901237
+Node: Extension Functions901922
+Node: Exit Callback Functions904224
+Node: Extension Version String905473
+Node: Input Parsers906123
+Node: Output Wrappers915880
+Node: Two-way processors920390
+Node: Printing Messages922598
+Ref: Printing Messages-Footnote-1923675
+Node: Updating `ERRNO'923827
+Node: Accessing Parameters924566
+Node: Symbol Table Access925796
+Node: Symbol table by name926310
+Node: Symbol table by cookie928286
+Ref: Symbol table by cookie-Footnote-1932418
+Node: Cached values932481
+Ref: Cached values-Footnote-1935971
+Node: Array Manipulation936062
+Ref: Array Manipulation-Footnote-1937160
+Node: Array Data Types937199
+Ref: Array Data Types-Footnote-1939902
+Node: Array Functions939994
+Node: Flattening Arrays943830
+Node: Creating Arrays950682
+Node: Extension API Variables955407
+Node: Extension Versioning956043
+Node: Extension API Informational Variables957944
+Node: Extension API Boilerplate959030
+Node: Finding Extensions962834
+Node: Extension Example963394
+Node: Internal File Description964124
+Node: Internal File Ops968215
+Ref: Internal File Ops-Footnote-1979724
+Node: Using Internal File Ops979864
+Ref: Using Internal File Ops-Footnote-1982211
+Node: Extension Samples982477
+Node: Extension Sample File Functions984001
+Node: Extension Sample Fnmatch992488
+Node: Extension Sample Fork994257
+Node: Extension Sample Inplace995470
+Node: Extension Sample Ord997248
+Node: Extension Sample Readdir998084
+Node: Extension Sample Revout999616
+Node: Extension Sample Rev2way1000209
+Node: Extension Sample Read write array1000899
+Node: Extension Sample Readfile1002782
+Node: Extension Sample API Tests1003882
+Node: Extension Sample Time1004407
+Node: gawkextlib1005771
+Node: Language History1008552
+Node: V7/SVR3.11010145
+Node: SVR41012465
+Node: POSIX1013907
+Node: BTL1015293
+Node: POSIX/GNU1016027
+Node: Feature History1021626
+Node: Common Extensions1034602
+Node: Ranges and Locales1035914
+Ref: Ranges and Locales-Footnote-11040531
+Ref: Ranges and Locales-Footnote-21040558
+Ref: Ranges and Locales-Footnote-31040792
+Node: Contributors1041013
+Node: Installation1046394
+Node: Gawk Distribution1047288
+Node: Getting1047772
+Node: Extracting1048598
+Node: Distribution contents1050290
+Node: Unix Installation1056011
+Node: Quick Installation1056628
+Node: Additional Configuration Options1059074
+Node: Configuration Philosophy1060810
+Node: Non-Unix Installation1063164
+Node: PC Installation1063622
+Node: PC Binary Installation1064933
+Node: PC Compiling1066781
+Node: PC Testing1069741
+Node: PC Using1070917
+Node: Cygwin1075085
+Node: MSYS1075894
+Node: VMS Installation1076408
+Node: VMS Compilation1077204
+Ref: VMS Compilation-Footnote-11078456
+Node: VMS Dynamic Extensions1078514
+Node: VMS Installation Details1079887
+Node: VMS Running1082138
+Node: VMS GNV1084972
+Node: VMS Old Gawk1085695
+Node: Bugs1086165
+Node: Other Versions1090083
+Node: Notes1096167
+Node: Compatibility Mode1096967
+Node: Additions1097750
+Node: Accessing The Source1098677
+Node: Adding Code1100117
+Node: New Ports1106162
+Node: Derived Files1110297
+Ref: Derived Files-Footnote-11115618
+Ref: Derived Files-Footnote-21115652
+Ref: Derived Files-Footnote-31116252
+Node: Future Extensions1116350
+Node: Implementation Limitations1116933
+Node: Extension Design1118181
+Node: Old Extension Problems1119335
+Ref: Old Extension Problems-Footnote-11120843
+Node: Extension New Mechanism Goals1120900
+Ref: Extension New Mechanism Goals-Footnote-11124265
+Node: Extension Other Design Decisions1124451
+Node: Extension Future Growth1126557
+Node: Old Extension Mechanism1127393
+Node: Basic Concepts1129133
+Node: Basic High Level1129814
+Ref: figure-general-flow1130086
+Ref: figure-process-flow1130685
+Ref: Basic High Level-Footnote-11133914
+Node: Basic Data Typing1134099
+Node: Glossary1137454
+Node: Copying1162685
+Node: GNU Free Documentation License1200241
+Node: Index1225377

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 470b2822..88098df3 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -58,6 +58,7 @@
@set SUBSECTION subsection
@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
@set COMMONEXT (c.e.)
+@set PAGE page
@end iftex
@ifinfo
@set DOCUMENT Info file
@@ -67,6 +68,7 @@
@set SUBSECTION node
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifinfo
@ifhtml
@set DOCUMENT Web page
@@ -76,6 +78,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifhtml
@ifdocbook
@set DOCUMENT book
@@ -85,6 +88,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifdocbook
@ifxml
@set DOCUMENT book
@@ -94,6 +98,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifxml
@ifplaintext
@set DOCUMENT book
@@ -103,6 +108,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifplaintext
@ifdocbook
@@ -3388,19 +3394,10 @@ There are two ways to run @command{awk}---with an explicit program or with
one or more program files. Here are templates for both of them; items
enclosed in [@dots{}] in these templates are optional:
-@ifnotdocbook
-@example
-awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-@end ifnotdocbook
-
-@c FIXME - find a better way to mark this up in docbook
-@docbook
-<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> &#8230;
-awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> &#8230;
-</screen>
-@end docbook
+@display
+@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{}
+@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{}
+@end display
@cindex GNU long options
@cindex long options
@@ -12948,13 +12945,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell
@command{awk} what to do once a match for the pattern is found. Thus,
in outline, an @command{awk} program generally looks like this:
-@example
-@r{[}@var{pattern}@r{]} @{ @var{action} @}
- @var{pattern} @r{[}@{ @var{action} @}@r{]}
+@display
+[@var{pattern}] @code{@{ @var{action} @}}
+ @var{pattern} [@code{@{ @var{action} @}}]
@dots{}
-function @var{name}(@var{args}) @{ @dots{} @}
+@code{function @var{name}(@var{args}) @{ @dots{} @}}
@dots{}
-@end example
+@end display
@cindex @code{@{@}} (braces), actions and
@cindex braces (@code{@{@}}), actions and
@@ -13069,9 +13066,9 @@ newlines or semicolons.
The @code{if}-@code{else} statement is @command{awk}'s decision-making
statement. It looks like this:
-@example
-if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
-@end example
+@display
+@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}]
+@end display
@noindent
The @var{condition} is an expression that controls what the rest of the
@@ -13669,9 +13666,9 @@ The @code{exit} statement causes @command{awk} to immediately stop
executing the current rule and to stop processing input; any remaining input
is ignored. The @code{exit} statement is written as follows:
-@example
-exit @r{[}@var{return code}@r{]}
-@end example
+@display
+@code{exit} [@var{return code}]
+@end display
@cindex @code{BEGIN} pattern, @code{exit} statement and
@cindex @code{END} pattern, @code{exit} statement and
@@ -18510,12 +18507,12 @@ entire program before starting to execute any of it.
The definition of a function named @var{name} looks like this:
-@example
-function @var{name}(@r{[}@var{parameter-list}@r{]})
-@{
+@display
+@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)}
+@code{@{}
@var{body-of-function}
-@}
-@end example
+@code{@}}
+@end display
@cindex names, functions
@cindex functions, names of
@@ -19037,9 +19034,9 @@ This statement returns control to the calling part of the @command{awk} program.
can also be used to return a value for use in the rest of the @command{awk}
program. It looks like this:
-@example
-return @r{[}@var{expression}@r{]}
-@end example
+@display
+@code{return} [@var{expression}]
+@end display
The @var{expression} part is optional.
Due most likely to an oversight, POSIX does not define what the return
@@ -20348,7 +20345,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -20704,10 +20701,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword
@cindex readable data files@comma{} checking
@cindex files, skipping
Normally, if you give @command{awk} a data file that isn't readable,
-it stops with a fatal error. There are times when you
-might want to just ignore such files and keep going. You can
-do this by prepending the following program to your @command{awk}
-program:
+it stops with a fatal error. There are times when you might want to
+just ignore such files and keep going.@footnote{The @code{BEGINFILE}
+special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative
+mechanism for dealing with files that can't be opened. However, the
+code here provides a portable solution.} You can do this by prepending
+the following program to your @command{awk} program:
@cindex @code{readable.awk} program
@example
@@ -20745,7 +20744,7 @@ skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
@node Empty Files
-@subsection Checking For Zero-length Files
+@subsection Checking for Zero-length Files
All known @command{awk} implementations silently skip over zero-length files.
This is a by-product of @command{awk}'s implicit
@@ -21218,7 +21217,7 @@ BEGIN @{
# test program
if (_getopt_test) @{
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -21234,32 +21233,31 @@ result of two sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
-@print{} c = <a>, optarg = <>
-@print{} c = <c>, optarg = <>
-@print{} c = <b>, optarg = <ARG>
+@print{} c = <a>, Optarg = <>
+@print{} c = <c>, Optarg = <>
+@print{} c = <b>, Optarg = <ARG>
@print{} non-option arguments:
@print{} ARGV[3] = <bax>
@print{} ARGV[4] = <-x>
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
-@print{} c = <a>, optarg = <>
+@print{} c = <a>, Optarg = <>
@error{} x -- invalid option
-@print{} c = <?>, optarg = <>
+@print{} c = <?>, Optarg = <>
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
@end example
-In both runs,
-the first @option{--} terminates the arguments to @command{awk}, so that it does
-not try to interpret the @option{-a}, etc., as its own options.
+In both runs, the first @option{--} terminates the arguments to
+@command{awk}, so that it does not try to interpret the @option{-a},
+etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the user level
-code to
-clear out all the elements of @code{ARGV} from 1 to @code{Optind},
-so that @command{awk} does not try to process the command-line options
-as file names.
+After @code{getopt()} is through, it is the responsibility of the
+user-level code to clear out all the elements of @code{ARGV} from 1
+to @code{Optind}, so that @command{awk} does not try to process the
+command-line options as file names.
@end quotation
Several of the sample programs presented in
@@ -21328,7 +21326,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database:
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
@c endfile
@ignore
@@ -21674,7 +21672,7 @@ is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
@c endfile
@ignore
@@ -21761,7 +21759,7 @@ it is usually empty or set to @samp{*}.
@item Group ID Number
The group's numeric group ID number;
-this number must be unique within the file.
+the association of name to number must be unique within the file.
(On some systems it's a C @code{long}, and not an @code{int}. Thus
we cast it to @code{long} for all cases.)
@@ -21897,10 +21895,10 @@ tvpeople:*:101:david,conan,tom,joan
For this reason, @code{_gr_init()} looks to see if a group name or
group ID number is already seen. If it is, then the user names are
-simply concatenated onto the previous list of users. (There is actually a
+simply concatenated onto the previous list of users.@footnote{There is actually a
subtle problem with the code just presented. Suppose that
the first time there were no names. This code adds the names with
-a leading comma. It also doesn't check that there is a @code{$4}.)
+a leading comma. It also doesn't check that there is a @code{$4}.}
Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores
@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0},
@@ -22270,13 +22268,7 @@ function usage( e1, e2)
@noindent
The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
@@ -22315,7 +22307,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
@} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
@}
@@ -22528,9 +22520,9 @@ expressions that are almost identical to those available in @command{awk}
(@pxref{Regexp}).
You invoke it as follows:
-@example
-egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
-@end example
+@display
+@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{}
+@end display
The @var{pattern} is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
@@ -22712,6 +22704,11 @@ function endfile(file)
@c endfile
@end example
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be
+@command{gawk}-specific. Additionally, this example was written before
+@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}.
+
The following rule does most of the work of matching lines. The variable
@code{matches} is true if the line matched the pattern. If the user
wants lines that did not match, the sense of @code{matches} is inverted
@@ -22768,9 +22765,7 @@ there are no matches, the exit status is one; otherwise it is zero:
@c file eg/prog/egrep.awk
END \
@{
- if (total == 0)
- exit 1
- exit 0
+ exit (total == 0)
@}
@c endfile
@end example
@@ -22824,7 +22819,7 @@ corresponding user and group names. The output might look like this:
@example
$ @kbd{id}
-@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
@end example
@cindex @code{PROCINFO} array, and user and group ID numbers
@@ -22860,6 +22855,7 @@ numbers:
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
@c endfile
@end ignore
@@ -22879,34 +22875,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -22915,16 +22903,20 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
print ""
@}
+
+function pr_first_field(str, a)
+@{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+@}
@c endfile
@end example
@@ -22944,9 +22936,13 @@ The loop is also correct if there are @emph{no} supplementary
groups; then the condition is false the first time it's
tested, and the loop body never executes.
+The @code{pr_first_field()} function simply factors out
+code that is used repeatedly, making the whole program
+slightly shorter and cleaner.
+
@c exercise!!!
@ignore
-The POSIX version of @command{id} takes arguments that control which
+The POSIX version of @command{id} takes options that control which
information is printed. Modify this version to accept the same
arguments and perform in the same way.
@end ignore
@@ -22966,9 +22962,9 @@ Usage is as follows:@footnote{This is the traditional usage. The
POSIX usage is different, but not relevant for what the program
aims to demonstrate.}
-@example
-split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]}
-@end example
+@display
+@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}]
+@end display
By default,
the output files are named @file{xaa}, @file{xab}, and so on. Each file has
@@ -23002,11 +22998,12 @@ is used as the prefix for the output file names:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
@c endfile
@end ignore
@c file eg/prog/split.awk
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN @{
outfile = "x" # default
@@ -23015,7 +23012,7 @@ BEGIN @{
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) @{
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -23087,13 +23084,7 @@ function usage( e)
@noindent
The variable @code{e} is used so that the function
-fits nicely on the
-@ifinfo
-screen.
-@end ifinfo
-@ifnotinfo
-page.
-@end ifnotinfo
+fits nicely on the @value{PAGE}.
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
@@ -23116,9 +23107,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
its standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
-@example
-tee @r{[}-a@r{]} file @dots{}
-@end example
+@display
+@command{tee} [@option{-a}] @var{file} @dots{}
+@end display
The @option{-a} option tells @code{tee} to append to the named files, instead of
truncating them and starting over.
@@ -23243,9 +23234,9 @@ input, and by default removes duplicate lines. In other words, it only
prints unique lines---hence the name. @command{uniq} has a number of
options. The usage is as follows:
-@example
-uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
-@end example
+@display
+@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]]
+@end display
The options for @command{uniq} are:
@@ -23269,11 +23260,11 @@ by runs of spaces and/or TABs.
Skip @var{n} characters before comparing lines. Any fields specified with
@samp{-@var{n}} are skipped first.
-@item @var{input file}
+@item @var{inputfile}
Data is read from the input file named on the command line, instead of from
the standard input.
-@item @var{output file}
+@item @var{outputfile}
The generated output is sent to the named output file, instead of to the
standard output.
@end table
@@ -23510,9 +23501,9 @@ END @{
The @command{wc} (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
-@example
-wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
-@end example
+@display
+@command{wc} [@option{-lwc}] [@var{files} @dots{}]
+@end display
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
@@ -23993,19 +23984,18 @@ often used to map uppercase letters into lowercase for further processing:
@end example
@command{tr} requires two lists of characters.@footnote{On some older
-systems,
-including Solaris,
-@command{tr} may require that the lists be written as
-range expressions enclosed in square brackets (@samp{[a-z]}) and quoted,
-to prevent the shell from attempting a file name expansion. This is
-not a feature.} When processing the input, the first character in the
-first list is replaced with the first character in the second list,
-the second character in the first list is replaced with the second
-character in the second list, and so on. If there are more characters
-in the ``from'' list than in the ``to'' list, the last character of the
-``to'' list is used for the remaining characters in the ``from'' list.
-
-Some time ago,
+systems, including Solaris, the system version of @command{tr} may require
+that the lists be written as range expressions enclosed in square brackets
+(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file
+name expansion. This is not a feature.} When processing the input, the
+first character in the first list is replaced with the first character
+in the second list, the second character in the first list is replaced
+with the second character in the second list, and so on. If there are
+more characters in the ``from'' list than in the ``to'' list, the last
+character of the ``to'' list is used for the remaining characters in the
+``from'' list.
+
+Once upon a time,
@c early or mid-1989!
a user proposed that a transliteration function should
be added to @command{gawk}.
@@ -24119,13 +24109,12 @@ BEGIN @{
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the @command{gawk}
authors) started to consider adding a built-in function. However,
-shortly after writing this program, we learned that the System V Release 4
-@command{awk} had added the @code{toupper()} and @code{tolower()} functions
-(@pxref{String Functions}).
-These functions handle the vast majority of the
-cases where character transliteration is necessary, and so we chose to
-simply add those functions to @command{gawk} as well and then leave well
-enough alone.
+shortly after writing this program, we learned that Brian Kernighan
+had added the @code{toupper()} and @code{tolower()} functions to his
+@command{awk} (@pxref{String Functions}). These functions handle the
+vast majority of the cases where character transliteration is necessary,
+and so we chose to simply add those functions to @command{gawk} as well
+and then leave well enough alone.
An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
@@ -24158,7 +24147,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
@command{awk} splits records at blank lines
(@pxref{Records}).
It sets @code{MAXLINES} to 100, since 100 is the maximum number
-of lines on the page (20 * 5 = 100).
+of lines on the page
+@iftex
+(@math{20 @cdot 5 = 100}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+(20 * 5 = 100).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+(20 &sdot; 5 = 100). @c
+@end docbook
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
@@ -24270,7 +24270,7 @@ END \
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to substitute
+certain words, in which case he or she might wish to find synonyms to substitute
for words that appear too often. This @value{SUBSECTION} develops a
program for counting words and presenting the frequency information
in a useful format.
@@ -24348,6 +24348,10 @@ END @{
@}
@end example
+The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written
+@samp{/[[:punct:]]/}, but then underscores would also be removed,
+and we want to keep them.
+
Assuming we have saved this program in a file named @file{wordfreq.awk},
and that the data is in @file{file1}, the following pipeline:
@@ -24459,6 +24463,7 @@ information. For example, using the following @code{print} statement in the
print data[lines[i]], lines[i]
@end example
+@noindent
This works because @code{data[$0]} is incremented each time a line is
seen.
@c ENDOFRANGE lidu
@@ -24614,13 +24619,7 @@ BEGIN @{ IGNORECASE = 1 @}
@noindent
The variable @code{e} is used so that the rule
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
The second rule handles moving data into files. It verifies that a
file name is given in the directive. If the file named is not the
@@ -24649,10 +24648,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@}
symbols in the original line. For each two empty elements (@samp{@@@@} in
the original file), we have to add a single @samp{@@} symbol back
in.@footnote{This program was written before @command{gawk} had the
-@code{gensub()} function. Consider how you might use it to simplify the code.}
+@code{gensub()} function.
+@c exercise!!
+Consider how you might use it to simplify the code.}
When the processing of the array is finished, @code{join()} is called with the
-value of @code{SUBSEP}, to rejoin the pieces back into a single
+value of @code{SUBSEP} (@pxref{Multidimensional}),
+to rejoin the pieces back into a single
line. That line is then printed to the output file:
@example
@@ -25177,7 +25179,7 @@ BEGIN @{
@c endfile
@end example
-The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}.
+The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}.
The main loop comes next. Input lines are read in succession. Lines that
do not start with @code{@@include} are printed verbatim.
If the line does start with @code{@@include}, the file name is in @code{$2}.
@@ -25287,7 +25289,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"'
The @command{eval} command is a shell construct that reruns the shell's parsing
process. This keeps things properly quoted.
-This version of @command{igawk} represents my fifth version of this program.
+This version of @command{igawk} represents the fifth version of this program.
There are four key simplifications that make the program work better:
@itemize @bullet
@@ -25497,6 +25499,9 @@ babels beslab
babery yabber
@dots{}
@end example
+
+@c Exercise: Avoid the use of external sort command
+
@c ENDOFRANGE anagram
@node Signature Program
@@ -25528,7 +25533,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}'
@end example
-We leave it to you to determine what the program does.
+@cindex Johansen, Chris
+We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this @value{DOCUMENT}.)
@ignore
To: "Arnold Robbins" <arnold@skeeve.com>
@@ -28656,7 +28664,7 @@ partial dump of Davide Brini's obfuscated code
@smallexample
gawk> @kbd{dump}
-@print{} # BEGIN
+@print{} # BEGIN
@print{}
@print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
@print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index af323c1b..599bd098 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -53,6 +53,7 @@
@set SUBSECTION subsection
@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
@set COMMONEXT (c.e.)
+@set PAGE page
@end iftex
@ifinfo
@set DOCUMENT Info file
@@ -62,6 +63,7 @@
@set SUBSECTION node
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifinfo
@ifhtml
@set DOCUMENT Web page
@@ -71,6 +73,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifhtml
@ifdocbook
@set DOCUMENT book
@@ -80,6 +83,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifdocbook
@ifxml
@set DOCUMENT book
@@ -89,6 +93,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifxml
@ifplaintext
@set DOCUMENT book
@@ -98,6 +103,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifplaintext
@ifdocbook
@@ -3316,19 +3322,10 @@ There are two ways to run @command{awk}---with an explicit program or with
one or more program files. Here are templates for both of them; items
enclosed in [@dots{}] in these templates are optional:
-@ifnotdocbook
-@example
-awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-@end ifnotdocbook
-
-@c FIXME - find a better way to mark this up in docbook
-@docbook
-<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> &#8230;
-awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> &#8230;
-</screen>
-@end docbook
+@display
+@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{}
+@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{}
+@end display
@cindex GNU long options
@cindex long options
@@ -12328,13 +12325,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell
@command{awk} what to do once a match for the pattern is found. Thus,
in outline, an @command{awk} program generally looks like this:
-@example
-@r{[}@var{pattern}@r{]} @{ @var{action} @}
- @var{pattern} @r{[}@{ @var{action} @}@r{]}
+@display
+[@var{pattern}] @code{@{ @var{action} @}}
+ @var{pattern} [@code{@{ @var{action} @}}]
@dots{}
-function @var{name}(@var{args}) @{ @dots{} @}
+@code{function @var{name}(@var{args}) @{ @dots{} @}}
@dots{}
-@end example
+@end display
@cindex @code{@{@}} (braces), actions and
@cindex braces (@code{@{@}}), actions and
@@ -12449,9 +12446,9 @@ newlines or semicolons.
The @code{if}-@code{else} statement is @command{awk}'s decision-making
statement. It looks like this:
-@example
-if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
-@end example
+@display
+@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}]
+@end display
@noindent
The @var{condition} is an expression that controls what the rest of the
@@ -13049,9 +13046,9 @@ The @code{exit} statement causes @command{awk} to immediately stop
executing the current rule and to stop processing input; any remaining input
is ignored. The @code{exit} statement is written as follows:
-@example
-exit @r{[}@var{return code}@r{]}
-@end example
+@display
+@code{exit} [@var{return code}]
+@end display
@cindex @code{BEGIN} pattern, @code{exit} statement and
@cindex @code{END} pattern, @code{exit} statement and
@@ -17683,12 +17680,12 @@ entire program before starting to execute any of it.
The definition of a function named @var{name} looks like this:
-@example
-function @var{name}(@r{[}@var{parameter-list}@r{]})
-@{
+@display
+@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)}
+@code{@{}
@var{body-of-function}
-@}
-@end example
+@code{@}}
+@end display
@cindex names, functions
@cindex functions, names of
@@ -18210,9 +18207,9 @@ This statement returns control to the calling part of the @command{awk} program.
can also be used to return a value for use in the rest of the @command{awk}
program. It looks like this:
-@example
-return @r{[}@var{expression}@r{]}
-@end example
+@display
+@code{return} [@var{expression}]
+@end display
The @var{expression} part is optional.
Due most likely to an oversight, POSIX does not define what the return
@@ -19521,7 +19518,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -19848,10 +19845,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword
@cindex readable data files@comma{} checking
@cindex files, skipping
Normally, if you give @command{awk} a data file that isn't readable,
-it stops with a fatal error. There are times when you
-might want to just ignore such files and keep going. You can
-do this by prepending the following program to your @command{awk}
-program:
+it stops with a fatal error. There are times when you might want to
+just ignore such files and keep going.@footnote{The @code{BEGINFILE}
+special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative
+mechanism for dealing with files that can't be opened. However, the
+code here provides a portable solution.} You can do this by prepending
+the following program to your @command{awk} program:
@cindex @code{readable.awk} program
@example
@@ -19889,7 +19888,7 @@ skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
@node Empty Files
-@subsection Checking For Zero-length Files
+@subsection Checking for Zero-length Files
All known @command{awk} implementations silently skip over zero-length files.
This is a by-product of @command{awk}'s implicit
@@ -20362,7 +20361,7 @@ BEGIN @{
# test program
if (_getopt_test) @{
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -20378,32 +20377,31 @@ result of two sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
-@print{} c = <a>, optarg = <>
-@print{} c = <c>, optarg = <>
-@print{} c = <b>, optarg = <ARG>
+@print{} c = <a>, Optarg = <>
+@print{} c = <c>, Optarg = <>
+@print{} c = <b>, Optarg = <ARG>
@print{} non-option arguments:
@print{} ARGV[3] = <bax>
@print{} ARGV[4] = <-x>
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
-@print{} c = <a>, optarg = <>
+@print{} c = <a>, Optarg = <>
@error{} x -- invalid option
-@print{} c = <?>, optarg = <>
+@print{} c = <?>, Optarg = <>
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
@end example
-In both runs,
-the first @option{--} terminates the arguments to @command{awk}, so that it does
-not try to interpret the @option{-a}, etc., as its own options.
+In both runs, the first @option{--} terminates the arguments to
+@command{awk}, so that it does not try to interpret the @option{-a},
+etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the user level
-code to
-clear out all the elements of @code{ARGV} from 1 to @code{Optind},
-so that @command{awk} does not try to process the command-line options
-as file names.
+After @code{getopt()} is through, it is the responsibility of the
+user-level code to clear out all the elements of @code{ARGV} from 1
+to @code{Optind}, so that @command{awk} does not try to process the
+command-line options as file names.
@end quotation
Several of the sample programs presented in
@@ -20472,7 +20470,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database:
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
@c endfile
@ignore
@@ -20818,7 +20816,7 @@ is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
@c endfile
@ignore
@@ -20905,7 +20903,7 @@ it is usually empty or set to @samp{*}.
@item Group ID Number
The group's numeric group ID number;
-this number must be unique within the file.
+the association of name to number must be unique within the file.
(On some systems it's a C @code{long}, and not an @code{int}. Thus
we cast it to @code{long} for all cases.)
@@ -21041,10 +21039,10 @@ tvpeople:*:101:david,conan,tom,joan
For this reason, @code{_gr_init()} looks to see if a group name or
group ID number has already been seen. If it has, then the user names are
-simply concatenated onto the previous list of users. (There is actually a
+simply concatenated onto the previous list of users.@footnote{There is actually a
subtle problem with the code just presented. Suppose that
the first time there were no names. This code adds the names with
-a leading comma. It also doesn't check that there is a @code{$4}.)
+a leading comma. It also doesn't check that there is a @code{$4}.}
Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores
@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0},
@@ -21414,13 +21412,7 @@ function usage( e1, e2)
@noindent
The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
@@ -21459,7 +21451,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
@} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
@}
@@ -21672,9 +21664,9 @@ expressions that are almost identical to those available in @command{awk}
(@pxref{Regexp}).
You invoke it as follows:
-@example
-egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
-@end example
+@display
+@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{}
+@end display
The @var{pattern} is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
@@ -21856,6 +21848,11 @@ function endfile(file)
@c endfile
@end example
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be
+@command{gawk}-specific. Additionally, this example was written before
+@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}.
+
The following rule does most of the work of matching lines. The variable
@code{matches} is true if the line matched the pattern. If the user
wants lines that did not match, the sense of @code{matches} is inverted
@@ -21912,9 +21909,7 @@ there are no matches, the exit status is one; otherwise it is zero:
@c file eg/prog/egrep.awk
END \
@{
- if (total == 0)
- exit 1
- exit 0
+ exit (total == 0)
@}
@c endfile
@end example
@@ -21968,7 +21963,7 @@ corresponding user and group names. The output might look like this:
@example
$ @kbd{id}
-@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
@end example
@cindex @code{PROCINFO} array, and user and group ID numbers
@@ -22004,6 +21999,7 @@ numbers:
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
@c endfile
@end ignore
@@ -22023,34 +22019,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -22059,16 +22047,20 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
print ""
@}
+
+function pr_first_field(str, a)
+@{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+@}
@c endfile
@end example
@@ -22088,9 +22080,13 @@ The loop is also correct if there are @emph{no} supplementary
groups; then the condition is false the first time it's
tested, and the loop body never executes.
+The @code{pr_first_field()} function simply factors out some
+code that is used repeatedly, making the whole program
+slightly shorter and cleaner.
+
@c exercise!!!
@ignore
-The POSIX version of @command{id} takes arguments that control which
+The POSIX version of @command{id} takes options that control which
information is printed. Modify this version to accept the same
options and perform in the same way.
@end ignore
@@ -22110,9 +22106,9 @@ Usage is as follows:@footnote{This is the traditional usage. The
POSIX usage is different, but not relevant for what the program
aims to demonstrate.}
-@example
-split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]}
-@end example
+@display
+@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}]
+@end display
By default,
the output files are named @file{xaa}, @file{xab}, and so on. Each file has
@@ -22146,11 +22142,12 @@ is used as the prefix for the output file names:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
@c endfile
@end ignore
@c file eg/prog/split.awk
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN @{
outfile = "x" # default
@@ -22159,7 +22156,7 @@ BEGIN @{
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) @{
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -22231,13 +22228,7 @@ function usage( e)
@noindent
The variable @code{e} is used so that the function
-fits nicely on the
-@ifinfo
-screen.
-@end ifinfo
-@ifnotinfo
-page.
-@end ifnotinfo
+fits nicely on the @value{PAGE}.
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
@@ -22260,9 +22251,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
its standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
-@example
-tee @r{[}-a@r{]} file @dots{}
-@end example
+@display
+@command{tee} [@option{-a}] @var{file} @dots{}
+@end display
The @option{-a} option tells @code{tee} to append to the named files, instead of
truncating them and starting over.
@@ -22387,9 +22378,9 @@ input, and by default removes duplicate lines. In other words, it only
prints unique lines---hence the name. @command{uniq} has a number of
options. The usage is as follows:
-@example
-uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
-@end example
+@display
+@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]]
+@end display
The options for @command{uniq} are:
@@ -22413,11 +22404,11 @@ by runs of spaces and/or TABs.
Skip @var{n} characters before comparing lines. Any fields specified with
@samp{-@var{n}} are skipped first.
-@item @var{input file}
+@item @var{inputfile}
Data is read from the input file named on the command line, instead of from
the standard input.
-@item @var{output file}
+@item @var{outputfile}
The generated output is sent to the named output file, instead of to the
standard output.
@end table
@@ -22654,9 +22645,9 @@ END @{
The @command{wc} (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
-@example
-wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
-@end example
+@display
+@command{wc} [@option{-lwc}] [@var{files} @dots{}]
+@end display
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
@@ -23137,19 +23128,18 @@ often used to map uppercase letters into lowercase for further processing:
@end example
@command{tr} requires two lists of characters.@footnote{On some older
-systems,
-including Solaris,
-@command{tr} may require that the lists be written as
-range expressions enclosed in square brackets (@samp{[a-z]}) and quoted,
-to prevent the shell from attempting a file name expansion. This is
-not a feature.} When processing the input, the first character in the
-first list is replaced with the first character in the second list,
-the second character in the first list is replaced with the second
-character in the second list, and so on. If there are more characters
-in the ``from'' list than in the ``to'' list, the last character of the
-``to'' list is used for the remaining characters in the ``from'' list.
-
-Some time ago,
+systems, including Solaris, the system version of @command{tr} may require
+that the lists be written as range expressions enclosed in square brackets
+(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file
+name expansion. This is not a feature.} When processing the input, the
+first character in the first list is replaced with the first character
+in the second list, the second character in the first list is replaced
+with the second character in the second list, and so on. If there are
+more characters in the ``from'' list than in the ``to'' list, the last
+character of the ``to'' list is used for the remaining characters in the
+``from'' list.
+
+Once upon a time,
@c early or mid-1989!
a user proposed that a transliteration function should
be added to @command{gawk}.
@@ -23263,13 +23253,12 @@ BEGIN @{
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the @command{gawk}
authors) started to consider adding a built-in function. However,
-shortly after writing this program, we learned that the System V Release 4
-@command{awk} had added the @code{toupper()} and @code{tolower()} functions
-(@pxref{String Functions}).
-These functions handle the vast majority of the
-cases where character transliteration is necessary, and so we chose to
-simply add those functions to @command{gawk} as well and then leave well
-enough alone.
+shortly after writing this program, we learned that Brian Kernighan
+had added the @code{toupper()} and @code{tolower()} functions to his
+@command{awk} (@pxref{String Functions}). These functions handle the
+vast majority of the cases where character transliteration is necessary,
+and so we chose to simply add those functions to @command{gawk} as well
+and then leave well enough alone.
An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
@@ -23302,7 +23291,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
@command{awk} splits records at blank lines
(@pxref{Records}).
It sets @code{MAXLINES} to 100, since 100 is the maximum number
-of lines on the page (20 * 5 = 100).
+of lines on the page
+@iftex
+(@math{20 @cdot 5 = 100}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+(20 * 5 = 100).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+(20 &sdot; 5 = 100). @c
+@end docbook
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
@@ -23414,7 +23414,7 @@ END \
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to substitute
+certain words, in which case he or she might wish to find synonyms to substitute
for words that appear too often. This @value{SUBSECTION} develops a
program for counting words and presenting the frequency information
in a useful format.
@@ -23492,6 +23492,10 @@ END @{
@}
@end example
+The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written
+@samp{/[[:punct:]]/}, but then underscores would also be removed,
+and we want to keep them.
+
Assuming we have saved this program in a file named @file{wordfreq.awk},
and that the data is in @file{file1}, the following pipeline:
@@ -23603,6 +23607,7 @@ information. For example, using the following @code{print} statement in the
print data[lines[i]], lines[i]
@end example
+@noindent
This works because @code{data[$0]} is incremented each time a line is
seen.
@c ENDOFRANGE lidu
@@ -23758,13 +23763,7 @@ BEGIN @{ IGNORECASE = 1 @}
@noindent
The variable @code{e} is used so that the rule
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
The second rule handles moving data into files. It verifies that a
file name is given in the directive. If the file named is not the
@@ -23793,10 +23792,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@}
symbols in the original line. For each two empty elements (@samp{@@@@} in
the original file), we have to add a single @samp{@@} symbol back
in.@footnote{This program was written before @command{gawk} had the
-@code{gensub()} function. Consider how you might use it to simplify the code.}
+@code{gensub()} function.
+@c exercise!!
+Consider how you might use it to simplify the code.}
When the processing of the array is finished, @code{join()} is called with the
-value of @code{SUBSEP}, to rejoin the pieces back into a single
+value of @code{SUBSEP} (@pxref{Multidimensional}),
+to rejoin the pieces back into a single
line. That line is then printed to the output file:
@example
@@ -24321,7 +24323,7 @@ BEGIN @{
@c endfile
@end example
-The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}.
+The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}.
The main loop comes next. Input lines are read in succession. Lines that
do not start with @code{@@include} are printed verbatim.
If the line does start with @code{@@include}, the file name is in @code{$2}.
@@ -24431,7 +24433,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"'
The @command{eval} command is a shell construct that reruns the shell's parsing
process. This keeps things properly quoted.
-This version of @command{igawk} represents my fifth version of this program.
+This version of @command{igawk} represents the fifth version of this program.
There are four key simplifications that make the program work better:
@itemize @bullet
@@ -24641,6 +24643,9 @@ babels beslab
babery yabber
@dots{}
@end example
+
+@c Exercise: Avoid the use of external sort command
+
@c ENDOFRANGE anagram
@node Signature Program
@@ -24672,7 +24677,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}'
@end example
-We leave it to you to determine what the program does.
+@cindex Johansen, Chris
+We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this @value{DOCUMENT}.)
@ignore
To: "Arnold Robbins" <arnold@skeeve.com>
@@ -27800,7 +27808,7 @@ partial dump of Davide Brini's obfuscated code
@smallexample
gawk> @kbd{dump}
-@print{} # BEGIN
+@print{} # BEGIN
@print{}
@print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
@print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]