author     Arnold D. Robbins <arnold@skeeve.com>  2014-09-27 22:33:01 +0300
committer  Arnold D. Robbins <arnold@skeeve.com>  2014-09-27 22:33:01 +0300
commit     9701514d4ad1152da564ebf6690c514becd4339a (patch)
tree       69cf8c9a9991cb4f9fed6fbc2415f0605c52578e
parent     6b1b9c16a1b55804df36457de0650414ab3f017d (diff)
parent     e71e74ac9af232d58e6c672e37ddf7e8737d68b1 (diff)
Merge branch 'master' into comment
-rw-r--r--  ChangeLog  31
-rw-r--r--  NEWS  3
-rw-r--r--  awkgram.c  18
-rw-r--r--  awkgram.y  18
-rw-r--r--  awklib/eg/lib/ctime.awk  3
-rw-r--r--  awklib/eg/lib/ftrans.awk  2
-rw-r--r--  awklib/eg/lib/gettime.awk  2
-rw-r--r--  awklib/eg/lib/groupawk.in  3
-rw-r--r--  awklib/eg/lib/noassign.awk  2
-rw-r--r--  awklib/eg/lib/quicksort.awk  2
-rw-r--r--  awklib/eg/lib/readable.awk  2
-rw-r--r--  awklib/eg/lib/strtonum.awk  2
-rw-r--r--  awklib/eg/misc/arraymax.awk  10
-rw-r--r--  awklib/eg/misc/findpat.awk  13
-rw-r--r--  awklib/eg/prog/cut.awk  8
-rw-r--r--  awklib/eg/prog/egrep.awk  7
-rw-r--r--  awklib/eg/prog/extract.awk  11
-rw-r--r--  awklib/eg/prog/id.awk  22
-rw-r--r--  awklib/eg/prog/split.awk  5
-rw-r--r--  awklib/eg/prog/uniq.awk  5
-rw-r--r--  configh.in  3
-rwxr-xr-x  configure  13
-rw-r--r--  configure.ac  6
-rw-r--r--  doc/ChangeLog  17
-rw-r--r--  doc/gawk.info  3225
-rw-r--r--  doc/gawk.texi  1589
-rw-r--r--  doc/gawktexi.in  1506
-rw-r--r--  io.c  17
-rw-r--r--  pc/ChangeLog  4
-rw-r--r--  pc/Makefile.tst  79
-rw-r--r--  pc/config.h  6
-rw-r--r--  profile.c  2
-rw-r--r--  test/ChangeLog  5
-rw-r--r--  test/profile2.ok  2
-rw-r--r--  test/profile3.ok  2
35 files changed, 3480 insertions, 3165 deletions
diff --git a/ChangeLog b/ChangeLog
index d63dcdd5..a128ba22 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,34 @@
+2014-09-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (check_for_bad): Bitwise-and the bad character with 0xFF
+ to avoid sign extension into a large integer.
+
+ Unrelated:
+
+ * configure.ac: Add an option to enable locale letters in identifiers.
+ Undocumented and subject to being rescinded at any time in the future.
+ * NEWS: Mention to look at configure --help.
+
+ Unrelated:
+
+ * profile.c (pprint): Use "rule(s)" instead of "block(s)" in the
+ header.
+
+2014-09-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (yylex): Don't check for junk characters inside
+ quoted strings. Caused issues on DJGPP and Solaris.
+
+ Unrelated
+
+ * io.c (devopen): Straighten things out with respect to
+ compatibility with BWK awk.
+
+2014-09-19 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y: Further commentary as to the treacherousness
+ of isalnum and isalpha.
+
2014-09-15 Arnold D. Robbins <arnold@skeeve.com>
Finish removing use of isalpha and isalnum.
diff --git a/NEWS b/NEWS
index 19a5bd59..1c1acf07 100644
--- a/NEWS
+++ b/NEWS
@@ -60,6 +60,9 @@ Changes from 4.1.1 to 4.1.2
beside those of the English alphabet in identifiers. This has
been fixed. (isalpha and isalnum are NOT our friends.)
+ If you feel that you must have this misfeature, use `configure --help'
+ to see what option to use when configuring gawk to reenable it.
+
XX. A number of bugs have been fixed. See the ChangeLog.
Changes from 4.1.0 to 4.1.1
diff --git a/awkgram.c b/awkgram.c
index fac2070a..4fe3b04e 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -5225,7 +5225,7 @@ check_bad_char(int c)
}
if (iscntrl(c) && ! isspace(c))
- fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c);
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c & 0xFF);
}
/* nextc --- get the next input character */
@@ -5895,7 +5895,11 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc(true)) != '"') {
+ /*
+ * Allow any kind of junk in quoted string,
+ * so pass false to nextc().
+ */
+ while ((c = nextc(false)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -8261,6 +8265,16 @@ install_builtins(void)
/* is_alpha --- return true if c is an English letter */
+/*
+ * The scene of the murder was grisly to look upon. When the inspector
+ * arrived, the sergeant turned to him and said, "Another programmer stabbed
+ * in the back. He never knew what happened."
+ *
+ * The inspector replied, "Looks like the MO of isalpha, and his even meaner
+ * big brother, isalnum. The Locale brothers." The sergeant merely
+ * shuddered in horror.
+ */
+
bool
is_alpha(int c)
{
diff --git a/awkgram.y b/awkgram.y
index c58d35ac..e7784e9d 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -2887,7 +2887,7 @@ check_bad_char(int c)
}
if (iscntrl(c) && ! isspace(c))
- fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c);
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c & 0xFF);
}
/* nextc --- get the next input character */
@@ -3557,7 +3557,11 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc(true)) != '"') {
+ /*
+ * Allow any kind of junk in quoted string,
+ * so pass false to nextc().
+ */
+ while ((c = nextc(false)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -5923,6 +5927,16 @@ install_builtins(void)
/* is_alpha --- return true if c is an English letter */
+/*
+ * The scene of the murder was grisly to look upon. When the inspector
+ * arrived, the sergeant turned to him and said, "Another programmer stabbed
+ * in the back. He never knew what happened."
+ *
+ * The inspector replied, "Looks like the MO of isalpha, and his even meaner
+ * big brother, isalnum. The Locale brothers." The sergeant merely
+ * shuddered in horror.
+ */
+
bool
is_alpha(int c)
{
diff --git a/awklib/eg/lib/ctime.awk b/awklib/eg/lib/ctime.awk
index ca750370..cea25b7a 100644
--- a/awklib/eg/lib/ctime.awk
+++ b/awklib/eg/lib/ctime.awk
@@ -4,7 +4,8 @@
function ctime(ts, format)
{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
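
The ctime.awk change hard-codes the date(1)-style format "%a %b %e %H:%M:%S %Z %Y" instead of reading it from PROCINFO["strftime"], so the library file no longer depends on that gawk-specific array element. A minimal usage sketch (not part of the patch), assuming the revised ctime.awk is pulled in with -f or @include:

     BEGIN {
         now = systime()      # gawk extension: seconds since the epoch
         print ctime(now)     # e.g. "Sat Sep 27 22:33:01 IDT 2014"
         print ctime()        # ts omitted, so ctime() falls back to the current time
     }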
diff --git a/awklib/eg/lib/ftrans.awk b/awklib/eg/lib/ftrans.awk
index 1709ac82..2fec27ef 100644
--- a/awklib/eg/lib/ftrans.awk
+++ b/awklib/eg/lib/ftrans.awk
@@ -12,4 +12,4 @@ FNR == 1 {
beginfile(FILENAME)
}
-END { endfile(_filename_) }
+END { endfile(_filename_) }
diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk
index 3da9c8ab..4cb56330 100644
--- a/awklib/eg/lib/gettime.awk
+++ b/awklib/eg/lib/gettime.awk
@@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
diff --git a/awklib/eg/lib/groupawk.in b/awklib/eg/lib/groupawk.in
index 9382bce8..54a27f3d 100644
--- a/awklib/eg/lib/groupawk.in
+++ b/awklib/eg/lib/groupawk.in
@@ -38,8 +38,7 @@ function _gr_init( oldfs, oldrs, olddol0, grcat,
n = split($4, a, "[ \t]*,[ \t]*")
for (i = 1; i <= n; i++)
if (a[i] in _gr_groupsbyuser)
- _gr_groupsbyuser[a[i]] = \
- _gr_groupsbyuser[a[i]] " " $1
+ _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
else
_gr_groupsbyuser[a[i]] = $1
diff --git a/awklib/eg/lib/noassign.awk b/awklib/eg/lib/noassign.awk
index 1f750edf..99227b37 100644
--- a/awklib/eg/lib/noassign.awk
+++ b/awklib/eg/lib/noassign.awk
@@ -7,7 +7,7 @@
function disable_assigns(argc, argv, i)
{
for (i = 1; i < argc; i++)
- if (argv[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/)
+ if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
argv[i] = ("./" argv[i])
}
diff --git a/awklib/eg/lib/quicksort.awk b/awklib/eg/lib/quicksort.awk
index 43357ac6..3ba2d6e3 100644
--- a/awklib/eg/lib/quicksort.awk
+++ b/awklib/eg/lib/quicksort.awk
@@ -26,7 +26,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
{
temp = data[i]
data[i] = data[j]
diff --git a/awklib/eg/lib/readable.awk b/awklib/eg/lib/readable.awk
index 6942dcca..37970a82 100644
--- a/awklib/eg/lib/readable.awk
+++ b/awklib/eg/lib/readable.awk
@@ -6,7 +6,7 @@
BEGIN {
for (i = 1; i < ARGC; i++) {
- if (ARGV[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/ \
+ if (ARGV[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
|| ARGV[i] == "-" || ARGV[i] == "/dev/stdin")
continue # assignment or standard input
else if ((getline junk < ARGV[i]) < 0) # unreadable
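
Both noassign.awk and readable.awk switch from the POSIX character classes [[:alpha:]] and [[:alnum:]] to explicit ASCII ranges, in line with the commit's wider removal of locale-dependent isalpha()/isalnum() behavior: only English letters now count as the start of a variable assignment. A small sketch (not from the patch) of how that pattern classifies command-line-style arguments:

     BEGIN {
         split("FS=: data.txt 3x=oops -", args, " ")
         for (i = 1; i in args; i++)
             if (args[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
                 print args[i], "-> variable assignment"
             else
                 print args[i], "-> file name (or standard input)"
     }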
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index f82c89c5..cd56a449 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -51,7 +51,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
diff --git a/awklib/eg/misc/arraymax.awk b/awklib/eg/misc/arraymax.awk
index 20dd1768..64197f56 100644
--- a/awklib/eg/misc/arraymax.awk
+++ b/awklib/eg/misc/arraymax.awk
@@ -1,10 +1,10 @@
{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
}
END {
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
}
diff --git a/awklib/eg/misc/findpat.awk b/awklib/eg/misc/findpat.awk
index e9bef9ea..9d799434 100644
--- a/awklib/eg/misc/findpat.awk
+++ b/awklib/eg/misc/findpat.awk
@@ -1,10 +1,9 @@
{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
}
}
diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk
index 56e35e71..080279bc 100644
--- a/awklib/eg/prog/cut.awk
+++ b/awklib/eg/prog/cut.awk
@@ -12,12 +12,10 @@
#
# Requires getopt() and join() library functions
-function usage( e1, e2)
+function usage()
{
- e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
- e2 = "usage: cut [-c list] [files...]"
- print e1 > "/dev/stderr"
- print e2 > "/dev/stderr"
+ print("usage: cut [-f list] [-d c] [-s] [files...]") > "/dev/stderr"
+ print("usage: cut [-c list] [files...]") > "/dev/stderr"
exit 1
}
BEGIN {
diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk
index 094bdea5..a4165a90 100644
--- a/awklib/eg/prog/egrep.awk
+++ b/awklib/eg/prog/egrep.awk
@@ -91,10 +91,9 @@ function endfile(file)
END {
exit (total == 0)
}
-function usage( e)
+function usage()
{
- e = "Usage: egrep [-csvil] [-e pat] [files ...]"
- e = e "\n\tegrep [-csvil] pat [files ...]"
- print e > "/dev/stderr"
+ print("Usage: egrep [-csvil] [-e pat] [files ...]") > "/dev/stderr"
+ print("\n\tegrep [-csvil] pat [files ...]") > "/dev/stderr"
exit 1
}
diff --git a/awklib/eg/prog/extract.awk b/awklib/eg/prog/extract.awk
index 12e30b54..24f40ce5 100644
--- a/awklib/eg/prog/extract.awk
+++ b/awklib/eg/prog/extract.awk
@@ -1,5 +1,4 @@
-# extract.awk --- extract files and run programs
-# from texinfo files
+# extract.awk --- extract files and run programs from texinfo files
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
@@ -7,8 +6,7 @@
BEGIN { IGNORECASE = 1 }
-/^@c(omment)?[ \t]+system/ \
-{
+/^@c(omment)?[ \t]+system/ {
if (NF < 3) {
e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
@@ -24,8 +22,7 @@ BEGIN { IGNORECASE = 1 }
print e > "/dev/stderr"
}
}
-/^@c(omment)?[ \t]+file/ \
-{
+/^@c(omment)?[ \t]+file/ {
if (NF != 3) {
e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
@@ -66,7 +63,7 @@ BEGIN { IGNORECASE = 1 }
function unexpected_eof()
{
printf("extract: %s:%d: unexpected EOF or error\n",
- FILENAME, FNR) > "/dev/stderr"
+ FILENAME, FNR) > "/dev/stderr"
exit 1
}
diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk
index 992fa57c..b6061f9b 100644
--- a/awklib/eg/prog/id.awk
+++ b/awklib/eg/prog/id.awk
@@ -6,6 +6,7 @@
# May 1993
# Revised February 1996
# Revised May 2014
+# Revised September 2014
# output is:
# uid=12(foo) euid=34(bar) gid=3(baz) \
@@ -19,26 +20,22 @@ BEGIN {
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -47,8 +44,7 @@ BEGIN {
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
@@ -58,6 +54,8 @@ BEGIN {
function pr_first_field(str, a)
{
- split(str, a, ":")
- printf("(%s)", a[1])
+ if (str != "") {
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ }
}
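
The id.awk change moves the empty-string test into pr_first_field() itself, so each call site shrinks to a single line. A standalone sketch of the new helper (the passwd-style data here is made up, not from the patch):

     function pr_first_field(str,    a)
     {
         if (str != "") {
             split(str, a, ":")
             printf("(%s)", a[1])
         }
     }

     BEGIN {
         printf("uid=%d", 2076)
         pr_first_field("arnold:x:2076:10:Arnold Robbins:/home/arnold:/bin/bash")
         printf("\n")             # prints "uid=2076(arnold)"
         printf("gid=%d", 10)
         pr_first_field("")       # unknown entry: prints nothing extra
         printf("\n")             # prints "gid=10"
     }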
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk
index bcc73ae6..6a7198f6 100644
--- a/awklib/eg/prog/split.awk
+++ b/awklib/eg/prog/split.awk
@@ -50,9 +50,8 @@ BEGIN {
}
print > out
}
-function usage( e)
+function usage()
{
- e = "usage: split [-num] [file] [outname]"
- print e > "/dev/stderr"
+ print("usage: split [-num] [file] [outname]") > "/dev/stderr"
exit 1
}
diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk
index 2a2cf63e..7dd16099 100644
--- a/awklib/eg/prog/uniq.awk
+++ b/awklib/eg/prog/uniq.awk
@@ -5,10 +5,9 @@
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
-function usage( e)
+function usage()
{
- e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
- print e > "/dev/stderr"
+ print("Usage: uniq [-udc [-n]] [+n] [ in [ out ]]") > "/dev/stderr"
exit 1
}
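
The usage() rewrites in cut.awk, egrep.awk, split.awk, and uniq.awk all follow the same pattern: print the message directly to /dev/stderr instead of first assembling it in a scratch local variable. A minimal sketch of the idiom (hypothetical program, not from the patch):

     function usage()
     {
         print("usage: demo [-x] [file ...]") > "/dev/stderr"
         exit 1
     }

     BEGIN {
         if (ARGC < 2)    # no file arguments given
             usage()
     }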
diff --git a/configh.in b/configh.in
index 1ca2946a..301fa21a 100644
--- a/configh.in
+++ b/configh.in
@@ -320,6 +320,9 @@
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
+/* enable severe portability problems */
+#undef I_DONT_KNOW_WHAT_IM_DOING
+
/* disable lint checks */
#undef NO_LINT
diff --git a/configure b/configure
index 038e2081..cb2e6ba7 100755
--- a/configure
+++ b/configure
@@ -761,6 +761,7 @@ enable_option_checking
enable_silent_rules
with_whiny_user_strftime
enable_lint
+enable_severe_portability_problems
enable_dependency_tracking
enable_largefile
enable_nls
@@ -1405,6 +1406,7 @@ Optional Features:
--enable-silent-rules less verbose build output (undo: "make V=1")
--disable-silent-rules verbose build output (undo: "make V=0")
--disable-lint Disable gawk lint checking
+ --enable-severe-portability-problems Enable really nasty portability problems
--enable-dependency-tracking
do not reject slow dependency extractors
--disable-dependency-tracking
@@ -3181,6 +3183,17 @@ $as_echo "#define NO_LINT 1" >>confdefs.h
fi
+# Check whether --enable-severe-portability-problems was given.
+if test "${enable_severe_portability_problems+set}" = set; then :
+ enableval=$enable_severe_portability_problems; if test "$enableval" = yes
+ then
+
+$as_echo "#define I_DONT_KNOW_WHAT_IM_DOING 1" >>confdefs.h
+
+ fi
+
+fi
+
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
diff --git a/configure.ac b/configure.ac
index 8b4f188e..6122ee07 100644
--- a/configure.ac
+++ b/configure.ac
@@ -58,6 +58,12 @@ AC_ARG_ENABLE([lint], [ --disable-lint Disable gawk lint checking],
AC_DEFINE(NO_LINT, 1, [disable lint checks])
fi
)
+AC_ARG_ENABLE([severe-portability-problems], [ --enable-severe-portability-problems Enable really nasty portability problems],
+ if test "$enableval" = yes
+ then
+ AC_DEFINE(I_DONT_KNOW_WHAT_IM_DOING, 1, [enable severe portability problems])
+ fi
+)
AC_CANONICAL_HOST
AC_USE_SYSTEM_EXTENSIONS
diff --git a/doc/ChangeLog b/doc/ChangeLog
index b917e642..7693f5e5 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,20 @@
+2014-09-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Lots more fixes after reading through the MS.
+
+2014-09-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Rework the documentation of special files in
+ Chapter 5; some reordering as well as rewriting.
+
+2014-09-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continue fixes after reading through the MS.
+
+2014-09-21 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Start on fixes after reading through the MS.
+
2014-09-18 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix italics in quotations. Some docbook special
diff --git a/doc/gawk.info b/doc/gawk.info
index f1aa1f4a..c41ef683 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -14,7 +14,7 @@ Free Software Foundation, Inc.
This is Edition 4.1 of `GAWK: Effective AWK Programming: A User's
-Guide for GNU Awk', for the 4.1.1 (or later) version of the GNU
+Guide for GNU Awk', for the 4.1.2 (or later) version of the GNU
implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
@@ -42,7 +42,7 @@ Free Software Foundation, Inc.
This is Edition 4.1 of `GAWK: Effective AWK Programming: A User's
-Guide for GNU Awk', for the 4.1.1 (or later) version of the GNU
+Guide for GNU Awk', for the 4.1.2 (or later) version of the GNU
implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
@@ -189,8 +189,8 @@ entitled "GNU Free Documentation License".
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate
field.
-* Command Line Field Separator:: Setting `FS' from the
- command line.
+* Command Line Field Separator:: Setting `FS' from the command
+ line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -234,10 +234,12 @@ entitled "GNU Free Documentation License".
* Printf Examples:: Several examples.
* Redirection:: How to redirect output to multiple
files and pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in
`gawk'. `gawk' allows
access to inherited file descriptors.
-* Special FD:: Special files for I/O.
+* Other Inherited Files:: Accessing other open files with
+ `gawk'.
* Special Network:: Special files for network
communications.
* Special Caveats:: Things to watch out for.
@@ -350,12 +352,12 @@ entitled "GNU Free Documentation License".
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The `delete' statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
`awk'.
* Uninitialized Subscripts:: Using Uninitialized variables as
subscripts.
+* Delete:: The `delete' statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
`awk'.
* Multiscanning:: Scanning multidimensional arrays.
@@ -693,7 +695,7 @@ on Unix, I found the gray AWK book, a.k.a. Aho, Kernighan and
Weinberger, `The AWK Programming Language', Addison-Wesley, 1988.
AWK's simple programming paradigm--find a pattern in the input and then
perform an action--often reduced complex or tedious data manipulations
-to few lines of code. I was excited to try my hand at programming in
+to a few lines of code. I was excited to try my hand at programming in
AWK.
Alas, the `awk' on my computer was a limited version of the
@@ -779,8 +781,8 @@ with the proper options or environment variables (*note Options::), it
is fully compatible with the POSIX(1) specification of the `awk'
language and with the Unix version of `awk' maintained by Brian
Kernighan. This means that all properly written `awk' programs should
-work with `gawk'. Thus, we usually don't distinguish between `gawk'
-and other `awk' implementations.
+work with `gawk'. So most of the time, we don't distinguish between
+`gawk' and other `awk' implementations.
Using `awk' allows you to:
@@ -803,9 +805,9 @@ and other `awk' implementations.
* Perform simple network communications
- * Profile and debug `awk' programs.
+ * Profile and debug `awk' programs
- * Extend the language with functions written in C or C++.
+ * Extend the language with functions written in C or C++
This Info file teaches you about the `awk' language and how you can
use it effectively. You should already be familiar with basic system
@@ -818,9 +820,8 @@ different computing environments. This Info file, while describing the
of `awk' called `gawk' (which stands for "GNU `awk'"). `gawk' runs on
a broad range of Unix systems, ranging from Intel-architecture PC-based
computers up through large-scale systems. `gawk' has also been ported
-to Mac OS X, Microsoft Windows (all versions) and OS/2 PCs, and OpenVMS.
-(Some other, obsolete systems to which `gawk' was once ported are no
-longer supported and the code for those systems has been removed.)
+to Mac OS X, Microsoft Windows (all versions) and OS/2 PCs, and
+OpenVMS.(3)
* Menu:
@@ -840,11 +841,14 @@ longer supported and the code for those systems has been removed.)
(1) The 2008 POSIX standard is accessible online at
`http://www.opengroup.org/onlinepubs/9699919799/'.
- (2) These commands are available on POSIX-compliant systems, as well
-as on traditional Unix-based systems. If you are using some other
+ (2) These utilities are available on POSIX-compliant systems, as
+well as on traditional Unix-based systems. If you are using some other
operating system, you still need to be familiar with the ideas of I/O
redirection and pipes.
+ (3) Some other, obsolete systems to which `gawk' was once ported are
+no longer supported and the code for those systems has been removed.
+

File: gawk.info, Node: History, Next: Names, Up: Preface
@@ -958,7 +962,7 @@ heading "sidebar."
Most of the time, the examples use complete `awk' programs. Some of
the more advanced sections show only the part of the `awk' program that
-illustrates the concept currently being described.
+illustrates the concept being described.
While this Info file is aimed principally at people who have not been
exposed to `awk', there is a lot of information here that even the `awk'
@@ -1002,7 +1006,8 @@ described, as well as sorting arrays in `gawk'. It also describes how
`gawk' provides arrays of arrays.
*note Functions::, describes the built-in functions `awk' and `gawk'
-provide, as well as how to define your own functions.
+provide, as well as how to define your own functions. It also
+discusses how `gawk' lets you call functions indirectly.
Part II shows how to use `awk' and `gawk' for problem solving.
There is lots of code here for you to read and learn from. It contains
@@ -1457,21 +1462,21 @@ advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'),
to keep you from worrying about the complexities of computer
programming:
- $ awk "BEGIN { print "Don\47t Panic!" }"
+ $ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!
`awk' executes statements associated with `BEGIN' before reading any
input. If there are no other statements in your program, as is the
case here, `awk' just stops, instead of trying to read input it doesn't
-know how to process. The `\47' is a magic way of getting a single
-quote into the program, without having to engage in ugly shell quoting
-tricks.
+know how to process. The `\47' is a magic way (explained later) of
+getting a single quote into the program, without having to engage in
+ugly shell quoting tricks.
- NOTE: As a side note, if you use Bash as your shell, you should
- execute the command `set +H' before running this program
- interactively, to disable the C shell-style command history, which
- treats `!' as a special character. We recommend putting this
- command into your personal startup file.
+ NOTE: If you use Bash as your shell, you should execute the
+ command `set +H' before running this program interactively, to
+ disable the C shell-style command history, which treats `!' as a
+ special character. We recommend putting this command into your
+ personal startup file.
This next simple `awk' program emulates the `cat' utility; it copies
whatever you type on the keyboard to its standard output (why this
@@ -1494,9 +1499,9 @@ File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal,
1.1.3 Running Long Programs
---------------------------
-Sometimes your `awk' programs can be very long. In this case, it is
-more convenient to put the program into a separate file. In order to
-tell `awk' to use that file for its program, you type:
+Sometimes `awk' programs are very long. In these cases, it is more
+convenient to put the program into a separate file. In order to tell
+`awk' to use that file for its program, you type:
awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
@@ -1512,14 +1517,16 @@ into the file `advice'. Then this command:
does the same thing as this one:
- awk "BEGIN { print \"Don't Panic!\" }"
+ awk 'BEGIN { print "Don\47t Panic!" }'
This was explained earlier (*note Read Terminal::). Note that you
don't usually need single quotes around the file name that you specify
with `-f', because most file names don't contain any of the shell's
special characters. Notice that in `advice', the `awk' program did not
have single quotes around it. The quotes are only needed for programs
-that are provided on the `awk' command line.
+that are provided on the `awk' command line. (Also, placing the
+program in a file allows us to use a literal single quote in the program
+text, instead of the magic `\47'.)
If you want to clearly identify your `awk' program files as such,
you can add the extension `.awk' to the file name. This doesn't affect
@@ -1563,7 +1570,7 @@ program is written in `awk'.
utility reads your program and then processes your data according to
the instructions in your program. (This is different from a "compiled"
language such as C, where your program is first compiled into machine
-code that is executed directly by your system's hardware.) The `awk'
+code that is executed directly by your system's processor.) The `awk'
utility is thus termed an "interpreter". Many modern languages are
interpreted.
@@ -1573,8 +1580,8 @@ to pass to that interpreter. The operating system then runs the
interpreter with the given argument and the full argument list of the
executed program. The first argument in the list is the full file name
of the `awk' program. The rest of the argument list contains either
-options to `awk', or data files, or both. Note that on many systems
-`awk' may be found in `/usr/bin' instead of in `/bin'. Caveat Emptor.
+options to `awk', or data files, or both. (Note that on many systems
+`awk' may be found in `/usr/bin' instead of in `/bin'.)
Some systems limit the length of the interpreter name to 32
characters. Often, this can be dealt with by using a symbolic link.
@@ -1714,8 +1721,11 @@ the quoting rules.
the characters `$', ``', `\', and `"', all of which must be
preceded by a backslash within double-quoted text if they are to
be passed on literally to the program. (The leading backslash is
- stripped first.) Thus, the example seen in *note Read Terminal::,
- is applicable:
+ stripped first.) Thus, the example seen in *note Read Terminal:::
+
+ awk 'BEGIN { print "Don\47t Panic!" }'
+
+ could instead be written this way:
$ awk "BEGIN { print \"Don't Panic!\" }"
-| Don't Panic!
@@ -1781,6 +1791,9 @@ this:
$ awk -v sq="'" 'BEGIN { print "Here is a single quote <" sq ">" }'
-| Here is a single quote <'>
+ (Here, the two string constants and the value of `sq' are
+concatenated into a single string which is printed by `print'.)
+
If you really need both single and double quotes in your `awk'
program, it is probably best to move it into a separate file, where the
shell won't be part of the picture, and you can say what you mean.
@@ -1816,12 +1829,12 @@ The second data file, called `inventory-shipped', contains information
about monthly shipments. In both files, each line is considered to be
one "record".
- In the data file `mail-list', each record contains the name of a
-person, his/her phone number, his/her email-address, and a code for
-their relationship with the author of the list. The columns are
-aligned using spaces. An `A' in the last column means that the person
-is an acquaintance. An `F' in the last column means that the person is
-a friend. An `R' means that the person is a relative:
+ In `mail-list', each record contains the name of a person, his/her
+phone number, his/her email-address, and a code for their relationship
+with the author of the list. The columns are aligned using spaces. An
+`A' in the last column means that the person is an acquaintance. An
+`F' in the last column means that the person is a friend. An `R' means
+that the person is a relative:
Amelia 555-5553 amelia.zodiacusque@gmail.com F
Anthony 555-3412 anthony.asserturo@hotmail.com A
@@ -1939,7 +1952,7 @@ different ways to do the same things shown here:
* Print the length of the longest line in `data':
expand data | awk '{ if (x < length($0)) x = length($0) }
- END { print "maximum line length is " x }'
+ END { print "maximum line length is " x }'
This example differs slightly from the previous one: The input is
processed by the `expand' utility to change TABs into spaces, so
@@ -1962,7 +1975,7 @@ different ways to do the same things shown here:
* Print the total number of bytes used by FILES:
ls -l FILES | awk '{ x += $5 }
- END { print "total bytes: " x }'
+ END { print "total bytes: " x }'
* Print the total number of kilobytes used by FILES:
@@ -1991,13 +2004,13 @@ File: gawk.info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up:
=============================
The `awk' utility reads the input files one line at a time. For each
-line, `awk' tries the patterns of each of the rules. If several
-patterns match, then several actions execute in the order in which they
-appear in the `awk' program. If no patterns match, then no actions run.
+line, `awk' tries the patterns of each rule. If several patterns
+match, then several actions execute in the order in which they appear
+in the `awk' program. If no patterns match, then no actions run.
After processing all the rules that match the line (and perhaps
there are none), `awk' reads the next line. (However, *note Next
-Statement::, and also *note Nextfile Statement::). This continues
+Statement::, and also *note Nextfile Statement::.) This continues
until the program reaches the end of the file. For example, the
following `awk' program contains two rules:
@@ -2061,11 +2074,11 @@ date the file was last modified. Its output looks like this:
The first field contains read-write permissions, the second field
contains the number of links to the file, and the third field
-identifies the owner of the file. The fourth field identifies the group
-of the file. The fifth field contains the size of the file in bytes.
-The sixth, seventh, and eighth fields contain the month, day, and time,
+identifies the file's owner. The fourth field identifies the file's
+group. The fifth field contains the file's size in bytes. The sixth,
+seventh, and eighth fields contain the month, day, and time,
respectively, that the file was last modified. Finally, the ninth field
-contains the file name.(1)
+contains the file name.
The `$6 == "Nov"' in our `awk' program is an expression that tests
whether the sixth field of the output from `ls -l' matches the string
@@ -2087,11 +2100,6 @@ displays your output. By manipulating fields and using `print'
statements, you can produce some very useful and impressive-looking
reports.
- ---------- Footnotes ----------
-
- (1) The `LC_ALL=C' is needed to produce this traditional-style
-output from `ls'.
-

File: gawk.info, Node: Statements/Lines, Next: Other Features, Prev: More Complex, Up: Getting Started
@@ -2388,7 +2396,7 @@ The following list describes options mandated by the POSIX standard:
CAUTION: Using `-v' to set the values of the built-in
variables may lead to surprising results. `awk' will reset
the values of those variables as it needs to, possibly
- ignoring any predefined value you may have given.
+ ignoring any initial value you may have given.
`-W GAWK-OPT'
Provide an implementation-specific option. This is the POSIX
@@ -2439,9 +2447,9 @@ The following list describes options mandated by the POSIX standard:
`-d'[FILE]
`--dump-variables'[`='FILE]
Print a sorted list of global variables, their types, and final
- values to FILE. If no FILE is provided, print this list to the
- file named `awkvars.out' in the current directory. No space is
- allowed between the `-d' and FILE, if FILE is supplied.
+ values to FILE. If no FILE is provided, print this list to a file
+ named `awkvars.out' in the current directory. No space is allowed
+ between the `-d' and FILE, if FILE is supplied.
Having a list of all global variables is a good way to look for
typographical errors in your programs. You would also use this
@@ -2504,7 +2512,7 @@ The following list describes options mandated by the POSIX standard:
`-i' SOURCE-FILE
`--include' SOURCE-FILE
- Read `awk' source library from SOURCE-FILE. This option is
+ Read an `awk' source library from SOURCE-FILE. This option is
completely equivalent to using the `@include' directive inside
your program. This option is very similar to the `-f' option, but
there are two important differences. First, when `-i' is used,
@@ -2525,8 +2533,8 @@ The following list describes options mandated by the POSIX standard:
not be specified in the extension name. The extension
initialization routine should be named `dl_load()'. An
alternative is to use the `@load' keyword inside the program to
- load a shared library. This feature is described in detail in
- *note Dynamic Extensions::.
+ load a shared library. This advanced feature is described in
+ detail in *note Dynamic Extensions::.
`-L'[VALUE]
`--lint'[`='VALUE]
@@ -2562,6 +2570,8 @@ The following list describes options mandated by the POSIX standard:
CAUTION: This option can severely break old programs. Use
with care.
+ This option may disappear in a future version of `gawk'.
+
`-N'
`--use-lc-numeric'
Force the use of the locale's decimal point character when parsing
@@ -2661,8 +2671,9 @@ it is, `awk' reads its program source from all of the named files, as
if they had been concatenated together into one big file. This is
useful for creating libraries of `awk' functions. These functions can
be written once and then retrieved from a standard place, instead of
-having to be included into each individual program. (As mentioned in
-*note Definition Syntax::, function names must be unique.)
+having to be included into each individual program. The `-i' option is
+similar in this regard. (As mentioned in *note Definition Syntax::,
+function names must be unique.)
With standard `awk', library functions can still be used, even if
the program is entered at the keyboard, by specifying `-f /dev/tty'.
@@ -2719,14 +2730,17 @@ Any additional arguments on the command line are normally treated as
input files to be processed in the order specified. However, an
argument that has the form `VAR=VALUE', assigns the value VALUE to the
variable VAR--it does not specify a file at all. (See *note Assignment
-Options::.)
+Options::.) In the following example, COUNT=1 is a variable assignment,
+not a file name:
- All these arguments are made available to your `awk' program in the
-`ARGV' array (*note Built-in Variables::). Command-line options and
-the program text (if present) are omitted from `ARGV'. All other
-arguments, including variable assignments, are included. As each
-element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to
-the index in `ARGV' of the current element.
+ awk -f program.awk file1 count=1 file2
+
+ All the command-line arguments are made available to your `awk'
+program in the `ARGV' array (*note Built-in Variables::). Command-line
+options and the program text (if present) are omitted from `ARGV'. All
+other arguments, including variable assignments, are included. As
+each element of `ARGV' is processed, `gawk' sets the variable `ARGIND'
+to the index in `ARGV' of the current element.
Changing `ARGC' and `ARGV' in your `awk' program lets you control
how `awk' processes the input files; this is described in more detail
@@ -2835,8 +2849,8 @@ variable. If that variable does not exist, `gawk' uses a default path,
The search path feature is particularly helpful for building
libraries of useful `awk' functions. The library files can be placed
in a standard directory in the default path and then specified on the
-command line with a short file name. Otherwise, the full file name
-would have to be typed for each file.
+command line with a short file name. Otherwise, you would have to type
+the full file name for each file.
By using the `-i' option, or the `-e' and `-f' options, your
command-line `awk' programs can use facilities in `awk' library files
@@ -2844,21 +2858,20 @@ command-line `awk' programs can use facilities in `awk' library files
in compatibility mode. This is true for both `--traditional' and
`--posix'. *Note Options::.
- If the source code is not found after the initial search, the path
-is searched again after adding the default `.awk' suffix to the file
-name.
+ If the source code file is not found after the initial search, the
+path is searched again after adding the default `.awk' suffix to the
+file name.
- NOTE: To include the current directory in the path, either place
- `.' explicitly in the path or write a null entry in the path. (A
- null entry is indicated by starting or ending the path with a
- colon or by placing two colons next to each other [`::'].) This
- path search mechanism is similar to the shell's. (See `The
- Bourne-Again SHell manual'.
- (http://www.gnu.org/software/bash/manual/))
+ `gawk''s path search mechanism is similar to the shell's. (See `The
+Bourne-Again SHell manual' (http://www.gnu.org/software/bash/manual/).)
+It treats a null entry in the path as indicating the current directory.
+(A null entry is indicated by starting or ending the path with a colon
+or by placing two colons next to each other [`::'].)
- However, `gawk' always looks in the current directory _before_
- searching `AWKPATH', so there is no real reason to include the
- current directory in the search path.
+ NOTE: `gawk' always looks in the current directory _before_
+ searching `AWKPATH'. Thus, while you can include the current
+ directory in the search path, either explicitly or with a null
+ entry, there is no real reason to do so.
If `AWKPATH' is not defined in the environment, `gawk' places its
default search path into `ENVIRON["AWKPATH"]'. This makes it easy to
@@ -2905,15 +2918,6 @@ A number of other environment variables affect `gawk''s behavior, but
they are more specialized. Those in the following list are meant to be
used by regular users.
-`POSIXLY_CORRECT'
- Causes `gawk' to switch to POSIX compatibility mode, disabling all
- traditional and GNU extensions. *Note Options::.
-
-`GAWK_SOCK_RETRIES'
- Controls the number of times `gawk' attempts to retry a two-way
- TCP/IP (socket) connection before giving up. *Note TCP/IP
- Networking::.
-
`GAWK_MSEC_SLEEP'
Specifies the interval between connection retries, in
milliseconds. On systems that do not support the `usleep()' system
@@ -2923,6 +2927,15 @@ used by regular users.
Specifies the time, in milliseconds, for `gawk' to wait for input
before returning with an error. *Note Read Timeout::.
+`GAWK_SOCK_RETRIES'
+ Controls the number of times `gawk' attempts to retry a two-way
+ TCP/IP (socket) connection before giving up. *Note TCP/IP
+ Networking::.
+
+`POSIXLY_CORRECT'
+ Causes `gawk' to switch to POSIX compatibility mode, disabling all
+ traditional and GNU extensions. *Note Options::.
+
The environment variables in the following list are meant for use by
the `gawk' developers for testing and tuning. They are subject to
change. The variables are:
@@ -2934,7 +2947,7 @@ change. The variables are:
the value should be a number, and `gawk' uses that number as the
size of the buffer to allocate. (When this variable is not set,
`gawk' uses the smaller of the file's size and the "default"
- blocksize, which is usually the filesystems I/O blocksize.)
+ blocksize, which is usually the filesystem's I/O blocksize.)
`AWK_HASH'
If this variable exists with a value of `gst', `gawk' switches to
@@ -2948,11 +2961,11 @@ change. The variables are:
where I/O is performed in records, not in blocks.
`GAWK_MSG_SRC'
- If this variable exists, `gawk' includes the source file name and
- line number from which warning and/or fatal messages are
- generated. Its purpose is to help isolate the source of a
- message, since there can be multiple places which produce the same
- warning or error message.
+ If this variable exists, `gawk' includes the file name and line
+ number within the `gawk' source code from which warning and/or
+ fatal messages are generated. Its purpose is to help isolate the
+ source of a message, since there are multiple places which produce
+ the same warning or error message.
`GAWK_NO_DFA'
If this variable exists, `gawk' does not use the DFA regexp matcher
@@ -3126,7 +3139,8 @@ is useful for embedding inside an `awk' source file that requires
access to an extension.
*note Dynamic Extensions::, describes how to write extensions (in C
-or C++) that can be loaded with either `@load' or the `-l' option.
+or C++) that can be loaded with either `@load' or the `-l' option. It
+also describes the `ordchr' extension.

File: gawk.info, Node: Obsolete, Next: Undocumented, Prev: Loading Shared Libraries, Up: Invoking Gawk
@@ -3177,7 +3191,8 @@ File: gawk.info, Node: Invoking Summary, Prev: Undocumented, Up: Invoking Gaw
affects how `awk' processes input.
* You can use a single minus sign (`-') to refer to standard input
- on the command line.
+ on the command line. `gawk' also lets you use the special file
+ name `/dev/stdin'.
* `gawk' pays attention to a number of environment variables.
`AWKPATH', `AWKLIBPATH', and `POSIXLY_CORRECT' are the most
@@ -3315,9 +3330,9 @@ or newline. While there is nothing to stop you from entering most
unprintable characters directly in a string constant or regexp constant,
they may look ugly.
- The following table lists all the escape sequences used in `awk' and
-what they represent. Unless noted otherwise, all these escape sequences
-apply to both string constants and regexp constants:
+ The following list presents all the escape sequences used in `awk'
+and what they represent. Unless noted otherwise, all these escape
+sequences apply to both string constants and regexp constants:
`\\'
A literal backslash, `\'.
@@ -3391,11 +3406,11 @@ normally be a regexp operator. For example, `/a\+b/' matches the three
characters `a+b'.
For complete portability, do not use a backslash before any
-character not shown in the previous list.
+character not shown in the previous list and that is not an operator.
To summarize:
- * The escape sequences in the table above are always processed first,
+ * The escape sequences in the list above are always processed first,
for both string constants and regexp constants. This happens very
early, as soon as `awk' reads your program.
@@ -3453,7 +3468,7 @@ and converted into corresponding real characters as the very first step
in processing regexps.
Here is a list of metacharacters. All characters that are not escape
-sequences and that are not listed in the table stand for themselves:
+sequences and that are not listed in the following stand for themselves:
`\'
This is used to suppress the special meaning of a character when
@@ -3641,8 +3656,8 @@ matches either `d' or `]'. Additionally, if you place `]' right after
the opening `[', the closing bracket is treated as one of the
characters to be matched.
- This treatment of `\' in bracket expressions is compatible with
-other `awk' implementations and is also mandated by POSIX. The regular
+ The treatment of `\' in bracket expressions is compatible with other
+`awk' implementations and is also mandated by POSIX. The regular
expressions in `awk' are a superset of the POSIX specification for
Extended Regular Expressions (EREs). POSIX EREs are based on the
regular expressions accepted by the traditional `egrep' utility.
@@ -3730,10 +3745,11 @@ Consider the following:
echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
- This example uses the `sub()' function (which we haven't discussed
-yet; *note String Functions::) to make a change to the input record.
-Here, the regexp `/a+/' indicates "one or more `a' characters," and the
-replacement text is `<A>'.
+ This example uses the `sub()' function to make a change to the input
+record. (`sub()' replaces the first instance of any text matched by
+the first argument with the string provided as the second argument;
+*note String Functions::). Here, the regexp `/a+/' indicates "one or
+more `a' characters," and the replacement text is `<A>'.
The input contains four `a' characters. `awk' (and POSIX) regular
expressions always match the leftmost, _longest_ sequence of input
@@ -3809,15 +3825,15 @@ constants," for several reasons:
Using `\n' in Bracket Expressions of Dynamic Regexps
- Some versions of `awk' do not allow the newline character to be used
-inside a bracket expression for a dynamic regexp:
+ Some older versions of `awk' do not allow the newline character to
+be used inside a bracket expression for a dynamic regexp:
$ awk '$0 ~ "[ \t\n]"'
error--> awk: newline in character class [
error--> ]...
error--> source line number 1
error--> context is
- error--> >>> <<<
+ error--> $0 ~ "[ >>> \t\n]" <<<
But a newline in a regexp constant works with no problem:
@@ -4025,10 +4041,6 @@ File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
Within bracket expressions, POSIX character classes let you specify
certain groups of characters in a locale-independent fashion.
- * `gawk''s `IGNORECASE' variable lets you control the case
- sensitivity of regexp matching. In other `awk' versions, use
- `tolower()' or `toupper()'.
-
* Regular expressions match the leftmost longest text in the string
being matched. This matters for cases where you need to know the
extent of the match, such as for text substitution and when the
@@ -4037,6 +4049,10 @@ File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
* Matching expressions may use dynamic regexps, that is, string
values treated as regular expressions.
+ * `gawk''s `IGNORECASE' variable lets you control the case
+ sensitivity of regexp matching. In other `awk' versions, use
+ `tolower()' or `toupper()'.
+

File: gawk.info, Node: Reading Files, Next: Printing, Prev: Regexp, Up: Top
@@ -4090,7 +4106,7 @@ File: gawk.info, Node: Records, Next: Fields, Up: Reading Files
`awk' divides the input for your program into records and fields. It
keeps track of the number of records that have been read so far from
the current input file. This value is stored in a built-in variable
-called `FNR' which is reset to zero when a new file is started.
+called `FNR' which is reset to zero every time a new file is started.
Another built-in variable, `NR', records the total number of input
records read so far from all data files. It starts at zero, but is
never automatically reset to zero.
@@ -4198,9 +4214,10 @@ character such as `/' is more likely to produce correct behavior in the
majority of cases, but there are no guarantees. The moral is: Know Your
Data.
- There is one unusual case, that occurs when `gawk' is being fully
-POSIX-compliant (*note Options::). Then, the following (extreme)
-pipeline prints a surprising `1':
+ When using regular characters as the record separator, there is one
+unusual case that occurs when `gawk' is being fully POSIX-compliant
+(*note Options::). Then, the following (extreme) pipeline prints a
+surprising `1':
$ echo | gawk --posix 'BEGIN { RS = "a" } ; { print NF }'
-| 1
@@ -4265,9 +4282,9 @@ trailing whitespace:
-| ]
The square brackets delineate the contents of `RT', letting you see the
-leading and trailing whitespace. The final value of `RT' `RT' is a
-newline. *Note Simple Sed::, for a more useful example of `RS' as a
-regexp and `RT'.
+leading and trailing whitespace. The final value of `RT' is a newline.
+*Note Simple Sed::, for a more useful example of `RS' as a regexp and
+`RT'.
If you set `RS' to a regular expression that allows optional
trailing text, such as `RS = "abc(XYZ)?"' it is possible, due to
@@ -4282,13 +4299,13 @@ that this will never happen.
the beginning and end of a _line_. As a result, something like
`RS = "^[[:upper:]]"' can only match at the beginning of a file.
This is because `gawk' views the input file as one long string
- that happens to contain newline characters in it. It is thus best
- to avoid anchor characters in the value of `RS'.
+ that happens to contain newline characters. It is thus best to
+ avoid anchor characters in the value of `RS'.
The use of `RS' as a regular expression and the `RT' variable are
`gawk' extensions; they are not available in compatibility mode (*note
Options::). In compatibility mode, only the first character of the
-value of `RS' is used to determine the end of the record.
+value of `RS' determines the end of the record.
`RS = "\0"' Is Not Portable
@@ -4317,11 +4334,12 @@ terminator. In effect, this means that `RS = "\0"' is the same as `RS
It happens that recent versions of `mawk' can use the NUL character
as a record separator. However, this is a special case: `mawk' does not
-allow embedded NUL characters in strings.
+allow embedded NUL characters in strings. (This may change in a future
+version of `mawk'.)
- *Note Readfile Function::, for an interesting, portable way to read
-whole files. If you are using `gawk', see *note Extension Sample
-Readfile::, for another option.
+ *Note Readfile Function::, for an interesting way to read whole
+files. If you are using `gawk', see *note Extension Sample Readfile::,
+for another option.
---------- Footnotes ----------
@@ -4378,13 +4396,11 @@ examples:
-| Julie 555-6699 julie.perscrutabor@skeeve.com F
This example prints each record in the file `mail-list' whose first
-field contains the string `li'. The operator `~' is called a "matching
-operator" (*note Regexp Usage::); it tests whether a string (here, the
-field `$1') matches a given regular expression.
+field contains the string `li'.
By contrast, the following example looks for `li' in _the entire
-record_ and prints the first field and the last field for each matching
-input record:
+record_ and prints the first and last fields for each matching input
+record:
$ awk '/li/ { print $1, $NF }' mail-list
-| Amelia F
@@ -4560,12 +4576,12 @@ value six.
value of `NF' and recomputes `$0'. (d.c.) Here is an example:
$ echo a b c d e f | awk '{ print "NF =", NF;
- > NF = 3; print $0 }'
+ > NF = 3; print $0 }'
-| NF = 6
-| a b c
CAUTION: Some versions of `awk' don't rebuild `$0' when `NF' is
- decremented. Caveat emptor.
+ decremented.
Finally, there are times when it is convenient to force `awk' to
rebuild the entire record, using the current value of the fields and
@@ -4590,8 +4606,8 @@ as it was read from the input. This includes any leading or trailing
whitespace, and the exact whitespace (or other characters) that
separate the fields.
- It is a not-uncommon error to try to change the field separators in
-a record simply by setting `FS' and `OFS', and then expecting a plain
+ It is a common error to try to change the field separators in a
+record simply by setting `FS' and `OFS', and then expecting a plain
`print' or `print $0' to print the modified record.
But this does not work, since nothing was done to change the record
@@ -4741,9 +4757,9 @@ play whenever `$0' is recomputed. For instance, study this pipeline:
The first `print' statement prints the record as it was read, with
leading whitespace intact. The assignment to `$2' rebuilds `$0' by
concatenating `$1' through `$NF' together, separated by the value of
-`OFS'. Because the leading whitespace was ignored when finding `$1',
-it is not part of the new `$0'. Finally, the last `print' statement
-prints the new `$0'.
+`OFS' (which is a space by default). Because the leading whitespace
+was ignored when finding `$1', it is not part of the new `$0'.
+Finally, the last `print' statement prints the new `$0'.
There is an additional subtlety to be aware of when using regular
expressions for field splitting. It is not well-specified in the POSIX
@@ -4758,7 +4774,7 @@ beginning of the record. `gawk' also works this way. For example:
$ echo 'xxAA xxBxx C' |
> gawk -F '(^x+)|( +)' '{ for (i = 1; i <= NF; i++)
- > printf "-->%s<--\n", $i }'
+ > printf "-->%s<--\n", $i }'
-| --><--
-| -->AA<--
-| -->xxBxx<--
@@ -4803,10 +4819,7 @@ For example:
sets `FS' to the `,' character. Notice that the option uses an
uppercase `F' instead of a lowercase `f'. The latter option (`-f')
-specifies a file containing an `awk' program. Case is significant in
-command-line options: the `-F' and `-f' options have nothing to do with
-each other. You can use both options at the same time to set the `FS'
-variable _and_ get an `awk' program from a file.
+specifies a file containing an `awk' program.
The value used for the argument to `-F' is processed in exactly the
same way as assignments to the built-in variable `FS'. Any special
@@ -4904,7 +4917,7 @@ occurrences of any two characters." If instead you want fields to be
separated by a literal period followed by any single character, use `FS
= "\\.."'.
- The following table summarizes how fields are split, based on the
+ The following list summarizes how fields are split, based on the
value of `FS' (`==' means "is equal to"):
`FS == " "'
@@ -4924,7 +4937,7 @@ value of `FS' (`==' means "is equal to"):
`FS == ""'
Each individual character in the record becomes a separate field.
- (This is a `gawk' extension; it is not specified by the POSIX
+ (This is a common extension; it is not specified by the POSIX
standard.)
Changing `FS' Does Not Affect the Fields
@@ -5295,7 +5308,7 @@ A simple program to process this file is as follows:
...
*Note Labels Program::, for a more realistic program that deals with
-address lists. The following table summarizes how records are split,
+address lists. The following list summarizes how records are split,
based on the value of `RS'. (`==' means "is equal to.")
`RS == "\n"'
@@ -5319,9 +5332,10 @@ based on the value of `RS'. (`==' means "is equal to.")
records. (This is a `gawk' extension; it is not specified by the
POSIX standard.)
- In all cases, `gawk' sets `RT' to the input text that matched the
-value specified by `RS'. But if the input file ended without any text
-that matches `RS', then `gawk' sets `RT' to the null string.
+ If not in compatibility mode (*note Options::), `gawk' sets `RT' to
+the input text that matched the value specified by `RS'. But if the
+input file ended without any text that matches `RS', then `gawk' sets
+`RT' to the null string.
---------- Footnotes ----------
@@ -5400,9 +5414,7 @@ processing on the next record _right now_. For example:
while (j == 0) {
# get more text
if (getline <= 0) {
- m = "unexpected EOF or error"
- m = (m ": " ERRNO)
- print m > "/dev/stderr"
+ print("unexpected EOF or error:", ERRNO) > "/dev/stderr"
exit
}
# build up the line using string concatenation
@@ -5605,9 +5617,9 @@ the program might produce:
bill ttyp1 Jul 13 14:23 (murphy:0)
bletch
-Notice that this program ran the command `who' and printed the previous
-result. (If you try this program yourself, you will of course get
-different results, depending upon who is logged in on your system.)
+Notice that this program ran the command `who' and printed the result.
+(If you try this program yourself, you will of course get different
+results, depending upon who is logged in on your system.)
This variation of `getline' splits the record into fields, sets the
value of `NF', and recomputes the value of `$0'. The values of `NR'
@@ -5623,10 +5635,10 @@ all `awk' implementations.
NOTE: Unfortunately, `gawk' has not been consistent in its
treatment of a construct like `"echo " "date" | getline'. Most
versions, including the current version, treat it at as `("echo "
- "date") | getline'. (This how BWK `awk' behaves.) Some versions
- changed and treated it as `"echo " ("date" | getline)'. (This is
- how `mawk' behaves.) In short, _always_ use explicit parentheses,
- and then you won't have to worry.
+ "date") | getline'. (This is also how BWK `awk' behaves.) Some
+ versions changed and treated it as `"echo " ("date" | getline)'.
+ (This is how `mawk' behaves.) In short, _always_ use explicit
+ parentheses, and then you won't have to worry.

File: gawk.info, Node: Getline/Variable/Pipe, Next: Getline/Coprocess, Prev: Getline/Pipe, Up: Getline
@@ -5646,7 +5658,7 @@ following program reads the current date and time into the variable
}
In this version of `getline', none of the built-in variables are
-changed and the record is not split into fields.
+changed and the record is not split into fields. However, `RT' is set.
According to POSIX, `EXPRESSION | getline VAR' is ambiguous if
EXPRESSION contains unparenthesized operators other than `$'; for
@@ -5728,7 +5740,7 @@ in mind:
`getline' command causes `awk' to set the value of `FILENAME'.
Normally, `FILENAME' does not have a value inside `BEGIN' rules,
because you have not yet started to process the command-line data
- files. (d.c.) (*Note BEGIN/END::, also *note Auto-set::.)
+ files. (d.c.) (See *note BEGIN/END::; also *note Auto-set::.)
* Using `FILENAME' with `getline' (`getline < FILENAME') is likely
to be a source for confusion. `awk' opens a separate input stream
@@ -5761,7 +5773,7 @@ in mind:
`gawk' treats `getline' like a function call, and evaluates the
expression `a[++c]' before attempting to read from `f'. However,
some versions of `awk' only evaluate the expression once they know
- that there is a string value to be assigned. Caveat Emptor.
+ that there is a string value to be assigned.

File: gawk.info, Node: Getline Summary, Prev: Getline Notes, Up: Getline
@@ -5774,19 +5786,18 @@ File: gawk.info, Node: Getline Summary, Prev: Getline Notes, Up: Getline
whether the variant is standard or a `gawk' extension. Note: for each
variant, `gawk' sets the `RT' built-in variable.
-Variant Effect Standard /
- Extension
+Variant Effect `awk' / `gawk'
-------------------------------------------------------------------------
-`getline' Sets `$0', `NF', `FNR', Standard
+`getline' Sets `$0', `NF', `FNR', `awk'
`NR', and `RT'
-`getline' VAR Sets VAR, `FNR', `NR', and Standard
+`getline' VAR Sets VAR, `FNR', `NR', and `awk'
`RT'
-`getline <' FILE Sets `$0', `NF', and `RT' Standard
-`getline VAR < FILE' Sets VAR and `RT' Standard
-COMMAND `| getline' Sets `$0', `NF', and `RT' Standard
-COMMAND `| getline' VAR Sets VAR and `RT' Standard
-COMMAND `|& getline' Sets `$0', `NF', and `RT' Extension
-COMMAND `|& getline' Sets VAR and `RT' Extension
+`getline <' FILE Sets `$0', `NF', and `RT' `awk'
+`getline VAR < FILE' Sets VAR and `RT' `awk'
+COMMAND `| getline' Sets `$0', `NF', and `RT' `awk'
+COMMAND `| getline' VAR Sets VAR and `RT' `awk'
+COMMAND `|& getline' Sets `$0', `NF', and `RT' `gawk'
+COMMAND `|& getline' Sets VAR and `RT' `gawk'
VAR
Table 4.1: `getline' Variants and What They Set
@@ -5802,7 +5813,7 @@ This minor node describes a feature that is specific to `gawk'.
You may specify a timeout in milliseconds for reading input from the
keyboard, a pipe, or two-way communication, including TCP/IP sockets.
This can be done on a per input, command or connection basis, by
-setting a special element in the `PROCINFO' (*note Auto-set::) array:
+setting a special element in the `PROCINFO' array (*note Auto-set::):
PROCINFO["input_name", "READ_TIMEOUT"] = TIMEOUT IN MILLISECONDS
@@ -5826,9 +5837,9 @@ for more than five seconds:
print $0
`gawk' terminates the read operation if input does not arrive after
-waiting for the timeout period, returns failure and sets the `ERRNO'
-variable to an appropriate string value. A negative or zero value for
-the timeout is the same as specifying no timeout at all.
+waiting for the timeout period, returns failure and sets `ERRNO' to an
+appropriate string value. A negative or zero value for the timeout is
+the same as specifying no timeout at all.
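+
+   As a sketch of how a program might react to such a failure (the
+file name is just an example), check `getline''s return value and then
+`ERRNO':
+
+     BEGIN {
+         PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 5000
+         if ((getline line < "/dev/stdin") <= 0)
+             print "read failed or timed out:", ERRNO > "/dev/stderr"
+     }
+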
A timeout can also be set for reading from the keyboard in the
implicit loop that reads input records and matches them against
@@ -5926,6 +5937,10 @@ File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-li
A regexp Text that matches the `gawk'
regexp
+ * `FNR' indicates how many records have been read from the current
+ input file; `NR' indicates how many records have been read in
+ total.
+
* `gawk' sets `RT' to the text matched by `RS'.
* After splitting the input into records, `awk' further splits the
@@ -5943,32 +5958,31 @@ File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-li
* Field splitting is more complicated than record splitting.
- Field separator value Fields are split ... `awk' /
- `gawk'
+ Field separator value Fields are split ... `awk' /
+ `gawk'
----------------------------------------------------------------------
- `FS == " "' On runs of whitespace `awk'
- `FS == ANY SINGLE On that character `awk'
- CHARACTER'
- `FS == REGEXP' On text matching the `awk'
- regexp
- `FS == ""' Each individual character `gawk'
- is a separate field
- `FIELDWIDTHS == LIST OF Based on character `gawk'
- COLUMNS' position
- `FPAT == REGEXP' On text around text `gawk'
- matching the regexp
-
- Using `FS = "\n"' causes the entire record to be a single field
+ `FS == " "' On runs of whitespace `awk'
+ `FS == ANY SINGLE On that character `awk'
+ CHARACTER'
+ `FS == REGEXP' On text matching the regexp `awk'
+ `FS == ""' Each individual character is `gawk'
+ a separate field
+ `FIELDWIDTHS == LIST OF Based on character position `gawk'
+ COLUMNS'
+ `FPAT == REGEXP' On the text surrounding text `gawk'
+ matching the regexp
+
+ * Using `FS = "\n"' causes the entire record to be a single field
(assuming that newlines separate records).
* `FS' may be set from the command line using the `-F' option. This
can also be done using command-line variable assignment.
- * `PROCINFO["FS"]' can be used to see how fields are being split.
+ * Use `PROCINFO["FS"]' to see how fields are being split.
* Use `getline' in its various forms to read additional records,
from the default input stream, from a file, or from a pipe or
- co-process.
+ coprocess.
* Use `PROCINFO[FILE, "READ_TIMEOUT"]' to cause reads to timeout for
FILE.
@@ -6026,6 +6040,7 @@ function.
* Printf:: The `printf' statement.
* Redirection:: How to redirect output to multiple files and
pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in `gawk'.
`gawk' allows access to inherited file
descriptors.
@@ -6039,10 +6054,10 @@ File: gawk.info, Node: Print, Next: Print Examples, Up: Printing
5.1 The `print' Statement
=========================
-The `print' statement is used for producing output with simple,
-standardized formatting. You specify only the strings or numbers to
-print, in a list separated by commas. They are output, separated by
-single spaces, followed by a newline. The statement looks like this:
+Use the `print' statement to produce output with simple, standardized
+formatting. You specify only the strings or numbers to print, in a
+list separated by commas. They are output, separated by single spaces,
+followed by a newline. The statement looks like this:
print ITEM1, ITEM2, ...
@@ -6057,14 +6072,14 @@ Numeric values are converted to strings and then printed.
The simple statement `print' with no items is equivalent to `print
$0': it prints the entire current record. To print a blank line, use
-`print ""', where `""' is the empty string. To print a fixed piece of
-text, use a string constant, such as `"Don't Panic"', as one item. If
-you forget to use the double-quote characters, your text is taken as an
-`awk' expression, and you will probably get an error. Keep in mind
-that a space is printed between any two items.
+`print ""'. To print a fixed piece of text, use a string constant,
+such as `"Don't Panic"', as one item. If you forget to use the
+double-quote characters, your text is taken as an `awk' expression, and
+you will probably get an error. Keep in mind that a space is printed
+between any two items.
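+
+   For example, as a small made-up illustration:
+
+     print "The answer is", 42
+
+prints `The answer is 42', with a single space between the two items
+and a newline at the end.
+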
Note that the `print' statement is a statement and not an
-expression--you can't use it the pattern part of a pattern-action
+expression--you can't use it in the pattern part of a PATTERN-ACTION
statement, for example.

@@ -6219,12 +6234,12 @@ to format numbers (or strings), and that there are a number of
different ways in which numbers can be formatted. The different format
specifications are discussed more fully in *note Control Letters::.
- The built-in variable `OFMT' contains the default format
-specification that `print' uses with `sprintf()' when it wants to
-convert a number to a string for printing. The default value of `OFMT'
-is `"%.6g"'. The way `print' prints numbers can be changed by
-supplying different format specifications as the value of `OFMT', as
-shown in the following example:
+ The built-in variable `OFMT' contains the format specification that
+`print' uses with `sprintf()' when it wants to convert a number to a
+string for printing. The default value of `OFMT' is `"%.6g"'. The way
+`print' prints numbers can be changed by supplying a different format
+specification for the value of `OFMT', as shown in the following
+example:
$ awk 'BEGIN {
> OFMT = "%.0f" # print numbers as integers (rounds)
@@ -6246,8 +6261,6 @@ by `print', use `printf'. With `printf' you can specify the width to
use for each item, as well as various formatting choices for numbers
(such as what output base to use, whether to print an exponent, whether
to print a sign, and how many digits to print after the decimal point).
-You do this by supplying a string, called the "format string", that
-controls how and where to print the other arguments.
* Menu:
@@ -6266,10 +6279,10 @@ A simple `printf' statement looks like this:
printf FORMAT, ITEM1, ITEM2, ...
-The entire list of arguments may optionally be enclosed in parentheses.
-The parentheses are necessary if any of the item expressions use the `>'
-relational operator; otherwise, it can be confused with an output
-redirection (*note Redirection::).
+As with `print', the entire list of arguments may optionally be
+enclosed in parentheses. Here too, the parentheses are necessary if any
+of the item expressions use the `>' relational operator; otherwise, it
+can be confused with an output redirection (*note Redirection::).
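+
+   For instance, in this sketch (the variable name is invented), the
+parentheses let `>' act as a comparison rather than a redirection:
+
+     printf("%d is %s\n", x, x > 10 ? "big" : "small")
+
+Without the enclosing parentheses, the `> 10' would be taken as a
+redirection of the output.
+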
The difference between `printf' and `print' is the FORMAT argument.
This is an expression whose value is taken as a string; it specifies
@@ -6290,10 +6303,10 @@ statements. For example:
$ awk 'BEGIN {
> ORS = "\nOUCH!\n"; OFS = "+"
- > msg = "Dont Panic!"
+ > msg = "Don\47t Panic!"
> printf "%s\n", msg
> }'
- -| Dont Panic!
+ -| Don't Panic!
Here, neither the `+' nor the `OUCH' appear in the output message.
@@ -6311,9 +6324,9 @@ print. The rest of the format specifier is made up of optional
width. Here is a list of the format-control letters:
`%c'
- Print a number as an ASCII character; thus, `printf "%c", 65'
- outputs the letter `A'. The output for a string value is the first
- character of the string.
+ Print a number as a character; thus, `printf "%c", 65' outputs the
+ letter `A'. The output for a string value is the first character
+ of the string.
NOTE: The POSIX standard says the first character of a string
is printed. In locales with multibyte characters, `gawk'
@@ -6406,7 +6419,7 @@ File: gawk.info, Node: Format Modifiers, Next: Printf Examples, Prev: Control
A format specification can also include "modifiers" that can control
how much of the item's value is printed, as well as how much space it
gets. The modifiers come between the `%' and the format-control letter.
-We will use the bullet symbol "*" in the following examples to represent
+We use the bullet symbol "*" in the following examples to represent
spaces in the output. Here are the possible modifiers, in the order in
which they may appear:
@@ -6426,8 +6439,7 @@ which they may appear:
At first glance, this feature doesn't seem to be of much use. It
is in fact a `gawk' extension, intended for use in translating
messages at runtime. *Note Printf Ordering::, which describes how
- and why to use positional specifiers. For now, we will not use
- them.
+ and why to use positional specifiers. For now, we ignore them.
`-'
The minus sign, used before the width modifier (see later on in
@@ -6457,10 +6469,10 @@ which they may appear:
trailing zeros are not removed from the result.
`0'
- A leading `0' (zero) acts as a flag that indicates that output
- should be padded with zeros instead of spaces. This applies only
- to the numeric output formats. This flag only has an effect when
- the field width is wider than the value to print.
+ A leading `0' (zero) acts as a flag indicating that output should
+ be padded with zeros instead of spaces. This applies only to the
+ numeric output formats. This flag only has an effect when the
+ field width is wider than the value to print.
`''
A single quote or apostrophe character is a POSIX extension to ISO
@@ -6608,14 +6620,14 @@ beginning of the `awk' program:
awk 'BEGIN { print "Name Number"
print "---- ------" }
- { printf "%-10s %s\n", $1, $2 }' mail-list
+ { printf "%-10s %s\n", $1, $2 }' mail-list
The above example mixes `print' and `printf' statements in the same
program. Using just `printf' statements can produce the same results:
awk 'BEGIN { printf "%-10s %s\n", "Name", "Number"
printf "%-10s %s\n", "----", "------" }
- { printf "%-10s %s\n", $1, $2 }' mail-list
+ { printf "%-10s %s\n", $1, $2 }' mail-list
Printing each column heading with the same format specification used
for the column elements ensures that the headings are aligned just like
@@ -6627,10 +6639,10 @@ be emphasized by storing it in a variable, like this:
awk 'BEGIN { format = "%-10s %s\n"
printf format, "Name", "Number"
printf format, "----", "------" }
- { printf format, $1, $2 }' mail-list
+ { printf format, $1, $2 }' mail-list

-File: gawk.info, Node: Redirection, Next: Special Files, Prev: Printf, Up: Printing
+File: gawk.info, Node: Redirection, Next: Special FD, Prev: Printf, Up: Printing
5.6 Redirecting Output of `print' and `printf'
==============================================
@@ -6640,7 +6652,7 @@ output, usually the screen. Both `print' and `printf' can also send
their output to other places. This is called "redirection".
NOTE: When `--sandbox' is specified (*note Options::), redirecting
- output to files and pipes is disabled.
+     output to files, pipes, and coprocesses is disabled.
A redirection appears after the `print' or `printf' statement.
Redirections in `awk' are written just like redirections in shell
@@ -6711,16 +6723,10 @@ work identically for `printf':
maintenance:
report = "mail bug-system"
- print "Awk script failed:", $0 | report
- m = ("at record number " FNR " of " FILENAME)
- print m | report
+ print("Awk script failed:", $0) | report
+ print("at record number", FNR, "of", FILENAME) | report
close(report)
- The message is built using string concatenation and saved in the
- variable `m'. It's then sent down the pipeline to the `mail'
- program. (The parentheses group the items to concatenate--see
- *note Concatenation::.)
-
The `close()' function is called here because it's a good idea to
close the pipe as soon as all the intended output has been sent to
it. *Note Close Files And Pipes::, for more information.
@@ -6787,39 +6793,26 @@ The program builds up a list of command lines, using the `mv' utility
to rename the files. It then sends the list to the shell for execution.

-File: gawk.info, Node: Special Files, Next: Close Files And Pipes, Prev: Redirection, Up: Printing
-
-5.7 Special File Names in `gawk'
-================================
-
-`gawk' provides a number of special file names that it interprets
-internally. These file names provide access to standard file
-descriptors and TCP/IP networking.
+File: gawk.info, Node: Special FD, Next: Special Files, Prev: Redirection, Up: Printing
-* Menu:
-
-* Special FD:: Special files for I/O.
-* Special Network:: Special files for network communications.
-* Special Caveats:: Things to watch out for.
-
-
-File: gawk.info, Node: Special FD, Next: Special Network, Up: Special Files
-
-5.7.1 Special Files for Standard Descriptors
---------------------------------------------
+5.7 Special Files for Standard Pre-Opened Data Streams
+======================================================
Running programs conventionally have three input and output streams
already available to them for reading and writing. These are known as
the "standard input", "standard output", and "standard error output".
-These streams are, by default, connected to your keyboard and screen,
-but they are often redirected with the shell, via the `<', `<<', `>',
-`>>', `>&', and `|' operators. Standard error is typically used for
-writing error messages; the reason there are two separate streams,
+These open streams (and any other open file or pipe) are often referred
+to by the technical term "file descriptors".
+
+ These streams are, by default, connected to your keyboard and
+screen, but they are often redirected with the shell, via the `<', `<<',
+`>', `>>', `>&', and `|' operators. Standard error is typically used
+for writing error messages; the reason there are two separate streams,
standard output and standard error, is so that they can be redirected
separately.
- In other implementations of `awk', the only way to write an error
-message to standard error in an `awk' program is as follows:
+ In traditional implementations of `awk', the only way to write an
+error message to standard error in an `awk' program is as follows:
print "Serious error detected!" | "cat 1>&2"
@@ -6834,19 +6827,18 @@ error messages to the screen, like this:
(`/dev/tty' is a special file supplied by the operating system that is
connected to your keyboard and screen. It represents the "terminal,"(1)
which on modern systems is a keyboard and screen, not a serial console.)
-This usually has the same effect but not always: although the standard
-error stream is usually the screen, it can be redirected; when that
-happens, writing to the screen is not correct. In fact, if `awk' is
-run from a background job, it may not have a terminal at all. Then
+This generally has the same effect but not always: although the
+standard error stream is usually the screen, it can be redirected; when
+that happens, writing to the screen is not correct. In fact, if `awk'
+is run from a background job, it may not have a terminal at all. Then
opening `/dev/tty' fails.
- `gawk' provides special file names for accessing the three standard
-streams. (c.e.) It also provides syntax for accessing any other
-inherited open files. If the file name matches one of these special
-names when `gawk' redirects input or output, then it directly uses the
-stream that the file name stands for. These special file names work
-for all operating systems that `gawk' has been ported to, not just
-those that are POSIX-compliant:
+   `gawk', BWK `awk', and `mawk' provide special file names for
+accessing the three standard streams. If the file name matches one of
+these special names when `gawk' (or one of the others) redirects input
+or output, then it directly uses the descriptor that the file name
+stands for. These special file names work for all operating systems
+that `gawk' has been ported to, not just those that are POSIX-compliant:
`/dev/stdin'
The standard input (file descriptor 0).
@@ -6857,16 +6849,8 @@ those that are POSIX-compliant:
`/dev/stderr'
The standard error output (file descriptor 2).
-`/dev/fd/N'
- The file associated with file descriptor N. Such a file must be
- opened by the program initiating the `awk' execution (typically
- the shell). Unless special pains are taken in the shell from which
- `gawk' is invoked, only descriptors 0, 1, and 2 are available.
-
- The file names `/dev/stdin', `/dev/stdout', and `/dev/stderr' are
-aliases for `/dev/fd/0', `/dev/fd/1', and `/dev/fd/2', respectively.
-However, they are more self-explanatory. The proper way to write an
-error message in a `gawk' program is to use `/dev/stderr', like this:
+ With these facilities, the proper way to write an error message then
+becomes:
print "Serious error detected!" > "/dev/stderr"
@@ -6874,21 +6858,60 @@ error message in a `gawk' program is to use `/dev/stderr', like this:
redirection, the value must be a string. It is a common error to omit
the quotes, which leads to confusing results.
- Finally, using the `close()' function on a file name of the form
-`"/dev/fd/N"', for file descriptor numbers above two, does actually
-close the given file descriptor.
-
- The `/dev/stdin', `/dev/stdout', and `/dev/stderr' special files are
-also recognized internally by several other versions of `awk'.
+ `gawk' does not treat these file names as special when in POSIX
+compatibility mode. However, since BWK `awk' supports them, `gawk' does
+support them even when invoked with the `--traditional' option (*note
+Options::).
---------- Footnotes ----------
(1) The "tty" in `/dev/tty' stands for "Teletype," a serial terminal.

-File: gawk.info, Node: Special Network, Next: Special Caveats, Prev: Special FD, Up: Special Files
+File: gawk.info, Node: Special Files, Next: Close Files And Pipes, Prev: Special FD, Up: Printing
+
+5.8 Special File Names in `gawk'
+================================
+
+Besides access to standard input, standard output, and standard error,
+`gawk' provides access to any open file descriptor. Additionally,
+there are special file names reserved for TCP/IP networking.
+
+* Menu:
+
+* Other Inherited Files:: Accessing other open files with
+ `gawk'.
+* Special Network:: Special files for network communications.
+* Special Caveats:: Things to watch out for.
+
+
+File: gawk.info, Node: Other Inherited Files, Next: Special Network, Up: Special Files
+
+5.8.1 Accessing Other Open Files With `gawk'
+--------------------------------------------
+
+Besides the `/dev/stdin', `/dev/stdout', and `/dev/stderr' special file
+names mentioned earlier, `gawk' provides syntax for accessing any other
+inherited open file:
+
+`/dev/fd/N'
+ The file associated with file descriptor N. Such a file must be
+ opened by the program initiating the `awk' execution (typically
+ the shell). Unless special pains are taken in the shell from which
+ `gawk' is invoked, only descriptors 0, 1, and 2 are available.
+
+ The file names `/dev/stdin', `/dev/stdout', and `/dev/stderr' are
+essentially aliases for `/dev/fd/0', `/dev/fd/1', and `/dev/fd/2',
+respectively. However, those names are more self-explanatory.
+
+ Note that using `close()' on a file name of the form `"/dev/fd/N"',
+for file descriptor numbers above two, does actually close the given
+file descriptor.
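+
+   As an illustrative sketch (the script name, data file, and log file
+are all made up for the example), the shell can open descriptor 3 when
+it starts `gawk':
+
+     gawk -f prog.awk datafile 3> prog.log
+
+and then the program in `prog.awk' can write to that descriptor
+directly:
+
+     { print "processed", FILENAME, "record", FNR > "/dev/fd/3" }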
+
+
+File: gawk.info, Node: Special Network, Next: Special Caveats, Prev: Other Inherited Files, Up: Special Files
-5.7.2 Special Files for Network Communications
+5.8.2 Special Files for Network Communications
----------------------------------------------
`gawk' programs can open a two-way TCP/IP connection, acting as either
@@ -6908,14 +6931,18 @@ mentioned here only for completeness. Full discussion is delayed until

File: gawk.info, Node: Special Caveats, Prev: Special Network, Up: Special Files
-5.7.3 Special File Name Caveats
+5.8.3 Special File Name Caveats
-------------------------------
-Here is a list of things to bear in mind when using the special file
-names that `gawk' provides:
+Here are some things to bear in mind when using the special file names
+that `gawk' provides:
- * Recognition of these special file names is disabled if `gawk' is in
- compatibility mode (*note Options::).
+ * Recognition of the file names for the three standard pre-opened
+ files is disabled only in POSIX mode.
+
+ * Recognition of the other special file names is disabled if `gawk'
+ is in compatibility mode (either `--traditional' or `--posix';
+ *note Options::).
* `gawk' _always_ interprets these special file names. For example,
using `/dev/fd/4' for output actually writes on file descriptor 4,
@@ -6928,7 +6955,7 @@ names that `gawk' provides:

File: gawk.info, Node: Close Files And Pipes, Next: Output Summary, Prev: Special Files, Up: Printing
-5.8 Closing Input and Output Redirections
+5.9 Closing Input and Output Redirections
=========================================
If the same file name or the same shell command is used with `getline'
@@ -7042,7 +7069,8 @@ addition, `gawk' sets `ERRNO' to a string indicating the error.
Note also that `close(FILENAME)' has no "magic" effects on the
implicit loop that reads through the files named on the command line.
It is, more likely, a close of a file that was never opened with a
-redirection, so `awk' silently does nothing.
+redirection, so `awk' silently does nothing, except return a negative
+value.
When using the `|&' operator to communicate with a coprocess, it is
occasionally useful to be able to close one end of the two-way pipe
@@ -7051,8 +7079,8 @@ to `close()'. As in any other call to `close()', the first argument is
the name of the command or special file used to start the coprocess.
The second argument should be a string, with either of the values
`"to"' or `"from"'. Case does not matter. As this is an advanced
-feature, a more complete discussion is delayed until *note Two-way
-I/O::, which discusses it in more detail and gives an example.
+feature, discussion is delayed until *note Two-way I/O::, which
+describes it in more detail and gives an example.
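+
+   Still, a brief sketch of the typical idiom may help here (the data
+and command are made up; the full treatment is in the section just
+cited).  `sort' must read all of its input before it can write any
+output, so the program closes the "to" end first:
+
+     cmd = "sort"
+     print "line 3" |& cmd
+     print "line 1" |& cmd
+     print "line 2" |& cmd
+     close(cmd, "to")               # no more input for the coprocess
+     while ((cmd |& getline line) > 0)
+         print "got back:", line
+     close(cmd)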
Using `close()''s Return Value
@@ -7095,8 +7123,8 @@ value.

File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Files And Pipes, Up: Printing
-5.9 Summary
-===========
+5.10 Summary
+============
* The `print' statement prints comma-separated expressions. Each
expression is separated by the value of `OFS' and terminated by
@@ -7108,20 +7136,20 @@ File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Fi
flags that modify the behavior of the format control letters.
* Output from both `print' and `printf' may be redirected to files,
- pipes, and co-processes.
+ pipes, and coprocesses.
* `gawk' provides special file names for access to standard input,
output and error, and for network communications.
- * Use `close()' to close open file, pipe and co-process redirections.
- For co-processes, it is possible to close only one direction of the
+   * Use `close()' to close open file, pipe, and coprocess redirections.
+ For coprocesses, it is possible to close only one direction of the
communications.

File: gawk.info, Node: Output Exercises, Prev: Output Summary, Up: Printing
-5.10 Exercises
+5.11 Exercises
==============
1. Rewrite the program:
@@ -7351,7 +7379,7 @@ and:
are exactly equivalent. One rather bizarre consequence of this rule is
that the following Boolean expression is valid, but does not do what
-the user probably intended:
+its author probably intended:
# Note that /foo/ is on the left of the ~
if (/foo/ ~ $1) print "found foo"
@@ -7377,9 +7405,10 @@ of the `match()' function, and as the third argument of the `split()'
and `patsplit()' functions (*note String Functions::). Modern
implementations of `awk', including `gawk', allow the third argument of
`split()' to be a regexp constant, but some older implementations do
-not. (d.c.) This can lead to confusion when attempting to use regexp
-constants as arguments to user-defined functions (*note User-defined::).
-For example:
+not. (d.c.) Because some built-in functions accept regexp constants
+as arguments, it can be confusing when attempting to use regexp
+constants as arguments to user-defined functions (*note
+User-defined::). For example:
function mysub(pat, repl, str, global)
{
@@ -7443,7 +7472,7 @@ variable's current value. Variables are given new values with
"assignment operators", "increment operators", and "decrement
operators". *Note Assignment Ops::. In addition, the `sub()' and
`gsub()' functions can change a variable's value, and the `match()',
-`patsplit()' and `split()' functions can change the contents of their
+`split()' and `patsplit()' functions can change the contents of their
array parameters. *Note String Functions::.
A few variables have special built-in meanings, such as `FS' (the
@@ -7458,8 +7487,8 @@ uppercase.
The kind of value a variable holds can change over the life of a
program. By default, variables are initialized to the empty string,
which is zero if converted to a number. There is no need to explicitly
-"initialize" a variable in `awk', which is what you would do in C and
-in most other traditional languages.
+initialize a variable in `awk', which is what you would do in C and in
+most other traditional languages.

File: gawk.info, Node: Assignment Options, Prev: Using Variables, Up: Variables
@@ -7634,7 +7663,7 @@ difference in behavior, on a GNU/Linux system:
The `en_DK.utf-8' locale is for English in Denmark, where the comma
acts as the decimal point separator. In the normal `"C"' locale, `gawk'
-treats `4,321' as `4', while in the Danish locale, it's treated as the
+treats `4,321' as 4, while in the Danish locale, it's treated as the
full number, 4.321.
Some earlier versions of `gawk' fully complied with this aspect of
@@ -8017,8 +8046,7 @@ A workaround is:
awk '/[=]=/' /dev/null
- `gawk' does not have this problem; BWK `awk' and `mawk' also do not
-(*note Other Versions::).
+ `gawk' does not have this problem; BWK `awk' and `mawk' also do not.

File: gawk.info, Node: Increment Ops, Prev: Assignment Ops, Up: All Operators
@@ -8195,9 +8223,9 @@ determine how they are compared. Variable typing follows these rules:
STRING attribute.
* Fields, `getline' input, `FILENAME', `ARGV' elements, `ENVIRON'
- elements, and the elements of an array created by `patsplit()',
- `split()' and `match()' that are numeric strings have the STRNUM
- attribute. Otherwise, they have the STRING attribute.
+ elements, and the elements of an array created by `match()',
+ `split()' and `patsplit()' that are numeric strings have the
+ STRNUM attribute. Otherwise, they have the STRING attribute.
Uninitialized variables also have the STRNUM attribute.
* Attributes propagate across assignments but are not changed by any
@@ -8247,21 +8275,21 @@ In contrast, the eight characters `" +3.14"' appearing in program text
comprise a string constant. The following examples print `1' when the
comparison between the two different constants is true, `0' otherwise:
- $ echo ' +3.14' | gawk '{ print $0 == " +3.14" }' True
+ $ echo ' +3.14' | awk '{ print($0 == " +3.14") }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $0 == "+3.14" }' False
+ $ echo ' +3.14' | awk '{ print($0 == "+3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $0 == "3.14" }' False
+ $ echo ' +3.14' | awk '{ print($0 == "3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $0 == 3.14 }' True
+ $ echo ' +3.14' | awk '{ print($0 == 3.14) }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $1 == " +3.14" }' False
+ $ echo ' +3.14' | awk '{ print($1 == " +3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $1 == "+3.14" }' True
+ $ echo ' +3.14' | awk '{ print($1 == "+3.14") }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $1 == "3.14" }' False
+ $ echo ' +3.14' | awk '{ print($1 == "3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $1 == 3.14 }' True
+ $ echo ' +3.14' | awk '{ print($1 == 3.14) }' True
-| 1

@@ -8314,8 +8342,9 @@ Unless `b' happens to be zero or the null string, the `if' part of the
test always succeeds. Because the operators are so similar, this kind
of error is very difficult to spot when scanning the source code.
- The following table of expressions illustrates the kind of comparison
-`gawk' performs, as well as what the result of the comparison is:
+ The following list of expressions illustrates the kinds of
+comparisons `awk' performs, as well as what the result of each
+comparison is:
`1.5 <= 2.0'
numeric comparison (true)
@@ -8366,9 +8395,9 @@ regexp constant (`/'...`/') or an ordinary expression. In the latter
case, the value of the expression as a string is used as a dynamic
regexp (*note Regexp Usage::; also *note Computed Regexps::).
- In modern implementations of `awk', a constant regular expression in
-slashes by itself is also an expression. The regexp `/REGEXP/' is an
-abbreviation for the following comparison expression:
+ A constant regular expression in slashes by itself is also an
+expression. The regexp `/REGEXP/' is an abbreviation for the following
+comparison expression:
$0 ~ /REGEXP/
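+
+   So, as a small sketch (the pattern is invented), a regexp constant
+can be used wherever a truth value is wanted; for example, to count
+the records that match:
+
+     { n += /error/ }     # adds 1 if the record matches, 0 otherwise
+     END { print n, "matching records" }
+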
@@ -8384,9 +8413,9 @@ File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, U
The POSIX standard says that string comparison is performed based on
the locale's "collating order". This is the order in which characters
-sort, as defined by the locale (for more discussion, *note Ranges and
-Locales::). This order is usually very different from the results
-obtained when doing straight character-by-character comparison.(1)
+sort, as defined by the locale (for more discussion, *note Locales::).
+This order is usually very different from the results obtained when
+doing straight character-by-character comparison.(1)
Because this behavior differs considerably from existing practice,
`gawk' only implements it when in POSIX mode (*note Options::). Here
@@ -8443,13 +8472,15 @@ Boolean operators are:
`BOOLEAN1 || BOOLEAN2'
True if at least one of BOOLEAN1 or BOOLEAN2 is true. For
example, the following statement prints all records in the input
- that contain _either_ `edu' or `li' or both:
+ that contain _either_ `edu' or `li':
if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is false.
This can make a difference when BOOLEAN2 contains expressions that
- have side effects.
+ have side effects. (Thus, this test never really distinguishes
+ records that contain both `edu' and `li'--as soon as `edu' is
+ matched, the full test succeeds.)
`! BOOLEAN'
True if BOOLEAN is false. For example, the following program
@@ -8457,7 +8488,7 @@ Boolean operators are:
variable is not defined:
BEGIN { if (! ("HOME" in ENVIRON))
- print "no home!" }
+ print "no home!" }
(The `in' operator is described in *note Reference to Elements::.)
@@ -8680,7 +8711,7 @@ violates the precedence rules; for example, `$$0++--' is not a valid
expression because the first `$' has higher precedence than the `++';
to avoid the problem the expression can be rewritten as `$($0++)--'.
- This table presents `awk''s operators, in order of highest to lowest
+ This list presents `awk''s operators, in order of highest to lowest
precedence:
`('...`)'
@@ -8755,8 +8786,8 @@ system about the local character set and language. The ISO C standard
defines a default `"C"' locale, which is an environment that is typical
of what many C programmers are used to.
- Once upon a time, the locale setting used to affect regexp matching
-(*note Ranges and Locales::), but this is no longer true.
+ Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (*note Ranges and Locales::).
Locales can affect record splitting. For the normal case of `RS =
"\n"', the locale is largely irrelevant. For other single-character
@@ -8808,10 +8839,11 @@ File: gawk.info, Node: Expressions Summary, Prev: Locales, Up: Expressions
* `awk' provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus
and minus. It also provides comparison operators, boolean
- operators, and regexp matching operators. String concatenation is
- accomplished by placing two expressions next to each other; there
- is no explicit operator. The three-operand `?:' operator provides
- an "if-else" test within expressions.
+ operators, array membership testing, and regexp matching
+ operators. String concatenation is accomplished by placing two
+ expressions next to each other; there is no explicit operator.
+ The three-operand `?:' operator provides an "if-else" test within
+ expressions.
* Assignment operators provide convenient shorthands for common
arithmetic operations.
@@ -8819,8 +8851,8 @@ File: gawk.info, Node: Expressions Summary, Prev: Locales, Up: Expressions
* In `awk', a value is considered to be true if it is non-zero _or_
non-null. Otherwise, the value is false.
- * A value's type is set upon each assignment and may change over its
- lifetime. The type determines how it behaves in comparisons
+ * A variable's type is set upon each assignment and may change over
+ its lifetime. The type determines how it behaves in comparisons
(string or numeric).
* Function calls return a value which may be used as part of a larger
@@ -8891,7 +8923,7 @@ summary of the types of `awk' patterns:
number) or non-null (if a string). (*Note Expression Patterns::.)
`BEGPAT, ENDPAT'
- A pair of patterns separated by a comma, specifying a range of
+ A pair of patterns separated by a comma, specifying a "range" of
records. The range includes both the initial record that matches
BEGPAT and the final record that matches ENDPAT. (*Note Ranges::.)
@@ -9102,7 +9134,7 @@ input is read. For example:
$ awk '
> BEGIN { print "Analysis of \"li\"" }
- > /li/ { ++n }
+ > /li/ { ++n }
> END { print "\"li\" appears in", n, "records." }' mail-list
-| Analysis of "li"
-| "li" appears in 4 records.
@@ -9171,9 +9203,10 @@ and `NF' were _undefined_ inside an `END' rule. The POSIX standard
specifies that `NF' is available in an `END' rule. It contains the
number of fields from the last input record. Most probably due to an
oversight, the standard does not say that `$0' is also preserved,
-although logically one would think that it should be. In fact, `gawk'
-does preserve the value of `$0' for use in `END' rules. Be aware,
-however, that BWK `awk', and possibly other implementations, do not.
+although logically one would think that it should be. In fact, all of
+BWK `awk', `mawk', and `gawk' preserve the value of `$0' for use in
+`END' rules. Be aware, however, that some other implementations and
+many older versions of Unix `awk' do not.
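+
+   When portability to such implementations matters, a simple sketch
+of a workaround is to remember the record yourself:
+
+     { last = $0 }      # saved on every record; the final one survives
+     END { print "The last record was:", last }
+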
The third point follows from the first two. The meaning of `print'
inside a `BEGIN' or `END' rule is the same as always: `print $0'. If
@@ -9242,9 +9275,9 @@ makes it possible to catch and process I/O errors at the level of the
`awk' program.
The `next' statement (*note Next Statement::) is not allowed inside
-either a `BEGINFILE' or and `ENDFILE' rule. The `nextfile' statement
-(*note Nextfile Statement::) is allowed only inside a `BEGINFILE' rule,
-but not inside an `ENDFILE' rule.
+either a `BEGINFILE' or an `ENDFILE' rule. The `nextfile' statement is
+allowed only inside a `BEGINFILE' rule, but not inside an `ENDFILE'
+rule.
The `getline' statement (*note Getline::) is restricted inside both
`BEGINFILE' and `ENDFILE': only redirected forms of `getline' are
@@ -9279,9 +9312,9 @@ hold a pattern that the `awk' program searches for. There are two ways
to get the value of the shell variable into the body of the `awk'
program.
- The most common method is to use shell quoting to substitute the
-variable's value into the program inside the script. For example,
-consider the following program:
+ A common method is to use shell quoting to substitute the variable's
+value into the program inside the script. For example, consider the
+following program:
printf "Enter search pattern: "
read pattern
@@ -9472,18 +9505,18 @@ thing the `while' statement does is test the CONDITION. If the
CONDITION is true, it executes the statement BODY. (The CONDITION is
true when the value is not zero and not a null string.) After BODY has
been executed, CONDITION is tested again, and if it is still true, BODY
-is executed again. This process repeats until the CONDITION is no
-longer true. If the CONDITION is initially false, the body of the loop
-is never executed and `awk' continues with the statement following the
-loop. This example prints the first three fields of each record, one
-per line:
-
- awk '{
- i = 1
- while (i <= 3) {
- print $i
- i++
- }
+executes again. This process repeats until the CONDITION is no longer
+true. If the CONDITION is initially false, the body of the loop never
+executes and `awk' continues with the statement following the loop.
+This example prints the first three fields of each record, one per line:
+
+ awk '
+ {
+ i = 1
+ while (i <= 3) {
+ print $i
+ i++
+ }
}' inventory-shipped
The body of this loop is a compound statement enclosed in braces,
@@ -9514,22 +9547,22 @@ the CONDITION is true. It looks like this:
BODY
while (CONDITION)
- Even if the CONDITION is false at the start, the BODY is executed at
+ Even if the CONDITION is false at the start, the BODY executes at
least once (and only once, unless executing BODY makes CONDITION true).
Contrast this with the corresponding `while' statement:
while (CONDITION)
- BODY
+ BODY
This statement does not execute BODY even once if the CONDITION is
false to begin with. The following is an example of a `do' statement:
{
- i = 1
- do {
- print $0
- i++
- } while (i <= 10)
+ i = 1
+ do {
+ print $0
+ i++
+ } while (i <= 10)
}
This program prints each input record 10 times. However, it isn't a
@@ -9558,9 +9591,10 @@ INCREMENT. Typically, INITIALIZATION sets a variable to either zero or
one, INCREMENT adds one to it, and CONDITION compares it against the
desired number of iterations. For example:
- awk '{
- for (i = 1; i <= 3; i++)
- print $i
+ awk '
+ {
+ for (i = 1; i <= 3; i++)
+ print $i
}' inventory-shipped
This prints the first three fields of each input record, with one field
@@ -9584,7 +9618,7 @@ whatsoever. For example, the following statement prints all the powers
of two between 1 and 100:
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
If there is nothing to be done, any of the three expressions in the
parentheses following the `for' keyword may be omitted. Thus,
@@ -9842,11 +9876,11 @@ rules. *Note BEGINFILE/ENDFILE::.
According to the POSIX standard, the behavior is undefined if the
`next' statement is used in a `BEGIN' or `END' rule. `gawk' treats it
-as a syntax error. Although POSIX permits it, most other `awk'
-implementations don't allow the `next' statement inside function bodies
-(*note User-defined::). Just as with any other `next' statement, a
-`next' statement inside a function body reads the next record and
-starts processing it with the first rule in the program.
+as a syntax error. Although POSIX does not disallow it, most other
+`awk' implementations don't allow the `next' statement inside function
+bodies (*note User-defined::). Just as with any other `next'
+statement, a `next' statement inside a function body reads the next
+record and starts processing it with the first rule in the program.

File: gawk.info, Node: Nextfile Statement, Next: Exit Statement, Prev: Next Statement, Up: Statements
@@ -9890,17 +9924,17 @@ files, pipes, and coprocesses that are opened with redirections. It is
not related to the main processing that `awk' does with the files
listed in `ARGV'.
- NOTE: For many years, `nextfile' was a `gawk' extension. As of
+ NOTE: For many years, `nextfile' was a common extension. In
September, 2012, it was accepted for inclusion into the POSIX
standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=607).
- The current version of BWK `awk', and `mawk' (*note Other
-Versions::) also support `nextfile'. However, they don't allow the
-`nextfile' statement inside function bodies (*note User-defined::).
-`gawk' does; a `nextfile' inside a function body reads the next record
-and starts processing it with the first rule in the program, just as
-any other `nextfile' statement.
+   The current versions of BWK `awk' and `mawk' also support
+`nextfile'. However, they don't allow the `nextfile' statement inside
+function bodies (*note User-defined::). `gawk' does; a `nextfile'
+inside a function body reads the next record and starts processing it
+with the first rule in the program, just as any other `nextfile'
+statement.

File: gawk.info, Node: Exit Statement, Prev: Nextfile Statement, Up: Statements
@@ -9924,8 +9958,8 @@ stop immediately.
An `exit' statement that is not part of a `BEGIN' or `END' rule
stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-`END' rule if there is one. Any `ENDFILE' rules are also skipped; they
-are not executed.
+`END' rule if there is one. `gawk' also skips any `ENDFILE' rules;
+they do not execute.
In such a case, if you don't want the `END' rule to do its job, set
a variable to nonzero before the `exit' statement and check that
@@ -10012,7 +10046,7 @@ description of each variable.)
use binary I/O. Any other string value is treated the same as
`"rw"', but causes `gawk' to generate a warning message.
`BINMODE' is described in more detail in *note PC Using::. `mawk'
- *note Other Versions::), also supports this variable, but only
+     (*note Other Versions::) also supports this variable, but only
using numeric values.
``CONVFMT''
@@ -10095,9 +10129,8 @@ description of each variable.)
printing with the `print' statement. It works by being passed as
the first argument to the `sprintf()' function (*note String
Functions::). Its default value is `"%.6g"'. Earlier versions of
- `awk' also used `OFMT' to specify the format for converting
- numbers to strings in general expressions; this is now done by
- `CONVFMT'.
+ `awk' used `OFMT' to specify the format for converting numbers to
+ strings in general expressions; this is now done by `CONVFMT'.
`OFS'
This is the output field separator (*note Output Separators::).
@@ -10206,8 +10239,8 @@ Options::), they are not special.
the command line.
While you can change the value of `ARGIND' within your `awk'
- program, `gawk' automatically sets it to a new value when the next
- file is opened.
+ program, `gawk' automatically sets it to a new value when it opens
+ the next file.
`ENVIRON'
An associative array containing the values of the environment.
@@ -10257,9 +10290,9 @@ Options::), they are not special.
Getline::) inside a `BEGIN' rule can give `FILENAME' a value.
`FNR'
- The current record number in the current file. `FNR' is
- incremented each time a new record is read (*note Records::). It
- is reinitialized to zero each time a new input file is started.
+ The current record number in the current file. `awk' increments
+ `FNR' each time it reads a new record (*note Records::). `awk'
+ resets `FNR' to zero each time it starts a new input file.
`NF'
The number of fields in the current input record. `NF' is set
@@ -10283,8 +10316,8 @@ Options::), they are not special.
`NR'
The number of input records `awk' has processed since the
- beginning of the program's execution (*note Records::). `NR' is
- incremented each time a new record is read.
+ beginning of the program's execution (*note Records::). `awk'
+ increments `NR' each time it reads a new record.
`PROCINFO #'
The elements of this array provide access to information about the
@@ -10349,7 +10382,7 @@ Options::), they are not special.
`PROCINFO["sorted_in"]'
If this element exists in `PROCINFO', its value controls the
- order in which array indices will be processed by `for (INDEX
+ order in which array indices will be processed by `for (INDX
in ARRAY)' loops. Since this is an advanced feature, we
defer the full description until later; see *note Scanning an
Array::.
@@ -10367,7 +10400,7 @@ Options::), they are not special.
The following additional elements in the array are available to
provide information about the MPFR and GMP libraries if your
- version of `gawk' supports arbitrary precision numbers (*note
+ version of `gawk' supports arbitrary precision arithmetic (*note
Arbitrary Precision Arithmetic::):
`PROCINFO["mpfr_version"]'
@@ -10400,14 +10433,14 @@ Options::), they are not special.
The `PROCINFO' array has the following additional uses:
- * It may be used to cause coprocesses to communicate over
- pseudo-ttys instead of through two-way pipes; this is
- discussed further in *note Two-way I/O::.
-
* It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess. *Note Read Timeout::,
for more information.
+ * It may be used to cause coprocesses to communicate over
+ pseudo-ttys instead of through two-way pipes; this is
+ discussed further in *note Two-way I/O::.
+
`RLENGTH'
The length of the substring matched by the `match()' function
(*note String Functions::). `RLENGTH' is set by invoking the
@@ -10596,6 +10629,12 @@ Because `-q' is not a valid `gawk' option, it and the following `-v'
are passed on to the `awk' program. (*Note Getopt Function::, for an
`awk' library function that parses command-line options.)
+ When designing your program, you should choose options that don't
+conflict with `gawk''s, since it will process any options that it
+accepts before passing the rest of the command line on to your program.
+Using `#!' with the `-E' option may help (*note Executable Scripts::,
+and *note Options::).
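+
+   As a sketch (the interpreter path is system-dependent and only an
+example), an executable script using `-E' might begin like this:
+
+     #! /usr/local/bin/gawk -E
+
+     # -E works like -f, but it is the last option gawk processes and
+     # it disables command-line assignments, so the script's arguments
+     # simply arrive in ARGV for the program to examine itself.
+     BEGIN {
+         for (i = 1; i < ARGC; i++)
+             print "argument", i, "is", ARGV[i]
+     }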
+

File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up: Patterns and Actions
@@ -10625,8 +10664,8 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up:
* The control statements in `awk' are `if'-`else', `while', `for',
and `do'-`while'. `gawk' adds the `switch' statement. There are
- two flavors of `for' statement: one for for performing general
- looping, and the other iterating through an array.
+ two flavors of `for' statement: one for performing general
+ looping, and the other for iterating through an array.
* `break' and `continue' let you exit early or start the next
iteration of a loop (or get out of a `switch').
@@ -10638,12 +10677,16 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up:
* The `exit' statement terminates your program. When executed from
an action (or function body) it transfers control to the `END'
statements. From an `END' statement body, it exits immediately.
- You may pass an optional numeric value to be used at `awk''s exit
+ You may pass an optional numeric value to be used as `awk''s exit
status.
* Some built-in variables provide control over `awk', mainly for I/O.
Other variables convey information from `awk' to your program.
+ * `ARGC' and `ARGV' make the command-line arguments available to
+ your program. Manipulating them from a `BEGIN' rule lets you
+ control how `awk' will process the provided data files.
+

File: gawk.info, Node: Arrays, Next: Functions, Prev: Patterns and Actions, Up: Top
@@ -10663,26 +10706,21 @@ about array usage. The major node moves on to discuss `gawk''s facility
for sorting arrays, and ends with a brief description of `gawk''s
ability to support true arrays of arrays.
- `awk' maintains a single set of names that may be used for naming
-variables, arrays, and functions (*note User-defined::). Thus, you
-cannot have a variable and an array with the same name in the same
-`awk' program.
-
* Menu:
* Array Basics:: The basics of arrays.
-* Delete:: The `delete' statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
`awk'.
* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Delete:: The `delete' statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
`awk'.
* Arrays of Arrays:: True multidimensional arrays.
* Arrays Summary:: Summary of arrays.

-File: gawk.info, Node: Array Basics, Next: Delete, Up: Arrays
+File: gawk.info, Node: Array Basics, Next: Numeric Array Subscripts, Up: Arrays
8.1 The Basics of Arrays
========================
@@ -10901,14 +10939,14 @@ encountering repeated numbers, gaps, or lines that don't begin with a
number:
{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
}
END {
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
}
The first rule keeps track of the largest line number seen so far;
@@ -10936,9 +10974,9 @@ overrides the others. Gaps in the line numbers can be handled with an
easy improvement to the program's `END' rule, as follows:
END {
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
}

@@ -10956,7 +10994,7 @@ lowest index up to the highest. This technique won't do the job in
has a special kind of `for' statement for scanning an array:
for (VAR in ARRAY)
- BODY
+ BODY
This loop executes BODY once for each index in ARRAY that the program
has previously used, with the variable VAR set to that index.
@@ -11013,7 +11051,7 @@ all `awk' versions do so. Consider this program, named `loopcheck.awk':
}
}
- Here is what happens when run with `gawk':
+ Here is what happens when run with `gawk' (and `mawk'):
$ gawk -f loopcheck.awk
-| here
@@ -11116,7 +11154,8 @@ available:
to run. Changing `PROCINFO["sorted_in"]' in the loop body does not
affect the loop. For example:
- $ gawk 'BEGIN {
+ $ gawk '
+ > BEGIN {
> a[4] = 4
> a[3] = 3
> for (i in a)
@@ -11124,7 +11163,8 @@ affect the loop. For example:
> }'
-| 4 4
-| 3 3
- $ gawk 'BEGIN {
+ $ gawk '
+ > BEGIN {
> PROCINFO["sorted_in"] = "@ind_str_asc"
> a[4] = 4
> a[3] = 3
@@ -11176,87 +11216,9 @@ ordering when the numeric values are equal ensures that `gawk' behaves
consistently across different environments.

-File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Array Basics, Up: Arrays
-
-8.2 The `delete' Statement
-==========================
+File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscripts, Prev: Array Basics, Up: Arrays
-To remove an individual element of an array, use the `delete' statement:
-
- delete ARRAY[INDEX-EXPRESSION]
-
- Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never been
-referred to or been given a value. The following is an example of
-deleting elements in an array:
-
- for (i in frequencies)
- delete frequencies[i]
-
-This example removes all the elements from the array `frequencies'.
-Once an element is deleted, a subsequent `for' statement to scan the
-array does not report that element and the `in' operator to check for
-the presence of that element returns zero (i.e., false):
-
- delete foo[4]
- if (4 in foo)
- print "This will never be printed"
-
- It is important to note that deleting an element is _not_ the same
-as assigning it a null value (the empty string, `""'). For example:
-
- foo[4] = ""
- if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-
- It is not an error to delete an element that does not exist.
-However, if `--lint' is provided on the command line (*note Options::),
-`gawk' issues a warning message when an element that is not in the
-array is deleted.
-
- All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the `delete' statement, as follows:
-
- delete ARRAY
-
- Using this version of the `delete' statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
- NOTE: For many years, using `delete' without a subscript was a
- `gawk' extension. As of September, 2012, it was accepted for
- inclusion into the POSIX standard. See the Austin Group website
- (http://austingroupbugs.net/view.php?id=544). This form of the
- `delete' statement is also supported by BWK `awk' and `mawk', as
- well as by a number of other implementations (*note Other
- Versions::).
-
- The following statement provides a portable but nonobvious way to
-clear out an array:(1)
-
- split("", array)
-
- The `split()' function (*note String Functions::) clears out the
-target array first. This call asks it to split apart the null string.
-Because there is no data to split out, the function simply clears the
-array and then returns.
-
- CAUTION: Deleting an array does not change its type; you cannot
- delete an array and then use the array's name as a scalar (i.e., a
- regular variable). For example, the following does not work:
-
- a[1] = 3
- delete a
- a = 3
-
- ---------- Footnotes ----------
-
- (1) Thanks to Michael Brennan for pointing this out.
-
-
-File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscripts, Prev: Delete, Up: Arrays
-
-8.3 Using Numbers to Subscript Arrays
+8.2 Using Numbers to Subscript Arrays
=====================================
An important aspect to remember about arrays is that _array subscripts
@@ -11285,9 +11247,9 @@ two significant digits. This test fails, since `"12.15"' is different
from `"12.153"'.
According to the rules for conversions (*note Conversion::), integer
-values are always converted to strings as integers, no matter what the
-value of `CONVFMT' may happen to be. So the usual case of the
-following works:
+values always convert to strings as integers, no matter what the value
+of `CONVFMT' may happen to be. So the usual case of the following
+works:
for (i = 1; i <= maxsub; i++)
do something with array[i]
@@ -11300,14 +11262,14 @@ example, that `array[17]', `array[021]', and `array[0x11]' all refer to
the same element!
As with many things in `awk', the majority of the time things work
-as one would expect them to. But it is useful to have a precise
+as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.

-File: gawk.info, Node: Uninitialized Subscripts, Next: Multidimensional, Prev: Numeric Array Subscripts, Up: Arrays
+File: gawk.info, Node: Uninitialized Subscripts, Next: Delete, Prev: Numeric Array Subscripts, Up: Arrays
-8.4 Using Uninitialized Variables as Subscripts
+8.3 Using Uninitialized Variables as Subscripts
===============================================
Suppose it's necessary to write a program to print the input data in
@@ -11353,7 +11315,86 @@ string as a subscript if `--lint' is provided on the command line
(*note Options::).

-File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Uninitialized Subscripts, Up: Arrays
+File: gawk.info, Node: Delete, Next: Multidimensional, Prev: Uninitialized Subscripts, Up: Arrays
+
+8.4 The `delete' Statement
+==========================
+
+To remove an individual element of an array, use the `delete' statement:
+
+ delete ARRAY[INDEX-EXPRESSION]
+
+ Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never been
+referred to or been given a value. The following is an example of
+deleting elements in an array:
+
+ for (i in frequencies)
+ delete frequencies[i]
+
+This example removes all the elements from the array `frequencies'.
+Once an element is deleted, a subsequent `for' statement to scan the
+array does not report that element and the `in' operator to check for
+the presence of that element returns zero (i.e., false):
+
+ delete foo[4]
+ if (4 in foo)
+ print "This will never be printed"
+
+ It is important to note that deleting an element is _not_ the same
+as assigning it a null value (the empty string, `""'). For example:
+
+ foo[4] = ""
+ if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+
+ It is not an error to delete an element that does not exist.
+However, if `--lint' is provided on the command line (*note Options::),
+`gawk' issues a warning message when an element that is not in the
+array is deleted.
+
+ All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the `delete' statement, as follows:
+
+ delete ARRAY
+
+ Using this version of the `delete' statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+ This form of the `delete' statement is also supported by BWK `awk'
+and `mawk', as well as by a number of other implementations.
+
+ NOTE: For many years, using `delete' without a subscript was a
+ common extension. In September, 2012, it was accepted for
+ inclusion into the POSIX standard. See the Austin Group website
+ (http://austingroupbugs.net/view.php?id=544).
+
+ The following statement provides a portable but nonobvious way to
+clear out an array:(1)
+
+ split("", array)
+
+ The `split()' function (*note String Functions::) clears out the
+target array first. This call asks it to split apart the null string.
+Because there is no data to split out, the function simply clears the
+array and then returns.
+
+ CAUTION: Deleting all the elements from an array does not change
+ its type; you cannot clear an array and then use the array's name
+ as a scalar (i.e., a regular variable). For example, the following
+ does not work:
+
+ a[1] = 3
+ delete a
+ a = 3
+
+ ---------- Footnotes ----------
+
+ (1) Thanks to Michael Brennan for pointing this out.
+
+
+File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Delete, Up: Arrays
8.5 Multidimensional Arrays
===========================
@@ -11365,7 +11406,7 @@ File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Uninit
A multidimensional array is an array in which an element is
identified by a sequence of indices instead of a single index. For
example, a two-dimensional array requires two indices. The usual way
-(in most languages, including `awk') to refer to an element of a
+(in many languages, including `awk') to refer to an element of a
two-dimensional array named `grid' is with `grid[X,Y]'.
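   For example, the following fragment (a brief sketch of this syntax)
stores a value at "row" 3, "column" 4 of `grid' and then tests for its
presence:

     grid[3, 4] = "treasure"
     if ((3, 4) in grid)
         print "found it:", grid[3, 4]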
Multidimensional arrays are supported in `awk' through concatenation
@@ -11506,8 +11547,9 @@ multidimensional subscript). So the following is valid in `gawk':
Each subarray and the main array can be of different length. In
fact, the elements of an array or its subarray do not all have to have
the same type. This means that the main array and any of its subarrays
-can be non-rectangular, or jagged in structure. One can assign a scalar
-value to the index `4' of the main array `a':
+can be non-rectangular, or jagged in structure. You can assign a scalar
+value to the index `4' of the main array `a', even though `a[1]' is
+itself an array and not a scalar:
a[4] = "An element in a jagged array"
@@ -11568,6 +11610,8 @@ an array element is itself an array:
print array[i][j]
}
}
+ else
+ print array[i]
}
If the structure of a jagged array of arrays is known in advance,
@@ -11798,8 +11842,9 @@ brackets ([ ]):
user-defined function that can be used to obtain a random
non-negative integer less than N:
- function randint(n) {
- return int(n * rand())
+ function randint(n)
+ {
+ return int(n * rand())
}
The multiplication produces a random number greater than zero and
@@ -11816,8 +11861,7 @@ brackets ([ ]):
# Roll 3 six-sided dice and
# print total number of points.
{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
}
CAUTION: In most `awk' implementations, including `gawk',
@@ -11904,8 +11948,7 @@ with character indices, and not byte indices.
In the following list, optional parameters are enclosed in square
brackets ([ ]). Several functions perform string substitution; the
full discussion is provided in the description of the `sub()' function,
-which comes towards the end since the list is presented in alphabetic
-order.
+which comes towards the end since the list is presented alphabetically.
Those functions that are specific to `gawk' are marked with a pound
sign (`#'). They are not available in compatibility mode (*note
@@ -11938,7 +11981,8 @@ Options::):
When comparing strings, `IGNORECASE' affects the sorting (*note
Array Sorting Functions::). If the SOURCE array contains
subarrays as values (*note Arrays of Arrays::), they will come
- last, after all scalar values.
+ last, after all scalar values. Subarrays are _not_ recursively
+ sorted.
For example, if the contents of `a' are as follows:
@@ -12041,7 +12085,10 @@ Options::):
If FIND is not found, `index()' returns zero.
- It is a fatal error to use a regexp constant for FIND.
+ With BWK `awk' and `gawk', it is a fatal error to use a regexp
+ constant for FIND. Other implementations allow it, simply
+ treating the regexp constant as an expression meaning `$0 ~
+ /regexp/'.
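+     For instance (a brief illustration of the not-found case):
+
+          $ gawk 'BEGIN { print index("peanut", "an"), index("peanut", "xyz") }'
+          -| 3 0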
`length('[STRING]`)'
Return the number of characters in STRING. If STRING is a number,
@@ -12109,13 +12156,12 @@ Options::):
For example:
{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
}
}
@@ -12184,7 +12230,7 @@ Options::):
The `patsplit()' function splits strings into pieces in a manner
similar to the way input lines are split into fields using `FPAT'
- (*note Splitting By Content::.
+ (*note Splitting By Content::).
Before splitting the string, `patsplit()' deletes any previously
existing elements in the arrays ARRAY and SEPS.
@@ -12195,15 +12241,14 @@ Options::):
first piece is stored in `ARRAY[1]', the second piece in
`ARRAY[2]', and so forth. The string value of the third argument,
FIELDSEP, is a regexp describing where to split STRING (much as
- `FS' can be a regexp describing where to split input records;
- *note Regexp Field Splitting::). If FIELDSEP is omitted, the
- value of `FS' is used. `split()' returns the number of elements
- created. SEPS is a `gawk' extension with `SEPS[I]' being the
- separator string between `ARRAY[I]' and `ARRAY[I+1]'. If FIELDSEP
- is a single space then any leading whitespace goes into `SEPS[0]'
- and any trailing whitespace goes into `SEPS[N]' where N is the
- return value of `split()' (that is, the number of elements in
- ARRAY).
+ `FS' can be a regexp describing where to split input records). If
+ FIELDSEP is omitted, the value of `FS' is used. `split()' returns
+ the number of elements created. SEPS is a `gawk' extension with
+ `SEPS[I]' being the separator string between `ARRAY[I]' and
+ `ARRAY[I+1]'. If FIELDSEP is a single space then any leading
+ whitespace goes into `SEPS[0]' and any trailing whitespace goes
+ into `SEPS[N]' where N is the return value of `split()' (that is,
+ the number of elements in ARRAY).
The `split()' function splits strings into pieces in a manner
similar to the way input lines are split into fields. For example:
@@ -12409,6 +12454,17 @@ Options::):
Nonalphabetic characters are left unchanged. For example,
`toupper("MiXeD cAsE 123")' returns `"MIXED CASE 123"'.
+ Matching the Null String
+
+ In `awk', the `*' operator can match the null string. This is
+particularly important for the `sub()', `gsub()', and `gensub()'
+functions. For example:
+
+ $ echo abc | awk '{ gsub(/m*/, "X"); print }'
+ -| XaXbXcX
+
+Although this makes a certain amount of sense, it can be surprising.
+
---------- Footnotes ----------
(1) Unless you use the `--non-decimal-data' option, which isn't
@@ -12428,8 +12484,8 @@ File: gawk.info, Node: Gory Details, Up: String Functions
9.1.3.1 More About `\' and `&' with `sub()', `gsub()', and `gensub()'
.....................................................................
- CAUTION: This section has been known to cause headaches. You
- might want to skip it upon first reading.
+ CAUTION: This subsubsection has been reported to cause headaches.
+ You might want to skip it upon first reading.
When using `sub()', `gsub()', or `gensub()', and trying to get
literal backslashes and ampersands into the replacement text, you need
@@ -12563,17 +12619,6 @@ Table 9.4: Escape Sequence Processing For `gensub()'
and the special cases for `sub()' and `gsub()', we recommend the use of
`gawk' and `gensub()' when you have to do substitutions.
- Matching the Null String
-
- In `awk', the `*' operator can match the null string. This is
-particularly important for the `sub()', `gsub()', and `gensub()'
-functions. For example:
-
- $ echo abc | awk '{ gsub(/m*/, "X"); print }'
- -| XaXbXcX
-
-Although this makes a certain amount of sense, it can be surprising.
-
---------- Footnotes ----------
(1) This was rather naive of him, despite there being a note in this
@@ -12623,11 +12668,10 @@ parameters are enclosed in square brackets ([ ]):
function--`gawk' also buffers its output and the `fflush()'
function forces `gawk' to flush its buffers.
- `fflush()' was added to BWK `awk' in April of 1992. For two
- decades, it was not part of the POSIX standard. As of December,
- 2012, it was accepted for inclusion into the POSIX standard. See
- the Austin Group website
- (http://austingroupbugs.net/view.php?id=634).
+ Brian Kernighan added `fflush()' to his `awk' in April of 1992.
+ For two decades, it was a common extension. In December, 2012, it
+ was accepted for inclusion into the POSIX standard. See the
+ Austin Group website (http://austingroupbugs.net/view.php?id=634).
POSIX standardizes `fflush()' as follows: If there is no argument,
or if the argument is the null string (`""'), then `awk' flushes
@@ -12814,7 +12858,7 @@ enclosed in square brackets ([ ]):
If DATESPEC does not contain enough elements or if the resulting
time is out of range, `mktime()' returns -1.
-`strftime(' [FORMAT [`,' TIMESTAMP [`,' UTC-FLAG] ] ]`)'
+`strftime('[FORMAT [`,' TIMESTAMP [`,' UTC-FLAG] ] ]`)'
Format the time specified by TIMESTAMP based on the contents of
the FORMAT string and return the result. It is similar to the
function of the same name in ISO C. If UTC-FLAG is present and is
@@ -13029,7 +13073,7 @@ to the standard output and interprets the current time according to the
format specifiers in the string. For example:
$ date '+Today is %A, %B %d, %Y.'
- -| Today is Monday, May 05, 2014.
+ -| Today is Monday, September 22, 2014.
Here is the `gawk' version of the `date' utility. It has a shell
"wrapper" to handle the `-u' option, which requires that `date' run as
@@ -13118,12 +13162,13 @@ a given value.
Finally, two other common operations are to shift the bits left or
right. For example, if you have a bit string `10111001' and you shift
-it right by three bits, you end up with `00010111'.(1) If you start over
-again with `10111001' and shift it left by three bits, you end up with
-`11001000'. `gawk' provides built-in functions that implement the
-bitwise operations just described. They are:
+it right by three bits, you end up with `00010111'.(1) If you start
+over again with `10111001' and shift it left by three bits, you end up
+with `11001000'. The following list describes `gawk''s built-in
+functions that implement the bitwise operations. Optional parameters
+are enclosed in square brackets ([ ]):
-``and(V1, V2' [`,' ...]`)''
+``and('V1`,' V2 [`,' ...]`)''
Return the bitwise AND of the arguments. There must be at least
two.
@@ -13133,13 +13178,13 @@ bitwise operations just described. They are:
``lshift(VAL, COUNT)''
Return the value of VAL, shifted left by COUNT bits.
-``or(V1, V2' [`,' ...]`)''
+``or('V1`,' V2 [`,' ...]`)''
Return the bitwise OR of the arguments. There must be at least two.
``rshift(VAL, COUNT)''
Return the value of VAL, shifted right by COUNT bits.
-``xor(V1, V2' [`,' ...]`)''
+``xor('V1`,' V2 [`,' ...]`)''
Return the bitwise XOR of the arguments. There must be at least
two.
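   As a small sketch of these functions in action, the decimal value
185 is `10111001' in binary; the `and()' call below merely keeps the
low eight bits, since `lshift()' does not discard the bits shifted out:

     $ gawk 'BEGIN { print rshift(185, 3), and(lshift(185, 3), 255) }'
     -| 23 200

Here 23 is `00010111' and 200 is `11001000', matching the description
just given.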
@@ -13214,7 +13259,7 @@ Nondecimal-numbers::), and then demonstrates the results of the
(1) This example shows that 0's come in on the left side. For
`gawk', this is always true, but in some languages, it's possible to
-have the left side fill with 1's. Caveat emptor.
+have the left side fill with 1's.

File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Functions, Up: Built-in
@@ -13224,7 +13269,7 @@ File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Fu
`gawk' provides a single function that lets you distinguish an array
from a scalar variable. This is necessary for writing code that
-traverses every element of an array of arrays. (*note Arrays of
+traverses every element of an array of arrays (*note Arrays of
Arrays::).
`isarray(X)'
@@ -13236,12 +13281,12 @@ itself an array or not. The second is inside the body of a
user-defined function (not discussed yet; *note User-defined::), to
test if a parameter is an array or not.
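   For instance, a small function along these lines (a minimal sketch)
can accept either kind of argument:

     function describe(x,    i)
     {
         if (isarray(x)) {
             for (i in x)
                 print "element", i, "is", x[i]
         } else
             print "scalar value", x
     }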
- Note, however, that using `isarray()' at the global level to test
-variables makes no sense. Since you are the one writing the program, you
-are supposed to know if your variables are arrays or not. And in fact,
-due to the way `gawk' works, if you pass the name of a variable that
-has not been previously used to `isarray()', `gawk' will end up turning
-it into a scalar.
+ NOTE: Using `isarray()' at the global level to test variables
+ makes no sense. Since you are the one writing the program, you are
+ supposed to know if your variables are arrays or not. And in fact,
+ due to the way `gawk' works, if you pass the name of a variable
+ that has not been previously used to `isarray()', `gawk' ends up
+ turning it into a scalar.

File: gawk.info, Node: I18N Functions, Prev: Type Functions, Up: Built-in
@@ -13452,7 +13497,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
{
for (i in a)
- delete a[i]
+ delete a[i]
}
When working with arrays, it is often necessary to delete all the
@@ -13460,8 +13505,8 @@ elements in an array and start over with a new list of elements (*note
Delete::). Instead of having to repeat this loop everywhere that you
need to clear out an array, your program can just call `delarray'.
(This guarantees portability. The use of `delete ARRAY' to delete the
-contents of an entire array is a recent(1) addition to the POSIX
-standard.)
+contents of an entire array is a relatively recent(1) addition to the
+POSIX standard.)
The following is an example of a recursive function. It takes a
string as an input parameter and returns the string in backwards order.
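   A minimal version might look like the following sketch (the full
example may differ in its details):

     function rev(str,    len)
     {
         len = length(str)
         if (len == 0)
             return ""
         return substr(str, len, 1) rev(substr(str, 1, len - 1))
     }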
@@ -13484,7 +13529,7 @@ way:
> gawk -e '{ print rev($0) }' -f rev.awk
-| !cinaP t'noD
- The C `ctime()' function takes a timestamp and returns it in a
+ The C `ctime()' function takes a timestamp and returns it as a
string, formatted in a well-known fashion. The following example uses
the built-in `strftime()' function (*note Time Functions::) to create
an `awk' version of `ctime()':
@@ -13495,12 +13540,18 @@ an `awk' version of `ctime()':
function ctime(ts, format)
{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
}
+ You might think that `ctime()' could use `PROCINFO["strftime"]' for
+its format string. That would be a mistake, since `ctime()' is supposed
+to return the time formatted in a standard fashion, and user-level code
+could have changed `PROCINFO["strftime"]'.
+
---------- Footnotes ----------
(1) Late in 2012.
@@ -14042,7 +14093,7 @@ mechanism allows you to sort arbitrary data in an arbitrary fashion.
# quicksort_swap --- helper function for quicksort, should really be inline
- function quicksort_swap(data, i, j, temp)
+ function quicksort_swap(data, i, j, temp)
{
temp = data[i]
data[i] = data[j]
@@ -14177,11 +14228,12 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
functions.
* POSIX `awk' provides three kinds of built-in functions: numeric,
- string, and I/O. `gawk' provides functions that work with values
- representing time, do bit manipulation, sort arrays, and
- internationalize and localize programs. `gawk' also provides
- several extensions to some of standard functions, typically in the
- form of additional arguments.
+ string, and I/O. `gawk' provides functions that sort arrays, work
+ with values representing time, do bit manipulation, determine
+ variable type (array vs. scalar), and internationalize and
+ localize programs. `gawk' also provides several extensions to
+     some of the standard functions, typically in the form of additional
+ arguments.
* Functions accept zero or more arguments and return a value. The
expressions that provide the argument values are completely
@@ -14366,8 +14418,9 @@ program, leading to bugs that are very difficult to track down:
function lib_func(x, y, l1, l2)
{
...
- USE VARIABLE some_var # some_var should be local
- ... # but is not by oversight
+ # some_var should be local but by oversight is not
+ USE VARIABLE some_var
+ ...
}
A different convention, common in the Tcl community, is to use a
@@ -14475,7 +14528,7 @@ versions of `awk':
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
- # a[7] = "1.32E2"
+ # a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -14484,9 +14537,11 @@ versions of `awk':
The function first looks for C-style octal numbers (base 8). If the
input string matches a regular expression describing octal numbers,
then `mystrtonum()' loops through each character in the string. It
-sets `k' to the index in `"01234567"' of the current octal digit.
-Since the return value is one-based, the `k--' adjusts `k' so it can be
-used in computing the return value.
+sets `k' to the index in `"1234567"' of the current octal digit. The
+return value will either be the same number as the digit, or zero if
+the character is not there, which will be true for a `0'. This is
+safe, since the regexp test in the `if' ensures that only octal values
+are converted.
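+   For instance (a quick illustration of that behavior):
+
+     k = index("1234567", "7")    # k is 7
+     k = index("1234567", "0")    # k is 0, the right value for a zero digit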
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with `0x' or `0X'. The use of
@@ -14512,7 +14567,7 @@ condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to
be the case. Such a statement is known as an "assertion". The C
language provides an `<assert.h>' header file and corresponding
-`assert()' macro that the programmer can use to make assertions. If an
+`assert()' macro that a programmer can use to make assertions. If an
assertion fails, the `assert()' macro arranges to print a diagnostic
message describing the condition that should have been true but was
not, and then it kills the program. In C, using `assert()' looks like this:
@@ -14852,7 +14907,7 @@ current time formatted in the same way as the `date' utility:
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -14948,6 +15003,9 @@ string. Thus calling code may use something like:
This tests the result to see if it is empty or not. An equivalent
test would be `contents == ""'.
+ *Note Extension Sample Readfile::, for an extension function that
+also reads an entire file into memory.
+

File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions
@@ -14997,15 +15055,14 @@ does so _portably_; this works with any implementation of `awk':
# that each take the name of the file being started or
# finished, respectively.
- FILENAME != _oldfilename \
- {
+ FILENAME != _oldfilename {
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
}
- END { endfile(FILENAME) }
+ END { endfile(FILENAME) }
This file must be loaded before the user's "main" program, so that
the rule it supplies is executed first.
@@ -15043,7 +15100,7 @@ solves the problem:
beginfile(FILENAME)
}
- END { endfile(_filename_) }
+ END { endfile(_filename_) }
*note Wc Program::, shows how this library function can be used and
how it simplifies writing the main program.
@@ -15096,15 +15153,12 @@ over with it from the top. For lack of a better name, we'll call it
nextfile
}
- This code relies on the `ARGIND' variable (*note Auto-set::), which
-is specific to `gawk'. If you are not using `gawk', you can use ideas
-presented in *note Filetrans Function::, to either update `ARGIND' on
-your own or modify this code as appropriate.
-
- The `rewind()' function also relies on the `nextfile' keyword (*note
-Nextfile Statement::). Because of this, you should not call it from an
-`ENDFILE' rule. (This isn't necessary anyway, since as soon as an
-`ENDFILE' rule finishes `gawk' goes to the next file!)
+ The `rewind()' function relies on the `ARGIND' variable (*note
+Auto-set::), which is specific to `gawk'. It also relies on the
+`nextfile' keyword (*note Nextfile Statement::). Because of this, you
+should not call it from an `ENDFILE' rule. (This isn't necessary
+anyway, since as soon as an `ENDFILE' rule finishes `gawk' goes to the
+next file!)

File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Function, Up: Data File Management
@@ -15121,7 +15175,7 @@ following program to your `awk' program:
BEGIN {
for (i = 1; i < ARGC; i++) {
- if (ARGV[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/ \
+ if (ARGV[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
|| ARGV[i] == "-" || ARGV[i] == "/dev/stdin")
continue # assignment or standard input
else if ((getline junk < ARGV[i]) < 0) # unreadable
@@ -15135,6 +15189,10 @@ following program to your `awk' program:
element from `ARGV' with `delete' skips the file (since it's no longer
in the list). See also *note ARGC and ARGV::.
+ The regular expression check purposely does not use character classes
+such as `[:alpha:]' and `[:alnum:]' (*note Bracket Expressions::) since
+`awk' variable names only allow the English letters.
+
---------- Footnotes ----------
(1) The `BEGINFILE' special pattern (*note BEGINFILE/ENDFILE::)
@@ -15213,7 +15271,7 @@ programming with a library file does the trick:
function disable_assigns(argc, argv, i)
{
for (i = 1; i < argc; i++)
- if (argv[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/)
+ if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
argv[i] = ("./" argv[i])
}
@@ -15513,10 +15571,14 @@ result of two sample runs of the test program:
In both runs, the first `--' terminates the arguments to `awk', so
that it does not try to interpret the `-a', etc., as its own options.
- NOTE: After `getopt()' is through, it is the responsibility of the
- user level code to clear out all the elements of `ARGV' from 1 to
- `Optind', so that `awk' does not try to process the command-line
- options as file names.
+ NOTE: After `getopt()' is through, user level code must clear out
+ all the elements of `ARGV' from 1 to `Optind', so that `awk' does
+ not try to process the command-line options as file names.
+
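+     For instance, a `BEGIN' rule that uses `getopt()' might finish up
+     along these lines (a minimal sketch; the option string "ab:" is
+     just an example):
+
+          while ((c = getopt(ARGC, ARGV, "ab:")) != -1)
+              opts[c] = Optarg     # handle each option as appropriate
+
+          # clear out the options so they are not treated as file names
+          for (i = 1; i < Optind; i++)
+              ARGV[i] = ""
+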
+ Using `#!' with the `-E' option may help avoid conflicts between
+your program's options and `gawk''s options, since `-E' causes `gawk'
+to abandon processing of further options (*note Executable Scripts::,
+and *note Options::).
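+For example, a script meant to be run this way might begin with a line
+such as the following (the path to `gawk' is, of course,
+system-dependent):
+
+     #! /usr/local/bin/gawk -E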
Several of the sample programs presented in *note Sample Programs::,
use `getopt()' to process their arguments.
@@ -15675,8 +15737,8 @@ corresponding to the C functions of the same names:
routine, we have chosen to put it in `/usr/local/libexec/awk'; however,
you might want it to be in a different directory on your system.
- The function `_pw_init()' keeps three copies of the user information
-in three associative arrays. The arrays are indexed by username
+ The function `_pw_init()' fills three copies of the user information
+into three associative arrays. The arrays are indexed by username
(`_pw_byname'), by user ID number (`_pw_byuid'), and by order of
occurrence (`_pw_bycount'). The variable `_pw_inited' is used for
efficiency, since `_pw_init()' needs to be called only once.
@@ -15686,13 +15748,10 @@ efficiency, since `_pw_init()' needs to be called only once.
in the variable `using_fw' whether field splitting with `FIELDWIDTHS'
is in effect or not. Doing so is necessary, since these functions
could be called from anywhere within a user's program, and the user may
-have his or her own way of splitting records and fields.
-
- The `using_fw' variable checks `PROCINFO["FS"]', which is
-`"FIELDWIDTHS"' if field splitting is being done with `FIELDWIDTHS'.
-This makes it possible to restore the correct field-splitting mechanism
-later. The test can only be true for `gawk'. It is false if using
-`FS' or `FPAT', or on some other `awk' implementation.
+have his or her own way of splitting records and fields. This makes it
+possible to restore the correct field-splitting mechanism later. The
+test can only be true for `gawk'. It is false if using `FS' or `FPAT',
+or on some other `awk' implementation.
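+In code form, that test amounts to something like the following
+one-line sketch:
+
+     using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")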
The code that checks for using `FPAT', using `using_fpat' and
`PROCINFO["FS"]', is similar.
@@ -15888,8 +15947,7 @@ the same names:
n = split($4, a, "[ \t]*,[ \t]*")
for (i = 1; i <= n; i++)
if (a[i] in _gr_groupsbyuser)
- _gr_groupsbyuser[a[i]] = \
- _gr_groupsbyuser[a[i]] " " $1
+                    _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
else
_gr_groupsbyuser[a[i]] = $1
@@ -16065,8 +16123,8 @@ File: gawk.info, Node: Library Functions Summary, Next: Library Exercises, Pr
============
* Reading programs is an excellent way to learn Good Programming.
- The functions provided in this major node and the next are intended
- to serve that purpose.
+ The functions and programs provided in this major node and the next
+ are intended to serve that purpose.
* When writing general-purpose library functions, put some thought
into how to name any global variables so that they won't conflict
@@ -16257,18 +16315,13 @@ supplied:
#
# Requires getopt() and join() library functions
- function usage( e1, e2)
+ function usage()
{
- e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
- e2 = "usage: cut [-c list] [files...]"
- print e1 > "/dev/stderr"
- print e2 > "/dev/stderr"
+ print("usage: cut [-f list] [-d c] [-s] [files...]") > "/dev/stderr"
+ print("usage: cut [-c list] [files...]") > "/dev/stderr"
exit 1
}
-The variables `e1' and `e2' are used so that the function fits nicely
-on the screen.
-
Next comes a `BEGIN' rule that parses the command-line options. It
sets `FS' to a single TAB character, because that is `cut''s default
field separator. The rule then sets the output field separator to be the
@@ -16670,17 +16723,13 @@ there are no matches, the exit status is one; otherwise it is zero:
The `usage()' function prints a usage message in case of invalid
options, and then exits:
- function usage( e)
+ function usage()
{
- e = "Usage: egrep [-csvil] [-e pat] [files ...]"
- e = e "\n\tegrep [-csvil] pat [files ...]"
- print e > "/dev/stderr"
+ print("Usage: egrep [-csvil] [-e pat] [files ...]") > "/dev/stderr"
+ print("\n\tegrep [-csvil] pat [files ...]") > "/dev/stderr"
exit 1
}
- The variable `e' is used so that the function fits nicely on the
-printed page.
-
---------- Footnotes ----------
(1) It also introduces a subtle bug; if a match happens, we output
@@ -16731,26 +16780,22 @@ and the group numbers:
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -16759,8 +16804,7 @@ and the group numbers:
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
@@ -16770,8 +16814,10 @@ and the group numbers:
function pr_first_field(str, a)
{
- split(str, a, ":")
- printf("(%s)", a[1])
+ if (str != "") {
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ }
}
The test in the `for' loop is worth noting. Any supplementary
@@ -16789,8 +16835,9 @@ then the condition is false the first time it's tested, and the loop
body never executes.
The `pr_first_field()' function simply isolates out some code that
-is used repeatedly, making the whole program slightly shorter and
-cleaner.
+is used repeatedly, making the whole program shorter and cleaner. In
+particular, moving the check for the empty string into this function
+saves several lines of code.

File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, Up: Clones
@@ -16881,15 +16928,12 @@ moves to the next letter in the alphabet and `s2' starts over again at
The `usage()' function simply prints an error message and exits:
- function usage( e)
+ function usage()
{
- e = "usage: split [-num] [file] [outname]"
- print e > "/dev/stderr"
+ print("usage: split [-num] [file] [outname]") > "/dev/stderr"
exit 1
}
-The variable `e' is used so that the function fits nicely on the screen.
-
This program is a bit sloppy; it relies on `awk' to automatically
close the last file instead of doing it in an `END' rule. It also
assumes that letters are contiguous in the character set, which isn't
@@ -17002,10 +17046,10 @@ usage is as follows:
The options for `uniq' are:
`-d'
- Print only repeated lines.
+ Print only repeated (duplicated) lines.
`-u'
- Print only nonrepeated lines.
+ Print only nonrepeated (unique) lines.
`-c'
Count lines. This option overrides `-d' and `-u'. Both repeated
@@ -17055,10 +17099,9 @@ standard output, `/dev/stdout':
#
# Requires getopt() and join() library functions
- function usage( e)
+ function usage()
{
- e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
- print e > "/dev/stderr"
+ print("Usage: uniq [-udc [-n]] [+n] [ in [ out ]]") > "/dev/stderr"
exit 1
}
@@ -17112,16 +17155,18 @@ standard output, `/dev/stdout':
The following function, `are_equal()', compares the current line,
`$0', to the previous line, `last'. It handles skipping fields and
characters. If no field count and no character count are specified,
-`are_equal()' simply returns one or zero depending upon the result of a
-simple string comparison of `last' and `$0'. Otherwise, things get more
-complicated. If fields have to be skipped, each line is broken into an
-array using `split()' (*note String Functions::); the desired fields
-are then joined back into a line using `join()'. The joined lines are
-stored in `clast' and `cline'. If no fields are skipped, `clast' and
-`cline' are set to `last' and `$0', respectively. Finally, if
-characters are skipped, `substr()' is used to strip off the leading
-`charcount' characters in `clast' and `cline'. The two strings are
-then compared and `are_equal()' returns the result:
+`are_equal()' returns one or zero depending upon the result of a simple
+string comparison of `last' and `$0'.
+
+ Otherwise, things get more complicated. If fields have to be
+skipped, each line is broken into an array using `split()' (*note
+String Functions::); the desired fields are then joined back into a line
+using `join()'. The joined lines are stored in `clast' and `cline'.
+If no fields are skipped, `clast' and `cline' are set to `last' and
+`$0', respectively. Finally, if characters are skipped, `substr()' is
+used to strip off the leading `charcount' characters in `clast' and
+`cline'. The two strings are then compared and `are_equal()' returns
+the result:
function are_equal( n, m, clast, cline, alast, aline)
{
@@ -17217,8 +17262,8 @@ one or more input files. Its usage is as follows:
If no files are specified on the command line, `wc' reads its
standard input. If there are multiple files, it also prints total
-counts for all the files. The options and their meanings are shown in
-the following list:
+counts for all the files. The options and their meanings are as
+follows:
`-l'
Count only lines.
@@ -17709,12 +17754,12 @@ splits records at blank lines (*note Records::). It sets `MAXLINES' to
Most of the work is done in the `printpage()' function. The label
lines are stored sequentially in the `line' array. But they have to
print horizontally; `line[1]' next to `line[6]', `line[2]' next to
-`line[7]', and so on. Two loops are used to accomplish this. The
-outer loop, controlled by `i', steps through every 10 lines of data;
-this is each row of labels. The inner loop, controlled by `j', goes
-through the lines within the row. As `j' goes from 0 to 4, `i+j' is
-the `j'-th line in the row, and `i+j+5' is the entry next to it. The
-output ends up looking something like this:
+`line[7]', and so on. Two loops accomplish this. The outer loop,
+controlled by `i', steps through every 10 lines of data; this is each
+row of labels. The inner loop, controlled by `j', goes through the
+lines within the row. As `j' goes from 0 to 4, `i+j' is the `j'-th
+line in the row, and `i+j+5' is the entry next to it. The output ends
+up looking something like this:
line 1 line 6
line 2 line 7
@@ -17805,7 +17850,7 @@ a useful format.
At first glance, a program like this would seem to do the job:
- # Print list of word frequencies
+ # wordfreq-first-try.awk --- print list of word frequencies
{
for (i = 1; i <= NF; i++)
@@ -17959,9 +18004,9 @@ Texinfo input file into separate files.
This Info file is written in Texinfo
(http://www.gnu.org/software/texinfo/), the GNU project's document
formatting language. A single Texinfo source file can be used to
-produce both printed and online documentation. The Texinfo language is
-described fully, starting with *note (Texinfo)Top::
-texinfo,Texinfo--The GNU Documentation Format.
+produce both printed documentation, with TeX, and online documentation.
+(The Texinfo language is described fully, starting with *note
+(Texinfo)Top:: texinfo,Texinfo--The GNU Documentation Format.)
For our purposes, it is enough to know three things about Texinfo
input files:
@@ -18023,13 +18068,11 @@ upper- and lowercase letters in the directives won't matter.
given (`NF' is at least three) and also checking that the command exits
with a zero exit status, signifying OK:
- # extract.awk --- extract files and run programs
- # from texinfo files
+ # extract.awk --- extract files and run programs from texinfo files
BEGIN { IGNORECASE = 1 }
- /^@c(omment)?[ \t]+system/ \
- {
+ /^@c(omment)?[ \t]+system/ {
if (NF < 3) {
e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
@@ -18077,8 +18120,7 @@ with the value of `SUBSEP' (*note Multidimensional::), to rejoin the
pieces back into a single line. That line is then printed to the
output file:
- /^@c(omment)?[ \t]+file/ \
- {
+ /^@c(omment)?[ \t]+file/ {
if (NF != 3) {
e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
@@ -18132,7 +18174,7 @@ closing the open file:
function unexpected_eof()
{
printf("extract: %s:%d: unexpected EOF or error\n",
- FILENAME, FNR) > "/dev/stderr"
+ FILENAME, FNR) > "/dev/stderr"
exit 1
}
@@ -18341,8 +18383,8 @@ arguments are supplied, then the first nonoption argument should be the
`awk' program. If there are no command-line arguments left, `igawk'
prints an error message and exits. Otherwise, the first argument is
appended to `program'. In any case, after the arguments have been
-processed, `program' contains the complete text of the original `awk'
-program.
+processed, the shell variable `program' contains the complete text of
+the original `awk' program.
The program is as follows:
@@ -18588,7 +18630,7 @@ and it is frequently easier to do certain kinds of string and argument
manipulation using the shell than it is in `awk'.
Finally, `igawk' shows that it is not always necessary to add new
-features to a program; they can often be layered on top.
+features to a program; they can often be layered on top.(3)
---------- Footnotes ----------
@@ -18597,7 +18639,10 @@ book. We provide some minimal explanations, but see a good shell
programming book if you wish to understand things in more depth.
(2) On some very old versions of `awk', the test `getline junk < t'
-can loop forever if the file exists but is empty. Caveat emptor.
+can loop forever if the file exists but is empty.
+
+ (3) `gawk' does `@include' processing itself in order to support the
+use of `awk' programs as Web CGI scripts.

File: gawk.info, Node: Anagram Program, Next: Signature Program, Prev: Igawk Program, Up: Miscellaneous Programs
@@ -18610,12 +18655,11 @@ word list (such as `/usr/share/dict/words' on many GNU/Linux systems).
One word is an anagram of another if both words contain the same letters
(for example, "babbling" and "blabbing").
- An elegant algorithm is presented in Column 2, Problem C of Jon
-Bentley's `Programming Pearls', second edition. The idea is to give
-words that are anagrams a common signature, sort all the words together
-by their signature, and then print them. Dr. Bentley observes that
-taking the letters in each word and sorting them produces that common
-signature.
+ Column 2, Problem C of Jon Bentley's `Programming Pearls', second
+edition, presents an elegant algorithm. The idea is to give words that
+are anagrams a common signature, sort all the words together by their
+signature, and then print them. Dr. Bentley observes that taking the
+letters in each word and sorting them produces that common signature.
The following program uses arrays of arrays to bring together words
with the same signature and array sorting to print the words in sorted
@@ -18724,9 +18768,9 @@ File: gawk.info, Node: Programs Summary, Next: Programs Exercises, Prev: Misc
11.4 Summary
============
- * The functions provided in this major node and the previous one
- continue on the theme that reading programs is an excellent way to
- learn Good Programming.
+ * The programs provided in this major node continue on the theme
+ that reading programs is an excellent way to learn Good
+ Programming.
* Using `#!' to make `awk' programs directly runnable makes them
easier to use. Otherwise, invoke the program using `awk -f ...'.
@@ -18903,11 +18947,10 @@ File: gawk.info, Node: Nondecimal Data, Next: Array Sorting, Up: Advanced Fea
===================================
If you run `gawk' with the `--non-decimal-data' option, you can have
-nondecimal constants in your input data:
+nondecimal values in your input data:
$ echo 0123 123 0x123 |
- > gawk --non-decimal-data '{ printf "%d, %d, %d\n",
- > $1, $2, $3 }'
+ > gawk --non-decimal-data '{ printf "%d, %d, %d\n", $1, $2, $3 }'
-| 83, 123, 291
For this feature to work, write your program so that `gawk' treats
@@ -18938,6 +18981,8 @@ request it.
programs easier to write and easier to read, and leads to less
surprising results.
+ This option may disappear in a future version of `gawk'.
+

File: gawk.info, Node: Array Sorting, Next: Two-way I/O, Prev: Nondecimal Data, Up: Advanced Features
@@ -18976,7 +19021,7 @@ pre-defined values to `PROCINFO["sorted_in"]' in order to control the
order in which `gawk' traverses an array during a `for' loop.
In addition, the value of `PROCINFO["sorted_in"]' can be a function
-name. This lets you traverse an array based on any custom criterion.
+name.(1) This lets you traverse an array based on any custom criterion.
The array elements are ordered according to the return value of this
function. The comparison function should be defined with at least four
arguments:
@@ -19091,7 +19136,7 @@ of the previous functions:
according to login name. The following program sorts records by a
specific field position and can be used for this purpose:
- # sort.awk --- simple program to sort by field position
+ # passwd-sort.awk --- simple program to sort by field position
# field position is specified by the global variable POS
function cmp_field(i1, v1, i2, v2)
@@ -19143,13 +19188,14 @@ seemingly ordered data:
elements compare equal. This is usually not a problem, but letting the
tied elements come out in arbitrary order can be an issue, especially
when comparing item values. The partial ordering of the equal elements
-may change during the next loop traversal, if other elements are added
-or removed from the array. One way to resolve ties when comparing
-elements with otherwise equal values is to include the indices in the
-comparison rules. Note that doing this may make the loop traversal
-less efficient, so consider it only if necessary. The following
-comparison functions force a deterministic order, and are based on the
-fact that the (string) indices of two elements are never equal:
+may change the next time the array is traversed, if other elements are
+added or removed from the array. One way to resolve ties when
+comparing elements with otherwise equal values is to include the
+indices in the comparison rules. Note that doing this may make the
+loop traversal less efficient, so consider it only if necessary. The
+following comparison functions force a deterministic order, and are
+based on the fact that the (string) indices of two elements are never
+equal:
function cmp_numeric(i1, v1, i2, v2)
{
@@ -19188,6 +19234,11 @@ array has been reported to add 15% to 20% overhead to the execution
time of `awk' programs. For this reason, sorted array traversal is not
the default.
+ ---------- Footnotes ----------
+
+ (1) This is why the predefined sorting orders start with an `@'
+character, which cannot be part of an identifier.
+

File: gawk.info, Node: Array Sorting Functions, Prev: Controlling Array Traversal, Up: Array Sorting
@@ -19274,7 +19325,7 @@ fill in the result array.
Because `IGNORECASE' affects string comparisons, the value of
`IGNORECASE' also affects sorting for both `asort()' and `asorti()'.
Note also that the locale's sorting order does _not_ come into play;
-comparisons are based on character values only.(1) Caveat Emptor.
+comparisons are based on character values only.(1)
---------- Footnotes ----------
@@ -19400,7 +19451,7 @@ using regular pipes.
(1) Michael Brennan suggests the use of `rand()' to generate unique
file names. This is a valid point; nevertheless, temporary files remain
-more difficult than two-way pipes.
+more difficult to use than two-way pipes.
(2) This is very different from the same operator in the C shell and
in Bash.
@@ -19415,7 +19466,7 @@ File: gawk.info, Node: TCP/IP Networking, Next: Profiling, Prev: Two-way I/O,
A host is a host from coast to coast,
and no-one can talk to host that's close,
unless the host that isn't close
- is busy hung or dead.
+ is busy, hung, or dead.
In addition to being able to open a two-way pipeline to a coprocess on
the same system (*note Two-way I/O::), it is possible to make a two-way
@@ -19439,8 +19490,8 @@ NET-TYPE
PROTOCOL
The protocol to use over IP. This must be either `tcp', or `udp',
- for a TCP or UDP IP connection, respectively. The use of TCP is
- recommended for most applications.
+ for a TCP or UDP IP connection, respectively. TCP should be used
+ for most applications.
LOCAL-PORT
The local TCP or UDP port number to use. Use a port number of `0'
@@ -19466,10 +19517,10 @@ REMOTE-PORT
Consider the following very simple example:
BEGIN {
- Service = "/inet/tcp/0/localhost/daytime"
- Service |& getline
- print $0
- close(Service)
+ Service = "/inet/tcp/0/localhost/daytime"
+ Service |& getline
+ print $0
+ close(Service)
}
This program reads the current date and time from the local system's
@@ -19734,9 +19785,9 @@ File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanc
`PROCINFO["sorted_in"]'.
* You can use the `|&' operator to create a two-way pipe to a
- co-process. You read from the co-process with `getline' and write
+ coprocess. You read from the coprocess with `getline' and write
to it with `print' or `printf'. Use `close()' to close off the
- co-process completely, or optionally, close off one side of the
+ coprocess completely, or optionally, close off one side of the
two-way communications.
* By using special "file names" with the `|&' operator, you can open
@@ -26989,7 +27040,7 @@ There are two ways to get GNU software:
supported. If you have the `wget' program, you can use a command
like the following:
- wget http://ftp.gnu.org/gnu/gawk/gawk-4.1.1.tar.gz
+ wget http://ftp.gnu.org/gnu/gawk/gawk-4.1.2.tar.gz
The GNU software archive is mirrored around the world. The
up-to-date list of mirror sites is available from the main FSF web site
@@ -27008,25 +27059,25 @@ compression programs: `gzip', `bzip2', and `xz'. For simplicity, the
rest of these instructions assume you are using the one compressed with
the GNU Zip program, `gzip'.
- Once you have the distribution (for example, `gawk-4.1.1.tar.gz'),
+ Once you have the distribution (for example, `gawk-4.1.2.tar.gz'),
use `gzip' to expand the file and then use `tar' to extract it. You
can use the following pipeline to produce the `gawk' distribution:
- gzip -d -c gawk-4.1.1.tar.gz | tar -xvpf -
+ gzip -d -c gawk-4.1.2.tar.gz | tar -xvpf -
On a system with GNU `tar', you can let `tar' do the decompression
for you:
- tar -xvpzf gawk-4.1.1.tar.gz
+ tar -xvpzf gawk-4.1.2.tar.gz
-Extracting the archive creates a directory named `gawk-4.1.1' in the
+Extracting the archive creates a directory named `gawk-4.1.2' in the
current directory.
The distribution file name is of the form `gawk-V.R.P.tar.gz'. The
V represents the major version of `gawk', the R represents the current
release of version V, and the P represents a "patch level", meaning
that minor bugs have been fixed in the release. The current patch
-level is 1, but when retrieving distributions, you should get the
+level is 2, but when retrieving distributions, you should get the
version with the highest version, release, and patch level. (Note,
however, that patch levels greater than or equal to 70 denote "beta" or
nonproduction software; you might not want to retrieve such a version
@@ -27237,7 +27288,7 @@ Unix-derived systems, GNU/Linux, BSD-based systems, and the Cygwin
environment for MS-Windows.
After you have extracted the `gawk' distribution, `cd' to
-`gawk-4.1.1'. Like most GNU software, `gawk' is configured
+`gawk-4.1.2'. Like most GNU software, `gawk' is configured
automatically for your system by running the `configure' program. This
program is a Bourne shell script that is generated automatically using
GNU Autoconf. (The Autoconf software is described fully starting with
@@ -27664,8 +27715,8 @@ GNU tools, such as Bash, the GNU Compiler Collection (GCC), GNU Make,
and other GNU programs. Compilation and installation for Cygwin is the
same as for a Unix system:
- tar -xvpzf gawk-4.1.1.tar.gz
- cd gawk-4.1.1
+ tar -xvpzf gawk-4.1.2.tar.gz
+ cd gawk-4.1.2
./configure
make
@@ -27685,7 +27736,7 @@ use the `BINMODE' variable.
This can cause problems with other Unix-like components that have
been ported to MS-Windows that expect `gawk' to do automatic
-translation of `"\r\n"', since it won't. Caveat Emptor!
+translation of `"\r\n"', since it won't.

File: gawk.info, Node: VMS Installation, Prev: PC Installation, Up: Non-Unix Installation
@@ -28190,9 +28241,9 @@ B.6 Summary
* The `gawk' distribution is available from GNU project's main
distribution site, `ftp.gnu.org'. The canonical build recipe is:
- wget http://ftp.gnu.org/gnu/gawk/gawk-4.1.1.tar.gz
- tar -xvpzf gawk-4.1.1.tar.gz
- cd gawk-4.1.1
+ wget http://ftp.gnu.org/gnu/gawk/gawk-4.1.2.tar.gz
+ tar -xvpzf gawk-4.1.2.tar.gz
+ cd gawk-4.1.2
./configure && make && make check
* `gawk' may be built on non-POSIX systems as well. The currently
@@ -31036,7 +31087,7 @@ Index
* Menu:
-* ! (exclamation point), ! operator: Boolean Ops. (line 67)
+* ! (exclamation point), ! operator: Boolean Ops. (line 69)
* ! (exclamation point), ! operator <1>: Egrep Program. (line 175)
* ! (exclamation point), ! operator <2>: Ranges. (line 48)
* ! (exclamation point), ! operator: Precedence. (line 52)
@@ -31066,22 +31117,22 @@ Index
* % (percent sign), %= operator <1>: Precedence. (line 95)
* % (percent sign), %= operator: Assignment Ops. (line 130)
* & (ampersand), && operator <1>: Precedence. (line 86)
-* & (ampersand), && operator: Boolean Ops. (line 57)
+* & (ampersand), && operator: Boolean Ops. (line 59)
* & (ampersand), gsub()/gensub()/sub() functions and: Gory Details.
(line 6)
* ' (single quote): One-shot. (line 15)
-* ' (single quote) in gawk command lines: Long. (line 33)
+* ' (single quote) in gawk command lines: Long. (line 35)
* ' (single quote), in shell commands: Quoting. (line 48)
* ' (single quote), vs. apostrophe: Comments. (line 27)
-* ' (single quote), with double quotes: Quoting. (line 70)
+* ' (single quote), with double quotes: Quoting. (line 73)
* () (parentheses), in a profile: Profiling. (line 146)
* () (parentheses), regexp operator: Regexp Operators. (line 81)
* * (asterisk), * operator, as multiplication operator: Precedence.
(line 55)
* * (asterisk), * operator, as regexp operator: Regexp Operators.
(line 89)
-* * (asterisk), * operator, null strings, matching: Gory Details.
- (line 143)
+* * (asterisk), * operator, null strings, matching: String Functions.
+ (line 535)
* * (asterisk), ** operator <1>: Precedence. (line 49)
* * (asterisk), ** operator: Arithmetic Ops. (line 81)
* * (asterisk), **= operator <1>: Precedence. (line 95)
@@ -31125,30 +31176,30 @@ Index
* --include option: Options. (line 159)
* --lint option <1>: Options. (line 185)
* --lint option: Command Line. (line 20)
-* --lint-old option: Options. (line 293)
+* --lint-old option: Options. (line 295)
* --load option: Options. (line 173)
* --non-decimal-data option <1>: Nondecimal Data. (line 6)
* --non-decimal-data option: Options. (line 211)
* --non-decimal-data option, strtonum() function and: Nondecimal Data.
- (line 36)
-* --optimize option: Options. (line 235)
-* --posix option: Options. (line 252)
-* --posix option, --traditional option and: Options. (line 271)
-* --pretty-print option: Options. (line 224)
+ (line 35)
+* --optimize option: Options. (line 237)
+* --posix option: Options. (line 254)
+* --posix option, --traditional option and: Options. (line 273)
+* --pretty-print option: Options. (line 226)
* --profile option <1>: Profiling. (line 12)
-* --profile option: Options. (line 240)
-* --re-interval option: Options. (line 277)
-* --sandbox option: Options. (line 284)
+* --profile option: Options. (line 242)
+* --re-interval option: Options. (line 279)
+* --sandbox option: Options. (line 286)
* --sandbox option, disabling system() function: I/O Functions.
- (line 97)
+ (line 96)
* --sandbox option, input redirection with getline: Getline. (line 19)
* --sandbox option, output redirection with print, printf: Redirection.
(line 6)
* --source option: Options. (line 117)
* --traditional option: Options. (line 81)
-* --traditional option, --posix option and: Options. (line 271)
-* --use-lc-numeric option: Options. (line 219)
-* --version option: Options. (line 298)
+* --traditional option, --posix option and: Options. (line 273)
+* --use-lc-numeric option: Options. (line 221)
+* --version option: Options. (line 300)
* --with-whiny-user-strftime configuration option: Additional Configuration Options.
(line 35)
* -b option: Options. (line 68)
@@ -31156,32 +31207,32 @@ Index
* -c option: Options. (line 81)
* -D option: Options. (line 108)
* -d option: Options. (line 93)
-* -e option: Options. (line 333)
+* -e option: Options. (line 336)
* -E option: Options. (line 125)
* -e option: Options. (line 117)
* -f option: Options. (line 25)
* -F option: Options. (line 21)
* -f option: Long. (line 12)
-* -F option, -Ft sets FS to TAB: Options. (line 306)
+* -F option, -Ft sets FS to TAB: Options. (line 308)
* -F option, command-line: Command Line Field Separator.
(line 6)
-* -f option, multiple uses: Options. (line 311)
+* -f option, multiple uses: Options. (line 313)
* -g option: Options. (line 147)
* -h option: Options. (line 154)
* -i option: Options. (line 159)
-* -L option: Options. (line 293)
+* -L option: Options. (line 295)
* -l option: Options. (line 173)
* -M option: Options. (line 205)
-* -N option: Options. (line 219)
+* -N option: Options. (line 221)
* -n option: Options. (line 211)
-* -O option: Options. (line 235)
-* -o option: Options. (line 224)
-* -P option: Options. (line 252)
-* -p option: Options. (line 240)
-* -r option: Options. (line 277)
-* -S option: Options. (line 284)
+* -O option: Options. (line 237)
+* -o option: Options. (line 226)
+* -P option: Options. (line 254)
+* -p option: Options. (line 242)
+* -r option: Options. (line 279)
+* -S option: Options. (line 286)
* -v option: Assignment Options. (line 12)
-* -V option: Options. (line 298)
+* -V option: Options. (line 300)
* -v option: Options. (line 32)
* -W option: Options. (line 46)
* . (period), regexp operator: Regexp Operators. (line 44)
@@ -31201,8 +31252,8 @@ Index
(line 148)
* / (forward slash), patterns and: Expression Patterns. (line 24)
* /= operator vs. /=.../ regexp constant: Assignment Ops. (line 148)
-* /dev/... special files: Special FD. (line 46)
-* /dev/fd/N special files (gawk): Special FD. (line 46)
+* /dev/... special files: Special FD. (line 48)
+* /dev/fd/N special files (gawk): Special FD. (line 48)
* /inet/... special files (gawk): TCP/IP Networking. (line 6)
* /inet4/... special files (gawk): TCP/IP Networking. (line 6)
* /inet6/... special files (gawk): TCP/IP Networking. (line 6)
@@ -31275,7 +31326,7 @@ Index
* \ (backslash), \y operator (gawk): GNU Regexp Operators.
(line 38)
* \ (backslash), as field separator: Command Line Field Separator.
- (line 27)
+ (line 24)
* \ (backslash), continuing lines and: Statements/Lines. (line 19)
* \ (backslash), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -31338,12 +31389,12 @@ Index
* ambiguity, syntactic: /= operator vs. /=.../ regexp constant: Assignment Ops.
(line 148)
* ampersand (&), && operator <1>: Precedence. (line 86)
-* ampersand (&), && operator: Boolean Ops. (line 57)
+* ampersand (&), && operator: Boolean Ops. (line 59)
* ampersand (&), gsub()/gensub()/sub() functions and: Gory Details.
(line 6)
-* anagram.awk program: Anagram Program. (line 22)
+* anagram.awk program: Anagram Program. (line 21)
* anagrams, finding: Anagram Program. (line 6)
-* and: Bitwise Functions. (line 39)
+* and: Bitwise Functions. (line 40)
* AND bitwise operation: Bitwise Functions. (line 6)
* and Boolean-logic operator: Boolean Ops. (line 6)
* ANSI: Glossary. (line 34)
@@ -31359,25 +31410,25 @@ Index
* arctangent: Numeric Functions. (line 11)
* ARGC/ARGV variables: Auto-set. (line 15)
* ARGC/ARGV variables, command-line arguments: Other Arguments.
- (line 12)
+ (line 15)
* ARGC/ARGV variables, how to use: ARGC and ARGV. (line 6)
* ARGC/ARGV variables, portability and: Executable Scripts. (line 59)
* ARGIND variable: Auto-set. (line 44)
-* ARGIND variable, command-line arguments: Other Arguments. (line 12)
+* ARGIND variable, command-line arguments: Other Arguments. (line 15)
* arguments, command-line <1>: ARGC and ARGV. (line 6)
* arguments, command-line <2>: Auto-set. (line 15)
* arguments, command-line: Other Arguments. (line 6)
* arguments, command-line, invoking awk: Command Line. (line 6)
* arguments, in function calls: Function Calls. (line 18)
* arguments, processing: Getopt Function. (line 6)
-* ARGV array, indexing into: Other Arguments. (line 12)
+* ARGV array, indexing into: Other Arguments. (line 15)
* arithmetic operators: Arithmetic Ops. (line 6)
* array manipulation in extensions: Array Manipulation. (line 6)
* array members: Reference to Elements.
(line 6)
* array scanning order, controlling: Controlling Scanning.
(line 14)
-* array, number of elements: String Functions. (line 197)
+* array, number of elements: String Functions. (line 200)
* arrays: Arrays. (line 6)
* arrays of arrays: Arrays of Arrays. (line 6)
* arrays, an example of using: Array Example. (line 6)
@@ -31385,7 +31436,7 @@ Index
* arrays, as parameters to functions: Pass By Value/Reference.
(line 47)
* arrays, associative: Array Intro. (line 50)
-* arrays, associative, library functions and: Library Names. (line 57)
+* arrays, associative, library functions and: Library Names. (line 58)
* arrays, deleting entire contents: Delete. (line 39)
* arrays, elements that don't exist: Reference to Elements.
(line 23)
@@ -31393,13 +31444,12 @@ Index
* arrays, elements, deleting: Delete. (line 6)
* arrays, elements, order of access by in operator: Scanning an Array.
(line 48)
-* arrays, elements, retrieving number of: String Functions. (line 42)
+* arrays, elements, retrieving number of: String Functions. (line 41)
* arrays, for statement and: Scanning an Array. (line 20)
* arrays, indexing: Array Intro. (line 50)
* arrays, merging into strings: Join Function. (line 6)
* arrays, multidimensional: Multidimensional. (line 10)
* arrays, multidimensional, scanning: Multiscanning. (line 11)
-* arrays, names of, and names of functions/variables: Arrays. (line 18)
* arrays, numeric subscripts: Numeric Array Subscripts.
(line 6)
* arrays, referencing elements: Reference to Elements.
@@ -31420,12 +31470,12 @@ Index
* ASCII: Ordinal Functions. (line 45)
* asort <1>: Array Sorting Functions.
(line 6)
-* asort: String Functions. (line 42)
+* asort: String Functions. (line 41)
* asort() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* asorti <1>: Array Sorting Functions.
(line 6)
-* asorti: String Functions. (line 42)
+* asorti: String Functions. (line 41)
* asorti() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* assert() function (C library): Assert Function. (line 6)
@@ -31442,8 +31492,8 @@ Index
(line 55)
* asterisk (*), * operator, as regexp operator: Regexp Operators.
(line 89)
-* asterisk (*), * operator, null strings, matching: Gory Details.
- (line 143)
+* asterisk (*), * operator, null strings, matching: String Functions.
+ (line 535)
* asterisk (*), ** operator <1>: Precedence. (line 49)
* asterisk (*), ** operator: Arithmetic Ops. (line 81)
* asterisk (*), **= operator <1>: Precedence. (line 95)
@@ -31455,7 +31505,7 @@ Index
* awf (amazingly workable formatter) program: Glossary. (line 24)
* awk debugging, enabling: Options. (line 108)
* awk language, POSIX version: Assignment Ops. (line 137)
-* awk profiling, enabling: Options. (line 240)
+* awk profiling, enabling: Options. (line 242)
* awk programs <1>: Two Rules. (line 6)
* awk programs <2>: Executable Scripts. (line 6)
* awk programs: Getting Started. (line 12)
@@ -31481,7 +31531,7 @@ Index
* awk, gawk and <1>: This Manual. (line 14)
* awk, gawk and: Preface. (line 21)
* awk, history of: History. (line 17)
-* awk, implementation issues, pipes: Redirection. (line 135)
+* awk, implementation issues, pipes: Redirection. (line 129)
* awk, implementations: Other Versions. (line 6)
* awk, implementations, limits: Getline Notes. (line 14)
* awk, invoking: Command Line. (line 6)
@@ -31490,7 +31540,7 @@ Index
* awk, POSIX and: Preface. (line 21)
* awk, POSIX and, See Also POSIX awk: Preface. (line 21)
* awk, regexp constants and: Comparison Operators.
- (line 102)
+ (line 103)
* awk, See Also gawk: Preface. (line 34)
* awk, terms describing: This Manual. (line 6)
* awk, uses for <1>: When. (line 6)
@@ -31545,7 +31595,7 @@ Index
* backslash (\), \y operator (gawk): GNU Regexp Operators.
(line 38)
* backslash (\), as field separator: Command Line Field Separator.
- (line 27)
+ (line 24)
* backslash (\), continuing lines and: Statements/Lines. (line 19)
* backslash (\), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -31576,18 +31626,18 @@ Index
* BEGIN pattern, next/nextfile statements and <1>: Next Statement.
(line 44)
* BEGIN pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* BEGIN pattern, OFS/ORS variables, assigning values to: Output Separators.
(line 20)
* BEGIN pattern, operators and: Using BEGIN/END. (line 17)
* BEGIN pattern, print statement and: I/O And BEGIN/END. (line 16)
* BEGIN pattern, pwcat program: Passwd Functions. (line 143)
-* BEGIN pattern, running awk programs and: Cut Program. (line 68)
+* BEGIN pattern, running awk programs and: Cut Program. (line 63)
* BEGIN pattern, TEXTDOMAIN variable and: Programmer i18n. (line 60)
* BEGINFILE pattern: BEGINFILE/ENDFILE. (line 6)
* BEGINFILE pattern, Boolean patterns and: Expression Patterns.
(line 70)
-* beginfile() user-defined function: Filetrans Function. (line 62)
+* beginfile() user-defined function: Filetrans Function. (line 61)
* Bentley, Jon: Glossary. (line 143)
* Benzinger, Michael: Contributors. (line 97)
* Berry, Karl <1>: Ranges and Locales. (line 74)
@@ -31601,11 +31651,11 @@ Index
* BINMODE variable <1>: PC Using. (line 33)
* BINMODE variable: User-modified. (line 15)
* bit-manipulation functions: Bitwise Functions. (line 6)
-* bits2str() user-defined function: Bitwise Functions. (line 70)
-* bitwise AND: Bitwise Functions. (line 39)
-* bitwise complement: Bitwise Functions. (line 43)
-* bitwise OR: Bitwise Functions. (line 49)
-* bitwise XOR: Bitwise Functions. (line 55)
+* bits2str() user-defined function: Bitwise Functions. (line 71)
+* bitwise AND: Bitwise Functions. (line 40)
+* bitwise complement: Bitwise Functions. (line 44)
+* bitwise OR: Bitwise Functions. (line 50)
+* bitwise XOR: Bitwise Functions. (line 56)
* bitwise, complement: Bitwise Functions. (line 25)
* bitwise, operations: Bitwise Functions. (line 6)
* bitwise, shift: Bitwise Functions. (line 32)
@@ -31649,8 +31699,8 @@ Index
* Brennan, Michael: Foreword. (line 83)
* Brian Kernighan's awk <1>: I/O Functions. (line 43)
* Brian Kernighan's awk <2>: Gory Details. (line 19)
-* Brian Kernighan's awk <3>: String Functions. (line 490)
-* Brian Kernighan's awk <4>: Delete. (line 48)
+* Brian Kernighan's awk <3>: String Functions. (line 491)
+* Brian Kernighan's awk <4>: Delete. (line 51)
* Brian Kernighan's awk <5>: Nextfile Statement. (line 47)
* Brian Kernighan's awk <6>: Continue Statement. (line 44)
* Brian Kernighan's awk <7>: Break Statement. (line 51)
@@ -31675,8 +31725,8 @@ Index
* Buening, Andreas <2>: Contributors. (line 92)
* Buening, Andreas: Acknowledgments. (line 60)
* buffering, input/output <1>: Two-way I/O. (line 52)
-* buffering, input/output: I/O Functions. (line 140)
-* buffering, interactive vs. noninteractive: I/O Functions. (line 109)
+* buffering, input/output: I/O Functions. (line 139)
+* buffering, interactive vs. noninteractive: I/O Functions. (line 108)
* buffers, flushing: I/O Functions. (line 32)
* buffers, operators for: GNU Regexp Operators.
(line 48)
@@ -31706,7 +31756,7 @@ Index
* case sensitivity, and regexps: User-modified. (line 76)
* case sensitivity, and string comparisons: User-modified. (line 76)
* case sensitivity, array indices and: Array Intro. (line 94)
-* case sensitivity, converting case: String Functions. (line 520)
+* case sensitivity, converting case: String Functions. (line 521)
* case sensitivity, example programs: Library Functions. (line 53)
* case sensitivity, gawk: Case-sensitivity. (line 26)
* case sensitivity, regexps and: Case-sensitivity. (line 6)
@@ -31742,7 +31792,7 @@ Index
* close() function, portability: Close Files And Pipes.
(line 81)
* close() function, return value: Close Files And Pipes.
- (line 131)
+ (line 132)
* close() function, two-way pipes and: Two-way I/O. (line 59)
* Close, Diane <1>: Contributors. (line 20)
* Close, Diane: Manual History. (line 34)
@@ -31778,15 +31828,15 @@ Index
* commenting, backslash continuation and: Statements/Lines. (line 76)
* common extensions, ** operator: Arithmetic Ops. (line 30)
* common extensions, **= operator: Assignment Ops. (line 137)
-* common extensions, /dev/stderr special file: Special FD. (line 46)
-* common extensions, /dev/stdin special file: Special FD. (line 46)
-* common extensions, /dev/stdout special file: Special FD. (line 46)
+* common extensions, /dev/stderr special file: Special FD. (line 48)
+* common extensions, /dev/stdin special file: Special FD. (line 48)
+* common extensions, /dev/stdout special file: Special FD. (line 48)
* common extensions, \x escape sequence: Escape Sequences. (line 61)
* common extensions, BINMODE variable: PC Using. (line 33)
* common extensions, delete to delete entire arrays: Delete. (line 39)
* common extensions, func keyword: Definition Syntax. (line 93)
* common extensions, length() applied to an array: String Functions.
- (line 197)
+ (line 200)
* common extensions, RS as a regexp: gawk split records. (line 6)
* common extensions, single character fields: Single Character Fields.
(line 6)
@@ -31795,7 +31845,7 @@ Index
(line 9)
* comparison expressions, as patterns: Expression Patterns. (line 14)
* comparison expressions, string vs. regexp: Comparison Operators.
- (line 78)
+ (line 79)
* compatibility mode (gawk), extensions: POSIX/GNU. (line 6)
* compatibility mode (gawk), file names: Special Caveats. (line 9)
* compatibility mode (gawk), hexadecimal numbers: Nondecimal-numbers.
@@ -31809,7 +31859,7 @@ Index
* compiling gawk for MS-DOS and MS-Windows: PC Compiling. (line 13)
* compiling gawk for VMS: VMS Compilation. (line 6)
* compiling gawk with EMX for OS/2: PC Compiling. (line 28)
-* compl: Bitwise Functions. (line 43)
+* compl: Bitwise Functions. (line 44)
* complement, bitwise: Bitwise Functions. (line 25)
* compound statements, control statements and: Statements. (line 10)
* concatenating: Concatenation. (line 8)
@@ -31835,15 +31885,15 @@ Index
* control statements: Statements. (line 6)
* controlling array scanning order: Controlling Scanning.
(line 14)
-* convert string to lower case: String Functions. (line 521)
-* convert string to number: String Functions. (line 388)
-* convert string to upper case: String Functions. (line 527)
+* convert string to lower case: String Functions. (line 522)
+* convert string to number: String Functions. (line 389)
+* convert string to upper case: String Functions. (line 528)
* converting integer array subscripts: Numeric Array Subscripts.
(line 31)
* converting, dates to timestamps: Time Functions. (line 76)
-* converting, numbers to strings <1>: Bitwise Functions. (line 109)
+* converting, numbers to strings <1>: Bitwise Functions. (line 110)
* converting, numbers to strings: Strings And Numbers. (line 6)
-* converting, strings to numbers <1>: Bitwise Functions. (line 109)
+* converting, strings to numbers <1>: Bitwise Functions. (line 110)
* converting, strings to numbers: Strings And Numbers. (line 6)
* CONVFMT variable <1>: User-modified. (line 30)
* CONVFMT variable: Strings And Numbers. (line 29)
@@ -31851,7 +31901,7 @@ Index
(line 6)
* cookie: Glossary. (line 149)
* coprocesses <1>: Two-way I/O. (line 25)
-* coprocesses: Redirection. (line 102)
+* coprocesses: Redirection. (line 96)
* coprocesses, closing: Close Files And Pipes.
(line 6)
* coprocesses, getline from: Getline/Coprocess. (line 6)
@@ -31859,7 +31909,7 @@ Index
* cosine: Numeric Functions. (line 15)
* counting: Wc Program. (line 6)
* csh utility: Statements/Lines. (line 44)
-* csh utility, POSIXLY_CORRECT environment variable: Options. (line 351)
+* csh utility, POSIXLY_CORRECT environment variable: Options. (line 354)
* csh utility, |& operator, comparison with: Two-way I/O. (line 25)
* ctime() user-defined function: Function Example. (line 74)
* currency symbols, localization: Explaining gettext. (line 104)
@@ -31884,11 +31934,11 @@ Index
(line 43)
* dark corner, break statement: Break Statement. (line 51)
* dark corner, close() function: Close Files And Pipes.
- (line 131)
+ (line 132)
* dark corner, command-line arguments: Assignment Options. (line 43)
* dark corner, continue statement: Continue Statement. (line 44)
* dark corner, CONVFMT variable: Strings And Numbers. (line 40)
-* dark corner, escape sequences: Other Arguments. (line 35)
+* dark corner, escape sequences: Other Arguments. (line 38)
* dark corner, escape sequences, for metacharacters: Escape Sequences.
(line 142)
* dark corner, exit statement: Exit Statement. (line 30)
@@ -31900,9 +31950,9 @@ Index
* dark corner, format-control characters: Control Letters. (line 18)
* dark corner, FS as null string: Single Character Fields.
(line 20)
-* dark corner, input files: awk split records. (line 110)
+* dark corner, input files: awk split records. (line 111)
* dark corner, invoking awk: Command Line. (line 16)
-* dark corner, length() function: String Functions. (line 183)
+* dark corner, length() function: String Functions. (line 186)
* dark corner, locale's decimal point character: Locale influences conversions.
(line 17)
* dark corner, multiline records: Multiple Line. (line 35)
@@ -31914,7 +31964,7 @@ Index
(line 148)
* dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps.
(line 43)
-* dark corner, split() function: String Functions. (line 359)
+* dark corner, split() function: String Functions. (line 360)
* dark corner, strings, storing: gawk split records. (line 83)
* dark corner, value of ARGV[0]: Auto-set. (line 39)
* data, fixed-width: Constant Size. (line 10)
@@ -32043,7 +32093,7 @@ Index
* debugger, read commands from a file: Debugger Info. (line 96)
* debugging awk programs: Debugger. (line 6)
* debugging gawk, bug reports: Bugs. (line 9)
-* decimal point character, locale specific: Options. (line 268)
+* decimal point character, locale specific: Options. (line 270)
* decrement operators: Increment Ops. (line 35)
* default keyword: Switch Statement. (line 6)
* Deifik, Scott <1>: Bugs. (line 71)
@@ -32060,7 +32110,7 @@ Index
* deleting entire arrays: Delete. (line 39)
* Demaille, Akim: Acknowledgments. (line 60)
* describe call stack frame, in debugger: Debugger Info. (line 27)
-* differences between gawk and awk: String Functions. (line 197)
+* differences between gawk and awk: String Functions. (line 200)
* differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV.
(line 90)
* differences in awk and gawk, ARGIND variable: Auto-set. (line 44)
@@ -32083,7 +32133,7 @@ Index
* differences in awk and gawk, command-line directories: Command-line directories.
(line 6)
* differences in awk and gawk, ERRNO variable: Auto-set. (line 82)
-* differences in awk and gawk, error messages: Special FD. (line 16)
+* differences in awk and gawk, error messages: Special FD. (line 19)
* differences in awk and gawk, FIELDWIDTHS variable: User-modified.
(line 37)
* differences in awk and gawk, FPAT variable: User-modified. (line 43)
@@ -32094,26 +32144,26 @@ Index
* differences in awk and gawk, IGNORECASE variable: User-modified.
(line 76)
* differences in awk and gawk, implementation limitations <1>: Redirection.
- (line 135)
+ (line 129)
* differences in awk and gawk, implementation limitations: Getline Notes.
(line 14)
* differences in awk and gawk, indirect function calls: Indirect Calls.
(line 6)
* differences in awk and gawk, input/output operators <1>: Redirection.
- (line 102)
+ (line 96)
* differences in awk and gawk, input/output operators: Getline/Coprocess.
(line 6)
* differences in awk and gawk, line continuations: Conditional Exp.
(line 34)
* differences in awk and gawk, LINT variable: User-modified. (line 88)
* differences in awk and gawk, match() function: String Functions.
- (line 260)
+ (line 262)
* differences in awk and gawk, print/printf statements: Format Modifiers.
(line 13)
* differences in awk and gawk, PROCINFO array: Auto-set. (line 137)
* differences in awk and gawk, read timeouts: Read Timeout. (line 6)
* differences in awk and gawk, record separators: awk split records.
- (line 124)
+ (line 125)
* differences in awk and gawk, regexp constants: Using Constant Regexps.
(line 43)
* differences in awk and gawk, regular expressions: Case-sensitivity.
@@ -32124,13 +32174,13 @@ Index
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
- (line 347)
+ (line 348)
* differences in awk and gawk, strings: Scalar Constants. (line 20)
* differences in awk and gawk, strings, storing: gawk split records.
(line 77)
* differences in awk and gawk, SYMTAB variable: Auto-set. (line 276)
* differences in awk and gawk, TEXTDOMAIN variable: User-modified.
- (line 152)
+ (line 151)
* differences in awk and gawk, trunc-mod operation: Arithmetic Ops.
(line 66)
* directories, command-line: Command-line directories.
@@ -32185,7 +32235,7 @@ Index
* empty array elements: Reference to Elements.
(line 18)
* empty pattern: Empty. (line 6)
-* empty strings: awk split records. (line 114)
+* empty strings: awk split records. (line 115)
* empty strings, See null strings: Regexp Field Splitting.
(line 43)
* enable breakpoint: Breakpoint Control. (line 73)
@@ -32202,16 +32252,16 @@ Index
* END pattern, next/nextfile statements and <1>: Next Statement.
(line 44)
* END pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* END pattern, operators and: Using BEGIN/END. (line 17)
* END pattern, print statement and: I/O And BEGIN/END. (line 16)
* ENDFILE pattern: BEGINFILE/ENDFILE. (line 6)
* ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70)
-* endfile() user-defined function: Filetrans Function. (line 62)
-* endgrent() function (C library): Group Functions. (line 212)
-* endgrent() user-defined function: Group Functions. (line 215)
-* endpwent() function (C library): Passwd Functions. (line 210)
-* endpwent() user-defined function: Passwd Functions. (line 213)
+* endfile() user-defined function: Filetrans Function. (line 61)
+* endgrent() function (C library): Group Functions. (line 211)
+* endgrent() user-defined function: Group Functions. (line 214)
+* endpwent() function (C library): Passwd Functions. (line 207)
+* endpwent() user-defined function: Passwd Functions. (line 210)
* English, Steve: Advanced Features. (line 6)
* ENVIRON array: Auto-set. (line 60)
* environment variables used by gawk: Environment Variables.
@@ -32227,9 +32277,9 @@ Index
* ERRNO variable: Auto-set. (line 82)
* ERRNO variable, with BEGINFILE pattern: BEGINFILE/ENDFILE. (line 26)
* ERRNO variable, with close() function: Close Files And Pipes.
- (line 139)
+ (line 140)
* ERRNO variable, with getline command: Getline. (line 19)
-* error handling: Special FD. (line 16)
+* error handling: Special FD. (line 19)
* error handling, ERRNO variable and: Auto-set. (line 82)
* error output: Special FD. (line 6)
* escape processing, gsub()/gensub()/sub() functions: Gory Details.
@@ -32245,7 +32295,7 @@ Index
* examining fields: Fields. (line 6)
* exclamation point (!), ! operator <1>: Egrep Program. (line 175)
* exclamation point (!), ! operator <2>: Precedence. (line 52)
-* exclamation point (!), ! operator: Boolean Ops. (line 67)
+* exclamation point (!), ! operator: Boolean Ops. (line 69)
* exclamation point (!), != operator <1>: Precedence. (line 65)
* exclamation point (!), != operator: Comparison Operators.
(line 11)
@@ -32296,16 +32346,16 @@ Index
* extensions, Brian Kernighan's awk: BTL. (line 6)
* extensions, common, ** operator: Arithmetic Ops. (line 30)
* extensions, common, **= operator: Assignment Ops. (line 137)
-* extensions, common, /dev/stderr special file: Special FD. (line 46)
-* extensions, common, /dev/stdin special file: Special FD. (line 46)
-* extensions, common, /dev/stdout special file: Special FD. (line 46)
+* extensions, common, /dev/stderr special file: Special FD. (line 48)
+* extensions, common, /dev/stdin special file: Special FD. (line 48)
+* extensions, common, /dev/stdout special file: Special FD. (line 48)
* extensions, common, \x escape sequence: Escape Sequences. (line 61)
* extensions, common, BINMODE variable: PC Using. (line 33)
* extensions, common, delete to delete entire arrays: Delete. (line 39)
* extensions, common, fflush() function: I/O Functions. (line 43)
* extensions, common, func keyword: Definition Syntax. (line 93)
* extensions, common, length() applied to an array: String Functions.
- (line 197)
+ (line 200)
* extensions, common, RS as a regexp: gawk split records. (line 6)
* extensions, common, single character fields: Single Character Fields.
(line 6)
@@ -32345,7 +32395,7 @@ Index
(line 6)
* field separators, regular expressions as: Field Separators. (line 51)
* field separators, See Also OFS: Changing Fields. (line 64)
-* field separators, spaces as: Cut Program. (line 108)
+* field separators, spaces as: Cut Program. (line 103)
* fields <1>: Basic High Level. (line 73)
* fields <2>: Fields. (line 6)
* fields: Reading Files. (line 14)
@@ -32365,7 +32415,7 @@ Index
* file inclusion, @include directive: Include Files. (line 8)
* file names, distinguishing: Auto-set. (line 56)
* file names, in compatibility mode: Special Caveats. (line 9)
-* file names, standard streams in gawk: Special FD. (line 46)
+* file names, standard streams in gawk: Special FD. (line 48)
* FILENAME variable <1>: Auto-set. (line 98)
* FILENAME variable: Reading Files. (line 6)
* FILENAME variable, getline, setting with: Getline Notes. (line 19)
@@ -32378,7 +32428,7 @@ Index
* files, .po: Explaining gettext. (line 37)
* files, .po, converting to .mo: I18N Example. (line 63)
* files, .pot: Explaining gettext. (line 31)
-* files, /dev/... special files: Special FD. (line 46)
+* files, /dev/... special files: Special FD. (line 48)
* files, /inet/... (gawk): TCP/IP Networking. (line 6)
* files, /inet4/... (gawk): TCP/IP Networking. (line 6)
* files, /inet6/... (gawk): TCP/IP Networking. (line 6)
@@ -32401,7 +32451,7 @@ Index
(line 47)
* files, message object, specifying directory of: Explaining gettext.
(line 54)
-* files, multiple passes over: Other Arguments. (line 53)
+* files, multiple passes over: Other Arguments. (line 56)
* files, multiple, duplicating output into: Tee Program. (line 6)
* files, output, See output files: Close Files And Pipes.
(line 6)
@@ -32427,7 +32477,7 @@ Index
* Fish, Fred: Contributors. (line 50)
* fixed-width data: Constant Size. (line 10)
* flag variables <1>: Tee Program. (line 20)
-* flag variables: Boolean Ops. (line 67)
+* flag variables: Boolean Ops. (line 69)
* floating-point, numbers, arbitrary precision: Arbitrary Precision Arithmetic.
(line 6)
* floating-point, VAX/VMS: VMS Running. (line 51)
@@ -32450,7 +32500,7 @@ Index
* format time string: Time Functions. (line 48)
* formats, numeric output: OFMT. (line 6)
* formatting output: Printf. (line 6)
-* formatting strings: String Functions. (line 381)
+* formatting strings: String Functions. (line 382)
* forward slash (/) to enclose regular expressions: Regexp. (line 10)
* forward slash (/), / operator: Precedence. (line 55)
* forward slash (/), /= operator <1>: Precedence. (line 95)
@@ -32473,9 +32523,9 @@ Index
* FS variable, --field-separator option and: Options. (line 21)
* FS variable, as null string: Single Character Fields.
(line 20)
-* FS variable, as TAB character: Options. (line 264)
+* FS variable, as TAB character: Options. (line 266)
* FS variable, changing value of: Field Separators. (line 35)
-* FS variable, running awk programs and: Cut Program. (line 68)
+* FS variable, running awk programs and: Cut Program. (line 63)
* FS variable, setting from command line: Command Line Field Separator.
(line 6)
* FS, containing ^: Regexp Field Splitting.
@@ -32500,7 +32550,7 @@ Index
* functions, defining: Definition Syntax. (line 9)
* functions, library: Library Functions. (line 6)
* functions, library, assertions: Assert Function. (line 6)
-* functions, library, associative arrays and: Library Names. (line 57)
+* functions, library, associative arrays and: Library Names. (line 58)
* functions, library, C library: Getopt Function. (line 6)
* functions, library, character values as numbers: Ordinal Functions.
(line 6)
@@ -32520,8 +32570,7 @@ Index
* functions, library, rounding numbers: Round Function. (line 6)
* functions, library, user database, reading: Passwd Functions.
(line 6)
-* functions, names of <1>: Definition Syntax. (line 23)
-* functions, names of: Arrays. (line 18)
+* functions, names of: Definition Syntax. (line 23)
* functions, recursive: Definition Syntax. (line 83)
* functions, string-translation: I18N Functions. (line 6)
* functions, undefined: Pass By Value/Reference.
@@ -32538,18 +32587,16 @@ Index
* Garfinkle, Scott: Contributors. (line 34)
* gawk program, dynamic profiling: Profiling. (line 179)
* gawk version: Auto-set. (line 214)
-* gawk, ARGIND variable in: Other Arguments. (line 12)
+* gawk, ARGIND variable in: Other Arguments. (line 15)
* gawk, awk and <1>: This Manual. (line 14)
* gawk, awk and: Preface. (line 21)
-* gawk, bitwise operations in: Bitwise Functions. (line 39)
+* gawk, bitwise operations in: Bitwise Functions. (line 40)
* gawk, break statement in: Break Statement. (line 51)
* gawk, built-in variables and: Built-in Variables. (line 14)
* gawk, character classes and: Bracket Expressions. (line 100)
* gawk, coding style in: Adding Code. (line 39)
* gawk, command-line options, and regular expressions: GNU Regexp Operators.
(line 70)
-* gawk, comparison operators and: Comparison Operators.
- (line 50)
* gawk, configuring: Configuration Philosophy.
(line 6)
* gawk, configuring, options: Additional Configuration Options.
@@ -32561,10 +32608,10 @@ Index
* gawk, ERRNO variable in <2>: Auto-set. (line 82)
* gawk, ERRNO variable in <3>: BEGINFILE/ENDFILE. (line 26)
* gawk, ERRNO variable in <4>: Close Files And Pipes.
- (line 139)
+ (line 140)
* gawk, ERRNO variable in: Getline. (line 19)
* gawk, escape sequences: Escape Sequences. (line 132)
-* gawk, extensions, disabling: Options. (line 252)
+* gawk, extensions, disabling: Options. (line 254)
* gawk, features, adding: Adding Code. (line 6)
* gawk, features, advanced: Advanced Features. (line 6)
* gawk, field separators and: User-modified. (line 71)
@@ -32580,7 +32627,7 @@ Index
* gawk, hexadecimal numbers and: Nondecimal-numbers. (line 42)
* gawk, IGNORECASE variable in <1>: Array Sorting Functions.
(line 83)
-* gawk, IGNORECASE variable in <2>: String Functions. (line 58)
+* gawk, IGNORECASE variable in <2>: String Functions. (line 57)
* gawk, IGNORECASE variable in <3>: Array Intro. (line 94)
* gawk, IGNORECASE variable in <4>: User-modified. (line 76)
* gawk, IGNORECASE variable in: Case-sensitivity. (line 26)
@@ -32589,7 +32636,7 @@ Index
* gawk, implementation issues, downward compatibility: Compatibility Mode.
(line 6)
* gawk, implementation issues, limits: Getline Notes. (line 14)
-* gawk, implementation issues, pipes: Redirection. (line 135)
+* gawk, implementation issues, pipes: Redirection. (line 129)
* gawk, installing: Installation. (line 6)
* gawk, internationalization and, See internationalization: Internationalization.
(line 13)
@@ -32616,16 +32663,16 @@ Index
* gawk, regular expressions, precedence: Regexp Operators. (line 161)
* gawk, RT variable in <1>: Auto-set. (line 272)
* gawk, RT variable in <2>: Multiple Line. (line 129)
-* gawk, RT variable in: awk split records. (line 124)
+* gawk, RT variable in: awk split records. (line 125)
* gawk, See Also awk: Preface. (line 34)
* gawk, source code, obtaining: Getting. (line 6)
* gawk, splitting fields and: Constant Size. (line 88)
* gawk, string-translation functions: I18N Functions. (line 6)
* gawk, SYMTAB array in: Auto-set. (line 276)
-* gawk, TEXTDOMAIN variable in: User-modified. (line 152)
+* gawk, TEXTDOMAIN variable in: User-modified. (line 151)
* gawk, timestamps: Time Functions. (line 6)
* gawk, uses for: Preface. (line 34)
-* gawk, versions of, information about, printing: Options. (line 298)
+* gawk, versions of, information about, printing: Options. (line 300)
* gawk, VMS version of: VMS Installation. (line 6)
* gawk, word-boundary operator: GNU Regexp Operators.
(line 63)
@@ -32641,12 +32688,12 @@ Index
* getaddrinfo() function (C library): TCP/IP Networking. (line 38)
* getgrent() function (C library): Group Functions. (line 6)
* getgrent() user-defined function: Group Functions. (line 6)
-* getgrgid() function (C library): Group Functions. (line 183)
-* getgrgid() user-defined function: Group Functions. (line 186)
-* getgrnam() function (C library): Group Functions. (line 172)
-* getgrnam() user-defined function: Group Functions. (line 177)
-* getgruser() function (C library): Group Functions. (line 192)
-* getgruser() function, user-defined: Group Functions. (line 195)
+* getgrgid() function (C library): Group Functions. (line 182)
+* getgrgid() user-defined function: Group Functions. (line 185)
+* getgrnam() function (C library): Group Functions. (line 171)
+* getgrnam() user-defined function: Group Functions. (line 176)
+* getgruser() function (C library): Group Functions. (line 191)
+* getgruser() function, user-defined: Group Functions. (line 194)
* getline command: Reading Files. (line 20)
* getline command, _gr_init() user-defined function: Group Functions.
(line 83)
@@ -32670,10 +32717,10 @@ Index
* getopt() user-defined function: Getopt Function. (line 108)
* getpwent() function (C library): Passwd Functions. (line 16)
* getpwent() user-defined function: Passwd Functions. (line 16)
-* getpwnam() function (C library): Passwd Functions. (line 177)
-* getpwnam() user-defined function: Passwd Functions. (line 182)
-* getpwuid() function (C library): Passwd Functions. (line 188)
-* getpwuid() user-defined function: Passwd Functions. (line 192)
+* getpwnam() function (C library): Passwd Functions. (line 174)
+* getpwnam() user-defined function: Passwd Functions. (line 179)
+* getpwuid() function (C library): Passwd Functions. (line 185)
+* getpwuid() user-defined function: Passwd Functions. (line 189)
* gettext library: Explaining gettext. (line 6)
* gettext library, locale categories: Explaining gettext. (line 81)
* gettext() function (C library): Explaining gettext. (line 63)
@@ -32712,7 +32759,7 @@ Index
* gsub <1>: String Functions. (line 139)
* gsub: Using Constant Regexps.
(line 43)
-* gsub() function, arguments of: String Functions. (line 460)
+* gsub() function, arguments of: String Functions. (line 461)
* gsub() function, escape processing: Gory Details. (line 6)
* h debugger command (alias for help): Miscellaneous Debugger Commands.
(line 66)
@@ -32758,9 +32805,9 @@ Index
* Illumos, POSIX-compliant awk: Other Versions. (line 105)
* implementation issues, gawk: Notes. (line 6)
* implementation issues, gawk, debugging: Compatibility Mode. (line 6)
-* implementation issues, gawk, limits <1>: Redirection. (line 135)
+* implementation issues, gawk, limits <1>: Redirection. (line 129)
* implementation issues, gawk, limits: Getline Notes. (line 14)
-* in operator <1>: For Statement. (line 75)
+* in operator <1>: For Statement. (line 76)
* in operator <2>: Precedence. (line 83)
* in operator: Comparison Operators.
(line 11)
@@ -32789,9 +32836,9 @@ Index
* input files, examples: Sample Data Files. (line 6)
* input files, reading: Reading Files. (line 6)
* input files, running awk without: Read Terminal. (line 6)
-* input files, variable assignments and: Other Arguments. (line 23)
+* input files, variable assignments and: Other Arguments. (line 26)
* input pipeline: Getline/Pipe. (line 9)
-* input record, length of: String Functions. (line 174)
+* input record, length of: String Functions. (line 177)
* input redirection: Getline/File. (line 6)
* input, data, nondecimal: Nondecimal Data. (line 6)
* input, explicit: Getline. (line 6)
@@ -32815,12 +32862,12 @@ Index
* integers, arbitrary precision: Arbitrary Precision Integers.
(line 6)
* integers, unsigned: Computer Arithmetic. (line 41)
-* interacting with other programs: I/O Functions. (line 75)
+* interacting with other programs: I/O Functions. (line 74)
* internationalization <1>: I18N and L10N. (line 6)
* internationalization: I18N Functions. (line 6)
* internationalization, localization <1>: Internationalization.
(line 13)
-* internationalization, localization: User-modified. (line 152)
+* internationalization, localization: User-modified. (line 151)
* internationalization, localization, character classes: Bracket Expressions.
(line 100)
* internationalization, localization, gawk and: Internationalization.
@@ -32836,7 +32883,7 @@ Index
* interpreted programs: Basic High Level. (line 15)
* interval expressions, regexp operator: Regexp Operators. (line 116)
* inventory-shipped file: Sample Data Files. (line 32)
-* invoke shell command: I/O Functions. (line 75)
+* invoke shell command: I/O Functions. (line 74)
* isarray: Type Functions. (line 11)
* ISO: Glossary. (line 367)
* ISO 8859-1: Glossary. (line 133)
@@ -32889,19 +32936,19 @@ Index
* left angle bracket (<), <= operator <1>: Precedence. (line 65)
* left angle bracket (<), <= operator: Comparison Operators.
(line 11)
-* left shift: Bitwise Functions. (line 46)
+* left shift: Bitwise Functions. (line 47)
* left shift, bitwise: Bitwise Functions. (line 32)
* leftmost longest match: Multiple Line. (line 26)
-* length: String Functions. (line 167)
-* length of input record: String Functions. (line 174)
-* length of string: String Functions. (line 167)
+* length: String Functions. (line 170)
+* length of input record: String Functions. (line 177)
+* length of string: String Functions. (line 170)
* Lesser General Public License (LGPL): Glossary. (line 396)
* LGPL (Lesser General Public License): Glossary. (line 396)
* libmawk: Other Versions. (line 121)
* libraries of awk functions: Library Functions. (line 6)
* libraries of awk functions, assertions: Assert Function. (line 6)
* libraries of awk functions, associative arrays and: Library Names.
- (line 57)
+ (line 58)
* libraries of awk functions, character values as numbers: Ordinal Functions.
(line 6)
* libraries of awk functions, command-line options: Getopt Function.
@@ -32921,7 +32968,7 @@ Index
* libraries of awk functions, user database, reading: Passwd Functions.
(line 6)
* line breaks: Statements/Lines. (line 6)
-* line continuations: Boolean Ops. (line 62)
+* line continuations: Boolean Ops. (line 64)
* line continuations, gawk: Conditional Exp. (line 34)
* line continuations, in print statement: Print Examples. (line 76)
* line continuations, with C shell: More Complex. (line 30)
@@ -32937,7 +32984,7 @@ Index
* lint checking, empty programs: Command Line. (line 16)
* lint checking, issuing warnings: Options. (line 185)
* lint checking, POSIXLY_CORRECT environment variable: Options.
- (line 336)
+ (line 339)
* lint checking, undefined functions: Pass By Value/Reference.
(line 88)
* LINT variable: User-modified. (line 88)
@@ -32953,7 +33000,7 @@ Index
* loading, extensions: Options. (line 173)
* local variables, in a function: Variable Scope. (line 6)
* locale categories: Explaining gettext. (line 81)
-* locale decimal point character: Options. (line 268)
+* locale decimal point character: Options. (line 270)
* locale, definition of: Locales. (line 6)
* localization: I18N and L10N. (line 6)
* localization, See internationalization, localization: I18N and L10N.
@@ -32967,7 +33014,7 @@ Index
* long options: Command Line. (line 13)
* loops: While Statement. (line 6)
* loops, break statement and: Break Statement. (line 6)
-* loops, continue statements and: For Statement. (line 64)
+* loops, continue statements and: For Statement. (line 65)
* loops, count for header, in a profile: Profiling. (line 131)
* loops, do-while: Do Statement. (line 6)
* loops, exiting: Break Statement. (line 6)
@@ -32976,7 +33023,7 @@ Index
* loops, See Also while statement: While Statement. (line 6)
* loops, while: While Statement. (line 6)
* ls utility: More Complex. (line 15)
-* lshift: Bitwise Functions. (line 46)
+* lshift: Bitwise Functions. (line 47)
* lvalues/rvalues: Assignment Ops. (line 32)
* mail-list file: Sample Data Files. (line 6)
* mailing labels, printing: Labels Program. (line 6)
@@ -32988,14 +33035,14 @@ Index
(line 6)
* marked strings, extracting: String Extraction. (line 6)
* Marx, Groucho: Increment Ops. (line 60)
-* match: String Functions. (line 207)
-* match regexp in string: String Functions. (line 207)
+* match: String Functions. (line 210)
+* match regexp in string: String Functions. (line 210)
* match() function, RSTART/RLENGTH variables: String Functions.
- (line 224)
+ (line 227)
* matching, expressions, See comparison expressions: Typing and Comparison.
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
-* matching, null strings: Gory Details. (line 143)
+* matching, null strings: String Functions. (line 535)
* mawk utility <1>: Other Versions. (line 44)
* mawk utility <2>: Nextfile Statement. (line 47)
* mawk utility <3>: Concatenation. (line 36)
@@ -33025,18 +33072,16 @@ Index
* multiple-line records: Multiple Line. (line 6)
* n debugger command (alias for next): Debugger Execution Control.
(line 43)
-* names, arrays/variables <1>: Library Names. (line 6)
-* names, arrays/variables: Arrays. (line 18)
+* names, arrays/variables: Library Names. (line 6)
* names, functions <1>: Library Names. (line 6)
* names, functions: Definition Syntax. (line 23)
-* namespace issues <1>: Library Names. (line 6)
-* namespace issues: Arrays. (line 18)
+* namespace issues: Library Names. (line 6)
* namespace issues, functions: Definition Syntax. (line 23)
* NetBSD: Glossary. (line 611)
* networks, programming: TCP/IP Networking. (line 6)
* networks, support for: Special Network. (line 6)
-* newlines <1>: Boolean Ops. (line 67)
-* newlines <2>: Options. (line 258)
+* newlines <1>: Boolean Ops. (line 69)
+* newlines <2>: Options. (line 260)
* newlines: Statements/Lines. (line 6)
* newlines, as field separators: Default Field Splitting.
(line 6)
@@ -33051,14 +33096,14 @@ Index
(line 43)
* next file statement: Feature History. (line 169)
* next statement <1>: Next Statement. (line 6)
-* next statement: Boolean Ops. (line 93)
-* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
+* next statement: Boolean Ops. (line 95)
+* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 37)
* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 49)
* next statement, user-defined functions and: Next Statement. (line 44)
* nextfile statement: Nextfile Statement. (line 6)
* nextfile statement, BEGIN/END patterns and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* nextfile statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 26)
* nextfile statement, user-defined functions and: Nextfile Statement.
@@ -33081,16 +33126,16 @@ Index
* null strings <2>: Truth Values. (line 6)
* null strings <3>: Regexp Field Splitting.
(line 43)
-* null strings: awk split records. (line 114)
-* null strings in gawk arguments, quoting and: Quoting. (line 79)
+* null strings: awk split records. (line 115)
+* null strings in gawk arguments, quoting and: Quoting. (line 82)
* null strings, and deleting array elements: Delete. (line 27)
* null strings, as array subscripts: Uninitialized Subscripts.
(line 43)
* null strings, converting numbers to strings: Strings And Numbers.
(line 21)
-* null strings, matching: Gory Details. (line 143)
-* number as string of bits: Bitwise Functions. (line 109)
-* number of array elements: String Functions. (line 197)
+* null strings, matching: String Functions. (line 535)
+* number as string of bits: Bitwise Functions. (line 110)
+* number of array elements: String Functions. (line 200)
* number sign (#), #! (executable scripts): Executable Scripts.
(line 6)
* number sign (#), commenting: Comments. (line 6)
@@ -33099,7 +33144,7 @@ Index
* numbers, as values of characters: Ordinal Functions. (line 6)
* numbers, Cliff random: Cliff Random Function.
(line 6)
-* numbers, converting <1>: Bitwise Functions. (line 109)
+* numbers, converting <1>: Bitwise Functions. (line 110)
* numbers, converting: Strings And Numbers. (line 6)
* numbers, converting, to strings: User-modified. (line 30)
* numbers, hexadecimal: Nondecimal-numbers. (line 6)
@@ -33117,7 +33162,7 @@ Index
* OFMT variable <2>: Strings And Numbers. (line 57)
* OFMT variable: OFMT. (line 15)
* OFMT variable, POSIX awk and: OFMT. (line 27)
-* OFS variable <1>: User-modified. (line 114)
+* OFS variable <1>: User-modified. (line 113)
* OFS variable <2>: Output Separators. (line 6)
* OFS variable: Changing Fields. (line 64)
* OpenBSD: Glossary. (line 611)
@@ -33147,7 +33192,7 @@ Index
* operators, precedence: Increment Ops. (line 60)
* operators, relational, See operators, comparison: Typing and Comparison.
(line 9)
-* operators, short-circuit: Boolean Ops. (line 57)
+* operators, short-circuit: Boolean Ops. (line 59)
* operators, string: Concatenation. (line 8)
* operators, string-matching: Regexp Usage. (line 19)
* operators, string-matching, for buffers: GNU Regexp Operators.
@@ -33163,14 +33208,14 @@ Index
* options, long <1>: Options. (line 6)
* options, long: Command Line. (line 13)
* options, printing list of: Options. (line 154)
-* or: Bitwise Functions. (line 49)
+* or: Bitwise Functions. (line 50)
* OR bitwise operation: Bitwise Functions. (line 6)
* or Boolean-logic operator: Boolean Ops. (line 6)
* ord() extension function: Extension Sample Ord.
(line 12)
* ord() user-defined function: Ordinal Functions. (line 16)
* order of evaluation, concatenation: Concatenation. (line 41)
-* ORS variable <1>: User-modified. (line 119)
+* ORS variable <1>: User-modified. (line 118)
* ORS variable: Output Separators. (line 20)
* output field separator, See OFS variable: Changing Fields. (line 64)
* output record separator, See ORS variable: Output Separators.
@@ -33194,7 +33239,7 @@ Index
* parentheses (), in a profile: Profiling. (line 146)
* parentheses (), regexp operator: Regexp Operators. (line 81)
* password file: Passwd Functions. (line 16)
-* patsplit: String Functions. (line 294)
+* patsplit: String Functions. (line 296)
* patterns: Patterns and Actions.
(line 6)
* patterns, comparison expressions as: Expression Patterns. (line 14)
@@ -33250,15 +33295,15 @@ Index
* portability, gawk: New Ports. (line 6)
* portability, gettext library and: Explaining gettext. (line 11)
* portability, internationalization and: I18N Portability. (line 6)
-* portability, length() function: String Functions. (line 176)
+* portability, length() function: String Functions. (line 179)
* portability, new awk vs. old awk: Strings And Numbers. (line 57)
* portability, next statement in user-defined functions: Pass By Value/Reference.
(line 91)
* portability, NF variable, decrementing: Changing Fields. (line 115)
* portability, operators: Increment Ops. (line 60)
* portability, operators, not in POSIX awk: Precedence. (line 98)
-* portability, POSIXLY_CORRECT environment variable: Options. (line 356)
-* portability, substr() function: String Functions. (line 510)
+* portability, POSIXLY_CORRECT environment variable: Options. (line 359)
+* portability, substr() function: String Functions. (line 511)
* portable object files <1>: Translator i18n. (line 6)
* portable object files: Explaining gettext. (line 37)
* portable object files, converting to message object files: I18N Example.
@@ -33294,7 +33339,7 @@ Index
* POSIX awk, FS variable and: User-modified. (line 60)
* POSIX awk, function keyword in: Definition Syntax. (line 93)
* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90)
-* POSIX awk, functions and, length(): String Functions. (line 176)
+* POSIX awk, functions and, length(): String Functions. (line 179)
* POSIX awk, GNU long options and: Options. (line 15)
* POSIX awk, interval expressions in: Regexp Operators. (line 135)
* POSIX awk, next/nextfile statements and: Next Statement. (line 44)
@@ -33302,16 +33347,16 @@ Index
* POSIX awk, OFMT variable and <1>: Strings And Numbers. (line 57)
* POSIX awk, OFMT variable and: OFMT. (line 27)
* POSIX awk, period (.), using: Regexp Operators. (line 51)
-* POSIX awk, printf format strings and: Format Modifiers. (line 159)
+* POSIX awk, printf format strings and: Format Modifiers. (line 158)
* POSIX awk, regular expressions and: Regexp Operators. (line 161)
* POSIX awk, timestamps and: Time Functions. (line 6)
* POSIX awk, | I/O operator and: Getline/Pipe. (line 55)
-* POSIX mode: Options. (line 252)
+* POSIX mode: Options. (line 254)
* POSIX, awk and: Preface. (line 21)
* POSIX, gawk extensions not included in: POSIX/GNU. (line 6)
* POSIX, programs, implementing in awk: Clones. (line 6)
-* POSIXLY_CORRECT environment variable: Options. (line 336)
-* PREC variable: User-modified. (line 124)
+* POSIXLY_CORRECT environment variable: Options. (line 339)
+* PREC variable: User-modified. (line 123)
* precedence <1>: Precedence. (line 6)
* precedence: Increment Ops. (line 60)
* precedence, regexp operators: Regexp Operators. (line 156)
@@ -33322,7 +33367,7 @@ Index
* print statement, commas, omitting: Print Examples. (line 31)
* print statement, I/O operators in: Precedence. (line 71)
* print statement, line continuations and: Print Examples. (line 76)
-* print statement, OFMT variable and: User-modified. (line 114)
+* print statement, OFMT variable and: User-modified. (line 113)
* print statement, See Also redirection, of output: Redirection.
(line 17)
* print statement, sprintf() function and: Round Function. (line 6)
@@ -33364,7 +33409,7 @@ Index
* PROCINFO array, and group membership: Group Functions. (line 6)
* PROCINFO array, and user and group ID numbers: Id Program. (line 15)
* PROCINFO array, testing the field splitting: Passwd Functions.
- (line 161)
+ (line 154)
* PROCINFO array, uses: Auto-set. (line 249)
* PROCINFO, values of sorted_in: Controlling Scanning.
(line 26)
@@ -33373,7 +33418,7 @@ Index
* program identifiers: Auto-set. (line 155)
* program, definition of: Getting Started. (line 21)
* programming conventions, --non-decimal-data option: Nondecimal Data.
- (line 36)
+ (line 35)
* programming conventions, ARGC/ARGV variables: Auto-set. (line 35)
* programming conventions, exit statement: Exit Statement. (line 38)
* programming conventions, function parameters: Return Statement.
@@ -33407,7 +33452,7 @@ Index
(line 99)
* QUIT signal (MS-Windows): Profiling. (line 214)
* quoting in gawk command lines: Long. (line 26)
-* quoting in gawk command lines, tricks for: Quoting. (line 88)
+* quoting in gawk command lines, tricks for: Quoting. (line 91)
* quoting, for small awk programs: Comments. (line 27)
* r debugger command (alias for run): Debugger Execution Control.
(line 62)
@@ -33438,19 +33483,19 @@ Index
* readfile() user-defined function: Readfile Function. (line 30)
* reading input files: Reading Files. (line 6)
* recipe for a programming language: History. (line 6)
-* record separators <1>: User-modified. (line 133)
+* record separators <1>: User-modified. (line 132)
* record separators: awk split records. (line 6)
* record separators, changing: awk split records. (line 85)
* record separators, regular expressions as: awk split records.
- (line 124)
+ (line 125)
* record separators, with multiline records: Multiple Line. (line 10)
* records <1>: Basic High Level. (line 73)
* records: Reading Files. (line 14)
* records, multiline: Multiple Line. (line 6)
* records, printing: Print. (line 22)
* records, splitting input into: Records. (line 6)
-* records, terminating: awk split records. (line 124)
-* records, treating files as: gawk split records. (line 92)
+* records, terminating: awk split records. (line 125)
+* records, treating files as: gawk split records. (line 93)
* recursive functions: Definition Syntax. (line 83)
* redirect gawk output, in debugger: Debugger Info. (line 72)
* redirection of input: Getline/File. (line 6)
@@ -33459,7 +33504,7 @@ Index
(line 77)
* regexp: Regexp. (line 6)
* regexp constants <1>: Comparison Operators.
- (line 102)
+ (line 103)
* regexp constants <2>: Regexp Constants. (line 6)
* regexp constants: Regexp Usage. (line 57)
* regexp constants, /=.../, /= operator and: Assignment Ops. (line 148)
@@ -33478,7 +33523,7 @@ Index
* regular expressions, as patterns <1>: Regexp Patterns. (line 6)
* regular expressions, as patterns: Regexp Usage. (line 6)
* regular expressions, as record separators: awk split records.
- (line 124)
+ (line 125)
* regular expressions, case sensitivity <1>: User-modified. (line 76)
* regular expressions, case sensitivity: Case-sensitivity. (line 6)
* regular expressions, computed: Computed Regexps. (line 6)
@@ -33489,7 +33534,7 @@ Index
(line 59)
* regular expressions, gawk, command-line options: GNU Regexp Operators.
(line 70)
-* regular expressions, interval expressions and: Options. (line 277)
+* regular expressions, interval expressions and: Options. (line 279)
* regular expressions, leftmost longest match: Leftmost Longest.
(line 6)
* regular expressions, operators <1>: Regexp Operators. (line 6)
@@ -33505,12 +33550,12 @@ Index
* regular expressions, searching for: Egrep Program. (line 6)
* relational operators, See comparison operators: Typing and Comparison.
(line 9)
-* replace in string: String Functions. (line 406)
+* replace in string: String Functions. (line 407)
* return debugger command: Debugger Execution Control.
(line 54)
* return statement, user-defined functions: Return Statement. (line 6)
* return value, close() function: Close Files And Pipes.
- (line 131)
+ (line 132)
* rev() user-defined function: Function Example. (line 54)
* revoutput extension: Extension Sample Revout.
(line 11)
@@ -33526,11 +33571,11 @@ Index
(line 11)
* right angle bracket (>), >> operator (I/O) <1>: Precedence. (line 65)
* right angle bracket (>), >> operator (I/O): Redirection. (line 50)
-* right shift: Bitwise Functions. (line 52)
+* right shift: Bitwise Functions. (line 53)
* right shift, bitwise: Bitwise Functions. (line 32)
* Ritchie, Dennis: Basic Data Typing. (line 54)
* RLENGTH variable: Auto-set. (line 259)
-* RLENGTH variable, match() function and: String Functions. (line 224)
+* RLENGTH variable, match() function and: String Functions. (line 227)
* Robbins, Arnold <1>: Future Extensions. (line 6)
* Robbins, Arnold <2>: Bugs. (line 32)
* Robbins, Arnold <3>: Contributors. (line 141)
@@ -33539,7 +33584,7 @@ Index
* Robbins, Arnold <6>: Passwd Functions. (line 90)
* Robbins, Arnold <7>: Getline/Pipe. (line 39)
* Robbins, Arnold: Command Line Field Separator.
- (line 74)
+ (line 71)
* Robbins, Bill: Getline/Pipe. (line 39)
* Robbins, Harry: Acknowledgments. (line 92)
* Robbins, Jean: Acknowledgments. (line 92)
@@ -33550,16 +33595,16 @@ Index
* round to nearest integer: Numeric Functions. (line 38)
* round() user-defined function: Round Function. (line 16)
* rounding numbers: Round Function. (line 6)
-* ROUNDMODE variable: User-modified. (line 128)
-* RS variable <1>: User-modified. (line 133)
+* ROUNDMODE variable: User-modified. (line 127)
+* RS variable <1>: User-modified. (line 132)
* RS variable: awk split records. (line 12)
* RS variable, multiline records and: Multiple Line. (line 17)
-* rshift: Bitwise Functions. (line 52)
+* rshift: Bitwise Functions. (line 53)
* RSTART variable: Auto-set. (line 265)
-* RSTART variable, match() function and: String Functions. (line 224)
+* RSTART variable, match() function and: String Functions. (line 227)
* RT variable <1>: Auto-set. (line 272)
* RT variable <2>: Multiple Line. (line 129)
-* RT variable: awk split records. (line 124)
+* RT variable: awk split records. (line 125)
* Rubin, Paul <1>: Contributors. (line 15)
* Rubin, Paul: History. (line 30)
* rule, definition of: Getting Started. (line 21)
@@ -33570,7 +33615,7 @@ Index
(line 68)
* sample debugging session: Sample Debugging Session.
(line 6)
-* sandbox mode: Options. (line 284)
+* sandbox mode: Options. (line 286)
* save debugger options: Debugger Info. (line 84)
* scalar or array: Type Functions. (line 11)
* scalar values: Basic Data Typing. (line 13)
@@ -33609,12 +33654,12 @@ Index
* separators, field, FIELDWIDTHS variable and: User-modified. (line 37)
* separators, field, FPAT variable and: User-modified. (line 43)
* separators, field, POSIX and: Fields. (line 6)
-* separators, for records <1>: User-modified. (line 133)
+* separators, for records <1>: User-modified. (line 132)
* separators, for records: awk split records. (line 6)
* separators, for records, regular expressions as: awk split records.
- (line 124)
+ (line 125)
* separators, for statements in actions: Action Overview. (line 19)
-* separators, subscript: User-modified. (line 146)
+* separators, subscript: User-modified. (line 145)
* set breakpoint: Breakpoint Control. (line 11)
* set debugger command: Viewing And Changing Data.
(line 59)
@@ -33623,7 +33668,7 @@ Index
(line 67)
* shadowing of variable values: Definition Syntax. (line 71)
* shell quoting, rules for: Quoting. (line 6)
-* shells, piping commands into: Redirection. (line 142)
+* shells, piping commands into: Redirection. (line 136)
* shells, quoting: Using Shell Variables.
(line 12)
* shells, quoting, rules for: Quoting. (line 18)
@@ -33632,7 +33677,7 @@ Index
* shells, variables: Using Shell Variables.
(line 6)
* shift, bitwise: Bitwise Functions. (line 32)
-* short-circuit operators: Boolean Ops. (line 57)
+* short-circuit operators: Boolean Ops. (line 59)
* show all source files, in debugger: Debugger Info. (line 45)
* show breakpoints: Debugger Info. (line 21)
* show function arguments, in debugger: Debugger Info. (line 18)
@@ -33663,22 +33708,22 @@ Index
(line 38)
* sidebar, Changing NR and FNR: Auto-set. (line 314)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
- (line 138)
+ (line 137)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
(line 136)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 64)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
- (line 107)
-* sidebar, Matching the Null String: Gory Details. (line 141)
+ (line 106)
+* sidebar, Matching the Null String: String Functions. (line 533)
* sidebar, Operator Evaluation Order: Increment Ops. (line 58)
-* sidebar, Piping into sh: Redirection. (line 140)
+* sidebar, Piping into sh: Redirection. (line 134)
* sidebar, Pre-POSIX awk Used OFMT For String Conversion: Strings And Numbers.
(line 55)
* sidebar, Recipe For A Programming Language: History. (line 6)
* sidebar, RS = "\0" Is Not Portable: gawk split records. (line 63)
* sidebar, So Why Does gawk have BEGINFILE and ENDFILE?: Filetrans Function.
- (line 83)
+ (line 82)
* sidebar, Syntactic Ambiguities Between /= and Regular Expressions: Assignment Ops.
(line 146)
* sidebar, Understanding #!: Executable Scripts. (line 31)
@@ -33686,7 +33731,7 @@ Index
* sidebar, Using \n in Bracket Expressions of Dynamic Regexps: Computed Regexps.
(line 57)
* sidebar, Using close()'s Return Value: Close Files And Pipes.
- (line 129)
+ (line 130)
* SIGHUP signal, for dynamic profiling: Profiling. (line 211)
* SIGINT signal (MS-Windows): Profiling. (line 214)
* signals, HUP/SIGHUP, for profiling: Profiling. (line 211)
@@ -33701,10 +33746,10 @@ Index
* sin: Numeric Functions. (line 91)
* sine: Numeric Functions. (line 91)
* single quote ('): One-shot. (line 15)
-* single quote (') in gawk command lines: Long. (line 33)
+* single quote (') in gawk command lines: Long. (line 35)
* single quote ('), in shell commands: Quoting. (line 48)
* single quote ('), vs. apostrophe: Comments. (line 27)
-* single quote ('), with double quotes: Quoting. (line 70)
+* single quote ('), with double quotes: Quoting. (line 73)
* single-character fields: Single Character Fields.
(line 6)
* single-step execution, in the debugger: Debugger Execution Control.
@@ -33714,8 +33759,8 @@ Index
* sleep() extension function: Extension Sample Time.
(line 22)
* Solaris, POSIX-compliant awk: Other Versions. (line 96)
-* sort array: String Functions. (line 42)
-* sort array indices: String Functions. (line 42)
+* sort array: String Functions. (line 41)
+* sort array indices: String Functions. (line 41)
* sort function, arrays, sorting: Array Sorting Functions.
(line 6)
* sort utility: Word Sorting. (line 50)
@@ -33739,14 +33784,14 @@ Index
* source files, search path for: Programs Exercises. (line 70)
* sparse arrays: Array Intro. (line 72)
* Spencer, Henry: Glossary. (line 11)
-* split: String Functions. (line 313)
-* split string into array: String Functions. (line 294)
+* split: String Functions. (line 315)
+* split string into array: String Functions. (line 296)
* split utility: Split Program. (line 6)
* split() function, array elements, deleting: Delete. (line 61)
* split.awk program: Split Program. (line 30)
-* sprintf <1>: String Functions. (line 381)
+* sprintf <1>: String Functions. (line 382)
* sprintf: OFMT. (line 15)
-* sprintf() function, OFMT variable and: User-modified. (line 114)
+* sprintf() function, OFMT variable and: User-modified. (line 113)
* sprintf() function, print/printf statements and: Round Function.
(line 6)
* sqrt: Numeric Functions. (line 94)
@@ -33782,18 +33827,18 @@ Index
* string constants, vs. regexp constants: Computed Regexps. (line 39)
* string extraction (internationalization): String Extraction.
(line 6)
-* string length: String Functions. (line 167)
+* string length: String Functions. (line 170)
* string operators: Concatenation. (line 8)
-* string, regular expression match: String Functions. (line 207)
+* string, regular expression match: String Functions. (line 210)
* string-manipulation functions: String Functions. (line 6)
* string-matching operators: Regexp Usage. (line 19)
* string-translation functions: I18N Functions. (line 6)
-* strings splitting, example: String Functions. (line 333)
-* strings, converting <1>: Bitwise Functions. (line 109)
+* strings splitting, example: String Functions. (line 334)
+* strings, converting <1>: Bitwise Functions. (line 110)
* strings, converting: Strings And Numbers. (line 6)
-* strings, converting letter case: String Functions. (line 520)
+* strings, converting letter case: String Functions. (line 521)
* strings, converting, numbers to: User-modified. (line 30)
-* strings, empty, See null strings: awk split records. (line 114)
+* strings, empty, See null strings: awk split records. (line 115)
* strings, extracting: String Extraction. (line 6)
* strings, for localization: Programmer i18n. (line 14)
* strings, length limitations: Scalar Constants. (line 20)
@@ -33801,15 +33846,15 @@ Index
* strings, null: Regexp Field Splitting.
(line 43)
* strings, numeric: Variable Typing. (line 6)
-* strtonum: String Functions. (line 388)
+* strtonum: String Functions. (line 389)
* strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
- (line 36)
-* sub <1>: String Functions. (line 406)
+ (line 35)
+* sub <1>: String Functions. (line 407)
* sub: Using Constant Regexps.
(line 43)
-* sub() function, arguments of: String Functions. (line 460)
+* sub() function, arguments of: String Functions. (line 461)
* sub() function, escape processing: Gory Details. (line 6)
-* subscript separators: User-modified. (line 146)
+* subscript separators: User-modified. (line 145)
* subscripts in arrays, multidimensional: Multidimensional. (line 10)
* subscripts in arrays, multidimensional, scanning: Multiscanning.
(line 11)
@@ -33817,30 +33862,30 @@ Index
(line 6)
* subscripts in arrays, uninitialized variables as: Uninitialized Subscripts.
(line 6)
-* SUBSEP variable: User-modified. (line 146)
+* SUBSEP variable: User-modified. (line 145)
* SUBSEP variable, and multidimensional arrays: Multidimensional.
(line 16)
* substitute in string: String Functions. (line 89)
-* substr: String Functions. (line 479)
-* substring: String Functions. (line 479)
+* substr: String Functions. (line 480)
+* substring: String Functions. (line 480)
* Sumner, Andrew: Other Versions. (line 64)
* supplementary groups of gawk process: Auto-set. (line 244)
* switch statement: Switch Statement. (line 6)
* SYMTAB array: Auto-set. (line 276)
* syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
(line 148)
-* system: I/O Functions. (line 75)
+* system: I/O Functions. (line 74)
* systime: Time Functions. (line 66)
* t debugger command (alias for tbreak): Breakpoint Control. (line 90)
* tbreak debugger command: Breakpoint Control. (line 90)
-* Tcl: Library Names. (line 57)
+* Tcl: Library Names. (line 58)
* TCP/IP: TCP/IP Networking. (line 6)
* TCP/IP, support for: Special Network. (line 6)
* tee utility: Tee Program. (line 6)
* tee.awk program: Tee Program. (line 26)
* temporary breakpoint: Breakpoint Control. (line 90)
-* terminating records: awk split records. (line 124)
-* testbits.awk program: Bitwise Functions. (line 70)
+* terminating records: awk split records. (line 125)
+* testbits.awk program: Bitwise Functions. (line 71)
* testext extension: Extension Sample API Tests.
(line 6)
* Texinfo <1>: Adding Code. (line 100)
@@ -33856,7 +33901,7 @@ Index
* text, printing: Print. (line 22)
* text, printing, unduplicated lines of: Uniq Program. (line 6)
* TEXTDOMAIN variable <1>: Programmer i18n. (line 9)
-* TEXTDOMAIN variable: User-modified. (line 152)
+* TEXTDOMAIN variable: User-modified. (line 151)
* TEXTDOMAIN variable, BEGIN pattern and: Programmer i18n. (line 60)
* TEXTDOMAIN variable, portability and: I18N Portability. (line 20)
* textdomain() function (C library): Explaining gettext. (line 28)
@@ -33879,15 +33924,15 @@ Index
* timestamps, converting dates to: Time Functions. (line 76)
* timestamps, formatted: Getlocaltime Function.
(line 6)
-* tolower: String Functions. (line 521)
-* toupper: String Functions. (line 527)
+* tolower: String Functions. (line 522)
+* toupper: String Functions. (line 528)
* tr utility: Translate Program. (line 6)
* trace debugger command: Miscellaneous Debugger Commands.
(line 108)
* traceback, display in debugger: Execution Stack. (line 13)
* translate string: I18N Functions. (line 22)
* translate.awk program: Translate Program. (line 55)
-* treating files, as single records: gawk split records. (line 92)
+* treating files, as single records: gawk split records. (line 93)
* troubleshooting, --non-decimal-data option: Options. (line 211)
* troubleshooting, == operator: Comparison Operators.
(line 37)
@@ -33898,26 +33943,26 @@ Index
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 23)
* troubleshooting, fatal errors, printf format strings: Format Modifiers.
- (line 159)
-* troubleshooting, fflush() function: I/O Functions. (line 63)
+ (line 158)
+* troubleshooting, fflush() function: I/O Functions. (line 62)
* troubleshooting, function call syntax: Function Calls. (line 30)
* troubleshooting, gawk: Compatibility Mode. (line 6)
* troubleshooting, gawk, bug reports: Bugs. (line 9)
* troubleshooting, gawk, fatal errors, function arguments: Calling Built-in.
(line 16)
* troubleshooting, getline function: File Checking. (line 25)
-* troubleshooting, gsub()/sub() functions: String Functions. (line 470)
-* troubleshooting, match() function: String Functions. (line 289)
+* troubleshooting, gsub()/sub() functions: String Functions. (line 471)
+* troubleshooting, match() function: String Functions. (line 291)
* troubleshooting, print statement, omitting commas: Print Examples.
(line 31)
-* troubleshooting, printing: Redirection. (line 118)
-* troubleshooting, quotes with file names: Special FD. (line 68)
+* troubleshooting, printing: Redirection. (line 112)
+* troubleshooting, quotes with file names: Special FD. (line 62)
* troubleshooting, readable data files: File Checking. (line 6)
* troubleshooting, regexp constants vs. string constants: Computed Regexps.
(line 39)
* troubleshooting, string concatenation: Concatenation. (line 26)
-* troubleshooting, substr() function: String Functions. (line 497)
-* troubleshooting, system() function: I/O Functions. (line 97)
+* troubleshooting, substr() function: String Functions. (line 498)
+* troubleshooting, system() function: I/O Functions. (line 96)
* troubleshooting, typographical errors, global variables: Options.
(line 98)
* true, logical: Truth Values. (line 6)
@@ -33951,9 +33996,9 @@ Index
* Unix awk, backslashes in escape sequences: Escape Sequences.
(line 132)
* Unix awk, close() function and: Close Files And Pipes.
- (line 131)
+ (line 132)
* Unix awk, password files, field separators and: Command Line Field Separator.
- (line 65)
+ (line 62)
* Unix, awk scripts and: Executable Scripts. (line 6)
* UNIXROOT variable, on OS/2 systems: PC Using. (line 16)
* unsigned integers: Computer Arithmetic. (line 41)
@@ -33972,7 +34017,7 @@ Index
* USR1 signal, for dynamic profiling: Profiling. (line 188)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
-* variable assignments and input files: Other Arguments. (line 23)
+* variable assignments and input files: Other Arguments. (line 26)
* variable typing: Typing and Comparison.
(line 9)
* variables <1>: Basic Data Typing. (line 6)
@@ -33982,7 +34027,7 @@ Index
* variables, built-in: Using Variables. (line 23)
* variables, built-in, -v option, setting with: Options. (line 40)
* variables, built-in, conveying information: Auto-set. (line 6)
-* variables, flag: Boolean Ops. (line 67)
+* variables, flag: Boolean Ops. (line 69)
* variables, getline command into, using <1>: Getline/Variable/Coprocess.
(line 6)
* variables, getline command into, using <2>: Getline/Variable/Pipe.
@@ -33994,7 +34039,6 @@ Index
* variables, global, printing list of: Options. (line 93)
* variables, initializing: Using Variables. (line 23)
* variables, local to a function: Variable Scope. (line 6)
-* variables, names of: Arrays. (line 18)
* variables, private: Library Names. (line 11)
* variables, setting: Options. (line 32)
* variables, shadowing: Definition Syntax. (line 71)
@@ -34015,7 +34059,7 @@ Index
* vertical bar (|), |& operator (I/O) <2>: Precedence. (line 65)
* vertical bar (|), |& operator (I/O): Getline/Coprocess. (line 6)
* vertical bar (|), || operator <1>: Precedence. (line 89)
-* vertical bar (|), || operator: Boolean Ops. (line 57)
+* vertical bar (|), || operator: Boolean Ops. (line 59)
* Vinschen, Corinna: Acknowledgments. (line 60)
* w debugger command (alias for watch): Viewing And Changing Data.
(line 67)
@@ -34041,7 +34085,7 @@ Index
* whitespace, as field separators: Default Field Splitting.
(line 6)
* whitespace, functions, calling: Calling Built-in. (line 10)
-* whitespace, newlines as: Options. (line 258)
+* whitespace, newlines as: Options. (line 260)
* Williams, Kent: Contributors. (line 34)
* Woehlke, Matthew: Contributors. (line 79)
* Woods, John: Contributors. (line 27)
@@ -34058,7 +34102,7 @@ Index
* writea() extension function: Extension Sample Read write array.
(line 9)
* xgettext utility: String Extraction. (line 13)
-* xor: Bitwise Functions. (line 55)
+* xor: Bitwise Functions. (line 56)
* XOR bitwise operation: Bitwise Functions. (line 6)
* Yawitz, Efraim: Contributors. (line 131)
* Zaretskii, Eli <1>: Bugs. (line 71)
@@ -34075,12 +34119,12 @@ Index
* | (vertical bar), | operator (I/O): Getline/Pipe. (line 9)
* | (vertical bar), |& operator (I/O) <1>: Two-way I/O. (line 25)
* | (vertical bar), |& operator (I/O) <2>: Precedence. (line 65)
-* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 102)
+* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 96)
* | (vertical bar), |& operator (I/O): Getline/Coprocess. (line 6)
* | (vertical bar), |& operator (I/O), pipes, closing: Close Files And Pipes.
- (line 119)
+ (line 120)
* | (vertical bar), || operator <1>: Precedence. (line 89)
-* | (vertical bar), || operator: Boolean Ops. (line 57)
+* | (vertical bar), || operator: Boolean Ops. (line 59)
* ~ (tilde), ~ operator <1>: Expression Patterns. (line 24)
* ~ (tilde), ~ operator <2>: Precedence. (line 80)
* ~ (tilde), ~ operator <3>: Comparison Operators.
@@ -34094,553 +34138,556 @@ Index

Tag Table:
Node: Top1204
-Node: Foreword41858
-Node: Preface46203
-Ref: Preface-Footnote-149226
-Ref: Preface-Footnote-249333
-Node: History49565
-Node: Names51939
-Ref: Names-Footnote-153033
-Node: This Manual53179
-Ref: This Manual-Footnote-158958
-Node: Conventions59058
-Node: Manual History61403
-Ref: Manual History-Footnote-164479
-Ref: Manual History-Footnote-264520
-Node: How To Contribute64594
-Node: Acknowledgments65833
-Node: Getting Started70581
-Node: Running gawk73015
-Node: One-shot74205
-Node: Read Terminal75430
-Node: Long77455
-Node: Executable Scripts78849
-Ref: Executable Scripts-Footnote-181650
-Node: Comments81752
-Node: Quoting84225
-Node: DOS Quoting89538
-Node: Sample Data Files90213
-Node: Very Simple92820
-Node: Two Rules97705
-Node: More Complex99599
-Ref: More Complex-Footnote-1102513
-Node: Statements/Lines102598
-Ref: Statements/Lines-Footnote-1107054
-Node: Other Features107319
-Node: When108250
-Ref: When-Footnote-1110006
-Node: Intro Summary110071
-Node: Invoking Gawk110954
-Node: Command Line112469
-Node: Options113260
-Ref: Options-Footnote-1128907
-Node: Other Arguments128932
-Node: Naming Standard Input131760
-Node: Environment Variables132853
-Node: AWKPATH Variable133411
-Ref: AWKPATH Variable-Footnote-1136277
-Ref: AWKPATH Variable-Footnote-2136322
-Node: AWKLIBPATH Variable136582
-Node: Other Environment Variables137341
-Node: Exit Status140793
-Node: Include Files141468
-Node: Loading Shared Libraries145046
-Node: Obsolete146430
-Node: Undocumented147127
-Node: Invoking Summary147394
-Node: Regexp148994
-Node: Regexp Usage150453
-Node: Escape Sequences152486
-Node: Regexp Operators158557
-Ref: Regexp Operators-Footnote-1165988
-Ref: Regexp Operators-Footnote-2166135
-Node: Bracket Expressions166233
-Ref: table-char-classes168251
-Node: Leftmost Longest171191
-Node: Computed Regexps172395
-Node: GNU Regexp Operators175773
-Node: Case-sensitivity179479
-Ref: Case-sensitivity-Footnote-1182369
-Ref: Case-sensitivity-Footnote-2182604
-Node: Regexp Summary182712
-Node: Reading Files184181
-Node: Records186273
-Node: awk split records186995
-Node: gawk split records191853
-Ref: gawk split records-Footnote-1196374
-Node: Fields196411
-Ref: Fields-Footnote-1199375
-Node: Nonconstant Fields199461
-Ref: Nonconstant Fields-Footnote-1201691
-Node: Changing Fields201893
-Node: Field Separators207847
-Node: Default Field Splitting210549
-Node: Regexp Field Splitting211666
-Node: Single Character Fields214993
-Node: Command Line Field Separator216052
-Node: Full Line Fields219478
-Ref: Full Line Fields-Footnote-1219986
-Node: Field Splitting Summary220032
-Ref: Field Splitting Summary-Footnote-1223164
-Node: Constant Size223265
-Node: Splitting By Content227871
-Ref: Splitting By Content-Footnote-1231944
-Node: Multiple Line231984
-Ref: Multiple Line-Footnote-1237840
-Node: Getline238019
-Node: Plain Getline240230
-Node: Getline/Variable242936
-Node: Getline/File244083
-Node: Getline/Variable/File245467
-Ref: Getline/Variable/File-Footnote-1247066
-Node: Getline/Pipe247153
-Node: Getline/Variable/Pipe249839
-Node: Getline/Coprocess250946
-Node: Getline/Variable/Coprocess252198
-Node: Getline Notes252935
-Node: Getline Summary255739
-Ref: table-getline-variants256147
-Node: Read Timeout257059
-Ref: Read Timeout-Footnote-1260886
-Node: Command-line directories260944
-Node: Input Summary261848
-Node: Input Exercises264985
-Node: Printing265713
-Node: Print267435
-Node: Print Examples268928
-Node: Output Separators271707
-Node: OFMT273723
-Node: Printf275081
-Node: Basic Printf275987
-Node: Control Letters277526
-Node: Format Modifiers281517
-Node: Printf Examples287544
-Node: Redirection290008
-Node: Special Files296980
-Node: Special FD297513
-Ref: Special FD-Footnote-1301110
-Node: Special Network301184
-Node: Special Caveats302034
-Node: Close Files And Pipes302830
-Ref: Close Files And Pipes-Footnote-1309991
-Ref: Close Files And Pipes-Footnote-2310139
-Node: Output Summary310289
-Node: Output Exercises311286
-Node: Expressions311966
-Node: Values313151
-Node: Constants313827
-Node: Scalar Constants314507
-Ref: Scalar Constants-Footnote-1315366
-Node: Nondecimal-numbers315616
-Node: Regexp Constants318616
-Node: Using Constant Regexps319141
-Node: Variables322213
-Node: Using Variables322868
-Node: Assignment Options324774
-Node: Conversion326649
-Node: Strings And Numbers327173
-Ref: Strings And Numbers-Footnote-1330235
-Node: Locale influences conversions330344
-Ref: table-locale-affects333061
-Node: All Operators333649
-Node: Arithmetic Ops334279
-Node: Concatenation336784
-Ref: Concatenation-Footnote-1339603
-Node: Assignment Ops339709
-Ref: table-assign-ops344692
-Node: Increment Ops345995
-Node: Truth Values and Conditions349433
-Node: Truth Values350516
-Node: Typing and Comparison351565
-Node: Variable Typing352358
-Node: Comparison Operators356010
-Ref: table-relational-ops356420
-Node: POSIX String Comparison359970
-Ref: POSIX String Comparison-Footnote-1361054
-Node: Boolean Ops361192
-Ref: Boolean Ops-Footnote-1365531
-Node: Conditional Exp365622
-Node: Function Calls367349
-Node: Precedence371229
-Node: Locales374898
-Node: Expressions Summary376529
-Node: Patterns and Actions379070
-Node: Pattern Overview380186
-Node: Regexp Patterns381863
-Node: Expression Patterns382406
-Node: Ranges386186
-Node: BEGIN/END389292
-Node: Using BEGIN/END390054
-Ref: Using BEGIN/END-Footnote-1392790
-Node: I/O And BEGIN/END392896
-Node: BEGINFILE/ENDFILE395167
-Node: Empty398098
-Node: Using Shell Variables398415
-Node: Action Overview400698
-Node: Statements403025
-Node: If Statement404873
-Node: While Statement406371
-Node: Do Statement408415
-Node: For Statement409571
-Node: Switch Statement412723
-Node: Break Statement415111
-Node: Continue Statement417152
-Node: Next Statement418977
-Node: Nextfile Statement421347
-Node: Exit Statement424004
-Node: Built-in Variables426408
-Node: User-modified427535
-Ref: User-modified-Footnote-1435224
-Node: Auto-set435286
-Ref: Auto-set-Footnote-1448475
-Ref: Auto-set-Footnote-2448680
-Node: ARGC and ARGV448736
-Node: Pattern Action Summary452640
-Node: Arrays454863
-Node: Array Basics456412
-Node: Array Intro457238
-Ref: figure-array-elements459211
-Ref: Array Intro-Footnote-1461735
-Node: Reference to Elements461863
-Node: Assigning Elements464313
-Node: Array Example464804
-Node: Scanning an Array466536
-Node: Controlling Scanning469537
-Ref: Controlling Scanning-Footnote-1474710
-Node: Delete475026
-Ref: Delete-Footnote-1477777
-Node: Numeric Array Subscripts477834
-Node: Uninitialized Subscripts480017
-Node: Multidimensional481644
-Node: Multiscanning484757
-Node: Arrays of Arrays486346
-Node: Arrays Summary491009
-Node: Functions493114
-Node: Built-in493987
-Node: Calling Built-in495065
-Node: Numeric Functions497053
-Ref: Numeric Functions-Footnote-1501889
-Ref: Numeric Functions-Footnote-2502246
-Ref: Numeric Functions-Footnote-3502294
-Node: String Functions502563
-Ref: String Functions-Footnote-1525560
-Ref: String Functions-Footnote-2525689
-Ref: String Functions-Footnote-3525937
-Node: Gory Details526024
-Ref: table-sub-escapes527797
-Ref: table-sub-proposed529317
-Ref: table-posix-sub530681
-Ref: table-gensub-escapes532221
-Ref: Gory Details-Footnote-1533397
-Node: I/O Functions533548
-Ref: I/O Functions-Footnote-1540658
-Node: Time Functions540805
-Ref: Time Functions-Footnote-1551269
-Ref: Time Functions-Footnote-2551337
-Ref: Time Functions-Footnote-3551495
-Ref: Time Functions-Footnote-4551606
-Ref: Time Functions-Footnote-5551718
-Ref: Time Functions-Footnote-6551945
-Node: Bitwise Functions552211
-Ref: table-bitwise-ops552773
-Ref: Bitwise Functions-Footnote-1557018
-Node: Type Functions557202
-Node: I18N Functions558344
-Node: User-defined559989
-Node: Definition Syntax560793
-Ref: Definition Syntax-Footnote-1566197
-Node: Function Example566266
-Ref: Function Example-Footnote-1568906
-Node: Function Caveats568928
-Node: Calling A Function569446
-Node: Variable Scope570401
-Node: Pass By Value/Reference573389
-Node: Return Statement576899
-Node: Dynamic Typing579883
-Node: Indirect Calls580812
-Ref: Indirect Calls-Footnote-1590528
-Node: Functions Summary590656
-Node: Library Functions593306
-Ref: Library Functions-Footnote-1596924
-Ref: Library Functions-Footnote-2597067
-Node: Library Names597238
-Ref: Library Names-Footnote-1600711
-Ref: Library Names-Footnote-2600931
-Node: General Functions601017
-Node: Strtonum Function602045
-Node: Assert Function604947
-Node: Round Function608273
-Node: Cliff Random Function609814
-Node: Ordinal Functions610830
-Ref: Ordinal Functions-Footnote-1613895
-Ref: Ordinal Functions-Footnote-2614147
-Node: Join Function614358
-Ref: Join Function-Footnote-1616129
-Node: Getlocaltime Function616329
-Node: Readfile Function620065
-Node: Data File Management621904
-Node: Filetrans Function622536
-Node: Rewind Function626605
-Node: File Checking628163
-Ref: File Checking-Footnote-1629295
-Node: Empty Files629496
-Node: Ignoring Assigns631475
-Node: Getopt Function633029
-Ref: Getopt Function-Footnote-1644293
-Node: Passwd Functions644496
-Ref: Passwd Functions-Footnote-1653475
-Node: Group Functions653563
-Ref: Group Functions-Footnote-1661494
-Node: Walking Arrays661707
-Node: Library Functions Summary663310
-Node: Library Exercises664698
-Node: Sample Programs665978
-Node: Running Examples666748
-Node: Clones667476
-Node: Cut Program668700
-Node: Egrep Program678558
-Ref: Egrep Program-Footnote-1686145
-Node: Id Program686255
-Node: Split Program689909
-Ref: Split Program-Footnote-1693447
-Node: Tee Program693575
-Node: Uniq Program696362
-Node: Wc Program703785
-Ref: Wc Program-Footnote-1708050
-Node: Miscellaneous Programs708142
-Node: Dupword Program709355
-Node: Alarm Program711386
-Node: Translate Program716190
-Ref: Translate Program-Footnote-1720763
-Ref: Translate Program-Footnote-2721033
-Node: Labels Program721172
-Ref: Labels Program-Footnote-1724533
-Node: Word Sorting724617
-Node: History Sorting728660
-Node: Extract Program730496
-Node: Simple Sed738032
-Node: Igawk Program741094
-Ref: Igawk Program-Footnote-1755398
-Ref: Igawk Program-Footnote-2755599
-Node: Anagram Program755737
-Node: Signature Program758805
-Node: Programs Summary760052
-Node: Programs Exercises761267
-Ref: Programs Exercises-Footnote-1765398
-Node: Advanced Features765489
-Node: Nondecimal Data767437
-Node: Array Sorting769014
-Node: Controlling Array Traversal769711
-Node: Array Sorting Functions777991
-Ref: Array Sorting Functions-Footnote-1781898
-Node: Two-way I/O782092
-Ref: Two-way I/O-Footnote-1787036
-Ref: Two-way I/O-Footnote-2787215
-Node: TCP/IP Networking787297
-Node: Profiling790139
-Node: Advanced Features Summary797690
-Node: Internationalization799554
-Node: I18N and L10N801034
-Node: Explaining gettext801720
-Ref: Explaining gettext-Footnote-1806746
-Ref: Explaining gettext-Footnote-2806930
-Node: Programmer i18n807095
-Ref: Programmer i18n-Footnote-1811889
-Node: Translator i18n811938
-Node: String Extraction812732
-Ref: String Extraction-Footnote-1813865
-Node: Printf Ordering813951
-Ref: Printf Ordering-Footnote-1816733
-Node: I18N Portability816797
-Ref: I18N Portability-Footnote-1819246
-Node: I18N Example819309
-Ref: I18N Example-Footnote-1822015
-Node: Gawk I18N822087
-Node: I18N Summary822725
-Node: Debugger824064
-Node: Debugging825086
-Node: Debugging Concepts825527
-Node: Debugging Terms827383
-Node: Awk Debugging829980
-Node: Sample Debugging Session830872
-Node: Debugger Invocation831392
-Node: Finding The Bug832728
-Node: List of Debugger Commands839207
-Node: Breakpoint Control840539
-Node: Debugger Execution Control844203
-Node: Viewing And Changing Data847563
-Node: Execution Stack850921
-Node: Debugger Info852434
-Node: Miscellaneous Debugger Commands856428
-Node: Readline Support861612
-Node: Limitations862504
-Node: Debugging Summary864777
-Node: Arbitrary Precision Arithmetic865945
-Node: Computer Arithmetic867432
-Ref: Computer Arithmetic-Footnote-1871819
-Node: Math Definitions871876
-Ref: table-ieee-formats875165
-Ref: Math Definitions-Footnote-1875705
-Node: MPFR features875808
-Node: FP Math Caution877425
-Ref: FP Math Caution-Footnote-1878475
-Node: Inexactness of computations878844
-Node: Inexact representation879792
-Node: Comparing FP Values881147
-Node: Errors accumulate882111
-Node: Getting Accuracy883544
-Node: Try To Round886203
-Node: Setting precision887102
-Ref: table-predefined-precision-strings887784
-Node: Setting the rounding mode889577
-Ref: table-gawk-rounding-modes889941
-Ref: Setting the rounding mode-Footnote-1893395
-Node: Arbitrary Precision Integers893574
-Ref: Arbitrary Precision Integers-Footnote-1897347
-Node: POSIX Floating Point Problems897496
-Ref: POSIX Floating Point Problems-Footnote-1901372
-Node: Floating point summary901410
-Node: Dynamic Extensions903614
-Node: Extension Intro905166
-Node: Plugin License906431
-Node: Extension Mechanism Outline907116
-Ref: figure-load-extension907540
-Ref: figure-load-new-function909025
-Ref: figure-call-new-function910027
-Node: Extension API Description912011
-Node: Extension API Functions Introduction913461
-Node: General Data Types918328
-Ref: General Data Types-Footnote-1924021
-Node: Requesting Values924320
-Ref: table-value-types-returned925057
-Node: Memory Allocation Functions926015
-Ref: Memory Allocation Functions-Footnote-1928762
-Node: Constructor Functions928858
-Node: Registration Functions930616
-Node: Extension Functions931301
-Node: Exit Callback Functions933603
-Node: Extension Version String934851
-Node: Input Parsers935501
-Node: Output Wrappers945315
-Node: Two-way processors949831
-Node: Printing Messages952035
-Ref: Printing Messages-Footnote-1953112
-Node: Updating `ERRNO'953264
-Node: Accessing Parameters954003
-Node: Symbol Table Access955233
-Node: Symbol table by name955747
-Node: Symbol table by cookie957723
-Ref: Symbol table by cookie-Footnote-1961856
-Node: Cached values961919
-Ref: Cached values-Footnote-1965423
-Node: Array Manipulation965514
-Ref: Array Manipulation-Footnote-1966612
-Node: Array Data Types966651
-Ref: Array Data Types-Footnote-1969354
-Node: Array Functions969446
-Node: Flattening Arrays973320
-Node: Creating Arrays980172
-Node: Extension API Variables984903
-Node: Extension Versioning985539
-Node: Extension API Informational Variables987440
-Node: Extension API Boilerplate988526
-Node: Finding Extensions992330
-Node: Extension Example992890
-Node: Internal File Description993620
-Node: Internal File Ops997711
-Ref: Internal File Ops-Footnote-11009143
-Node: Using Internal File Ops1009283
-Ref: Using Internal File Ops-Footnote-11011630
-Node: Extension Samples1011898
-Node: Extension Sample File Functions1013422
-Node: Extension Sample Fnmatch1020990
-Node: Extension Sample Fork1022472
-Node: Extension Sample Inplace1023685
-Node: Extension Sample Ord1025360
-Node: Extension Sample Readdir1026196
-Ref: table-readdir-file-types1027052
-Node: Extension Sample Revout1027851
-Node: Extension Sample Rev2way1028442
-Node: Extension Sample Read write array1029183
-Node: Extension Sample Readfile1031062
-Node: Extension Sample API Tests1032162
-Node: Extension Sample Time1032687
-Node: gawkextlib1034002
-Node: Extension summary1036815
-Node: Extension Exercises1040508
-Node: Language History1041230
-Node: V7/SVR3.11042873
-Node: SVR41045193
-Node: POSIX1046635
-Node: BTL1048021
-Node: POSIX/GNU1048755
-Node: Feature History1054531
-Node: Common Extensions1067622
-Node: Ranges and Locales1068934
-Ref: Ranges and Locales-Footnote-11073551
-Ref: Ranges and Locales-Footnote-21073578
-Ref: Ranges and Locales-Footnote-31073812
-Node: Contributors1074033
-Node: History summary1079458
-Node: Installation1080827
-Node: Gawk Distribution1081778
-Node: Getting1082262
-Node: Extracting1083086
-Node: Distribution contents1084728
-Node: Unix Installation1090498
-Node: Quick Installation1091115
-Node: Additional Configuration Options1093557
-Node: Configuration Philosophy1095295
-Node: Non-Unix Installation1097646
-Node: PC Installation1098104
-Node: PC Binary Installation1099415
-Node: PC Compiling1101263
-Ref: PC Compiling-Footnote-11104262
-Node: PC Testing1104367
-Node: PC Using1105543
-Node: Cygwin1109695
-Node: MSYS1110504
-Node: VMS Installation1111018
-Node: VMS Compilation1111814
-Ref: VMS Compilation-Footnote-11113036
-Node: VMS Dynamic Extensions1113094
-Node: VMS Installation Details1114467
-Node: VMS Running1116719
-Node: VMS GNV1119553
-Node: VMS Old Gawk1120276
-Node: Bugs1120746
-Node: Other Versions1124750
-Node: Installation summary1130974
-Node: Notes1132030
-Node: Compatibility Mode1132895
-Node: Additions1133677
-Node: Accessing The Source1134602
-Node: Adding Code1136038
-Node: New Ports1142216
-Node: Derived Files1146697
-Ref: Derived Files-Footnote-11152172
-Ref: Derived Files-Footnote-21152206
-Ref: Derived Files-Footnote-31152802
-Node: Future Extensions1152916
-Node: Implementation Limitations1153522
-Node: Extension Design1154770
-Node: Old Extension Problems1155924
-Ref: Old Extension Problems-Footnote-11157441
-Node: Extension New Mechanism Goals1157498
-Ref: Extension New Mechanism Goals-Footnote-11160858
-Node: Extension Other Design Decisions1161047
-Node: Extension Future Growth1163153
-Node: Old Extension Mechanism1163989
-Node: Notes summary1165751
-Node: Basic Concepts1166937
-Node: Basic High Level1167618
-Ref: figure-general-flow1167890
-Ref: figure-process-flow1168489
-Ref: Basic High Level-Footnote-11171718
-Node: Basic Data Typing1171903
-Node: Glossary1175231
-Node: Copying1200383
-Node: GNU Free Documentation License1237939
-Node: Index1263075
+Node: Foreword41978
+Node: Preface46325
+Ref: Preface-Footnote-149220
+Ref: Preface-Footnote-249327
+Ref: Preface-Footnote-349560
+Node: History49702
+Node: Names52076
+Ref: Names-Footnote-153170
+Node: This Manual53316
+Ref: This Manual-Footnote-159151
+Node: Conventions59251
+Node: Manual History61596
+Ref: Manual History-Footnote-164672
+Ref: Manual History-Footnote-264713
+Node: How To Contribute64787
+Node: Acknowledgments66026
+Node: Getting Started70774
+Node: Running gawk73208
+Node: One-shot74398
+Node: Read Terminal75623
+Node: Long77650
+Node: Executable Scripts79166
+Ref: Executable Scripts-Footnote-181955
+Node: Comments82057
+Node: Quoting84530
+Node: DOS Quoting90040
+Node: Sample Data Files90715
+Node: Very Simple93308
+Node: Two Rules98199
+Node: More Complex100085
+Node: Statements/Lines102947
+Ref: Statements/Lines-Footnote-1107403
+Node: Other Features107668
+Node: When108599
+Ref: When-Footnote-1110355
+Node: Intro Summary110420
+Node: Invoking Gawk111303
+Node: Command Line112818
+Node: Options113609
+Ref: Options-Footnote-1129375
+Node: Other Arguments129400
+Node: Naming Standard Input132361
+Node: Environment Variables133454
+Node: AWKPATH Variable134012
+Ref: AWKPATH Variable-Footnote-1136864
+Ref: AWKPATH Variable-Footnote-2136909
+Node: AWKLIBPATH Variable137169
+Node: Other Environment Variables137928
+Node: Exit Status141401
+Node: Include Files142076
+Node: Loading Shared Libraries145654
+Node: Obsolete147081
+Node: Undocumented147778
+Node: Invoking Summary148045
+Node: Regexp149711
+Node: Regexp Usage151170
+Node: Escape Sequences153203
+Node: Regexp Operators159303
+Ref: Regexp Operators-Footnote-1166738
+Ref: Regexp Operators-Footnote-2166885
+Node: Bracket Expressions166983
+Ref: table-char-classes169000
+Node: Leftmost Longest171940
+Node: Computed Regexps173242
+Node: GNU Regexp Operators176639
+Node: Case-sensitivity180345
+Ref: Case-sensitivity-Footnote-1183235
+Ref: Case-sensitivity-Footnote-2183470
+Node: Regexp Summary183578
+Node: Reading Files185047
+Node: Records187139
+Node: awk split records187867
+Node: gawk split records192779
+Ref: gawk split records-Footnote-1197318
+Node: Fields197355
+Ref: Fields-Footnote-1200151
+Node: Nonconstant Fields200237
+Ref: Nonconstant Fields-Footnote-1202467
+Node: Changing Fields202669
+Node: Field Separators208601
+Node: Default Field Splitting211303
+Node: Regexp Field Splitting212420
+Node: Single Character Fields215770
+Node: Command Line Field Separator216829
+Node: Full Line Fields220039
+Ref: Full Line Fields-Footnote-1220547
+Node: Field Splitting Summary220593
+Ref: Field Splitting Summary-Footnote-1223724
+Node: Constant Size223825
+Node: Splitting By Content228431
+Ref: Splitting By Content-Footnote-1232504
+Node: Multiple Line232544
+Ref: Multiple Line-Footnote-1238433
+Node: Getline238612
+Node: Plain Getline240823
+Node: Getline/Variable243463
+Node: Getline/File244610
+Node: Getline/Variable/File245994
+Ref: Getline/Variable/File-Footnote-1247593
+Node: Getline/Pipe247680
+Node: Getline/Variable/Pipe250363
+Node: Getline/Coprocess251492
+Node: Getline/Variable/Coprocess252744
+Node: Getline Notes253481
+Node: Getline Summary256273
+Ref: table-getline-variants256681
+Node: Read Timeout257510
+Ref: Read Timeout-Footnote-1261324
+Node: Command-line directories261382
+Node: Input Summary262286
+Node: Input Exercises265538
+Node: Printing266266
+Node: Print268043
+Node: Print Examples269500
+Node: Output Separators272279
+Node: OFMT274295
+Node: Printf275647
+Node: Basic Printf276432
+Node: Control Letters278003
+Node: Format Modifiers281987
+Node: Printf Examples287994
+Node: Redirection290476
+Node: Special FD297207
+Ref: Special FD-Footnote-1300364
+Node: Special Files300438
+Node: Other Inherited Files301054
+Node: Special Network302054
+Node: Special Caveats302915
+Node: Close Files And Pipes303866
+Ref: Close Files And Pipes-Footnote-1311043
+Ref: Close Files And Pipes-Footnote-2311191
+Node: Output Summary311341
+Node: Output Exercises312337
+Node: Expressions313017
+Node: Values314202
+Node: Constants314878
+Node: Scalar Constants315558
+Ref: Scalar Constants-Footnote-1316417
+Node: Nondecimal-numbers316667
+Node: Regexp Constants319667
+Node: Using Constant Regexps320192
+Node: Variables323330
+Node: Using Variables323985
+Node: Assignment Options325889
+Node: Conversion327764
+Node: Strings And Numbers328288
+Ref: Strings And Numbers-Footnote-1331350
+Node: Locale influences conversions331459
+Ref: table-locale-affects334174
+Node: All Operators334762
+Node: Arithmetic Ops335392
+Node: Concatenation337897
+Ref: Concatenation-Footnote-1340716
+Node: Assignment Ops340822
+Ref: table-assign-ops345805
+Node: Increment Ops347083
+Node: Truth Values and Conditions350521
+Node: Truth Values351604
+Node: Typing and Comparison352653
+Node: Variable Typing353446
+Node: Comparison Operators357098
+Ref: table-relational-ops357508
+Node: POSIX String Comparison361023
+Ref: POSIX String Comparison-Footnote-1362095
+Node: Boolean Ops362233
+Ref: Boolean Ops-Footnote-1366712
+Node: Conditional Exp366803
+Node: Function Calls368530
+Node: Precedence372410
+Node: Locales376078
+Node: Expressions Summary377709
+Node: Patterns and Actions380283
+Node: Pattern Overview381399
+Node: Regexp Patterns383078
+Node: Expression Patterns383621
+Node: Ranges387401
+Node: BEGIN/END390507
+Node: Using BEGIN/END391269
+Ref: Using BEGIN/END-Footnote-1394006
+Node: I/O And BEGIN/END394112
+Node: BEGINFILE/ENDFILE396426
+Node: Empty399327
+Node: Using Shell Variables399644
+Node: Action Overview401920
+Node: Statements404247
+Node: If Statement406095
+Node: While Statement407593
+Node: Do Statement409621
+Node: For Statement410763
+Node: Switch Statement413918
+Node: Break Statement416306
+Node: Continue Statement418347
+Node: Next Statement420172
+Node: Nextfile Statement422552
+Node: Exit Statement425182
+Node: Built-in Variables427585
+Node: User-modified428712
+Ref: User-modified-Footnote-1436392
+Node: Auto-set436454
+Ref: Auto-set-Footnote-1449648
+Ref: Auto-set-Footnote-2449853
+Node: ARGC and ARGV449909
+Node: Pattern Action Summary454113
+Node: Arrays456532
+Node: Array Basics457861
+Node: Array Intro458705
+Ref: figure-array-elements460678
+Ref: Array Intro-Footnote-1463202
+Node: Reference to Elements463330
+Node: Assigning Elements465780
+Node: Array Example466271
+Node: Scanning an Array468029
+Node: Controlling Scanning471045
+Ref: Controlling Scanning-Footnote-1476234
+Node: Numeric Array Subscripts476550
+Node: Uninitialized Subscripts478733
+Node: Delete480350
+Ref: Delete-Footnote-1483094
+Node: Multidimensional483151
+Node: Multiscanning486246
+Node: Arrays of Arrays487835
+Node: Arrays Summary492596
+Node: Functions494701
+Node: Built-in495574
+Node: Calling Built-in496652
+Node: Numeric Functions498640
+Ref: Numeric Functions-Footnote-1503464
+Ref: Numeric Functions-Footnote-2503821
+Ref: Numeric Functions-Footnote-3503869
+Node: String Functions504138
+Ref: String Functions-Footnote-1527598
+Ref: String Functions-Footnote-2527727
+Ref: String Functions-Footnote-3527975
+Node: Gory Details528062
+Ref: table-sub-escapes529843
+Ref: table-sub-proposed531363
+Ref: table-posix-sub532727
+Ref: table-gensub-escapes534267
+Ref: Gory Details-Footnote-1535099
+Node: I/O Functions535250
+Ref: I/O Functions-Footnote-1542351
+Node: Time Functions542498
+Ref: Time Functions-Footnote-1552967
+Ref: Time Functions-Footnote-2553035
+Ref: Time Functions-Footnote-3553193
+Ref: Time Functions-Footnote-4553304
+Ref: Time Functions-Footnote-5553416
+Ref: Time Functions-Footnote-6553643
+Node: Bitwise Functions553909
+Ref: table-bitwise-ops554471
+Ref: Bitwise Functions-Footnote-1558779
+Node: Type Functions558948
+Node: I18N Functions560097
+Node: User-defined561742
+Node: Definition Syntax562546
+Ref: Definition Syntax-Footnote-1567950
+Node: Function Example568019
+Ref: Function Example-Footnote-1570936
+Node: Function Caveats570958
+Node: Calling A Function571476
+Node: Variable Scope572431
+Node: Pass By Value/Reference575419
+Node: Return Statement578929
+Node: Dynamic Typing581913
+Node: Indirect Calls582842
+Ref: Indirect Calls-Footnote-1592563
+Node: Functions Summary592691
+Node: Library Functions595390
+Ref: Library Functions-Footnote-1599008
+Ref: Library Functions-Footnote-2599151
+Node: Library Names599322
+Ref: Library Names-Footnote-1602780
+Ref: Library Names-Footnote-2603000
+Node: General Functions603086
+Node: Strtonum Function604114
+Node: Assert Function607134
+Node: Round Function610458
+Node: Cliff Random Function611999
+Node: Ordinal Functions613015
+Ref: Ordinal Functions-Footnote-1616080
+Ref: Ordinal Functions-Footnote-2616332
+Node: Join Function616543
+Ref: Join Function-Footnote-1618314
+Node: Getlocaltime Function618514
+Node: Readfile Function622255
+Node: Data File Management624203
+Node: Filetrans Function624835
+Node: Rewind Function628894
+Node: File Checking630279
+Ref: File Checking-Footnote-1631607
+Node: Empty Files631808
+Node: Ignoring Assigns633787
+Node: Getopt Function635338
+Ref: Getopt Function-Footnote-1646798
+Node: Passwd Functions647001
+Ref: Passwd Functions-Footnote-1655852
+Node: Group Functions655940
+Ref: Group Functions-Footnote-1663843
+Node: Walking Arrays664056
+Node: Library Functions Summary665659
+Node: Library Exercises667060
+Node: Sample Programs668340
+Node: Running Examples669110
+Node: Clones669838
+Node: Cut Program671062
+Node: Egrep Program680792
+Ref: Egrep Program-Footnote-1688294
+Node: Id Program688404
+Node: Split Program692048
+Ref: Split Program-Footnote-1695494
+Node: Tee Program695622
+Node: Uniq Program698409
+Node: Wc Program705830
+Ref: Wc Program-Footnote-1710078
+Node: Miscellaneous Programs710170
+Node: Dupword Program711383
+Node: Alarm Program713414
+Node: Translate Program718218
+Ref: Translate Program-Footnote-1722791
+Ref: Translate Program-Footnote-2723061
+Node: Labels Program723200
+Ref: Labels Program-Footnote-1726549
+Node: Word Sorting726633
+Node: History Sorting730703
+Node: Extract Program732539
+Node: Simple Sed740071
+Node: Igawk Program743133
+Ref: Igawk Program-Footnote-1757459
+Ref: Igawk Program-Footnote-2757660
+Ref: Igawk Program-Footnote-3757782
+Node: Anagram Program757897
+Node: Signature Program760959
+Node: Programs Summary762206
+Node: Programs Exercises763399
+Ref: Programs Exercises-Footnote-1767530
+Node: Advanced Features767621
+Node: Nondecimal Data769569
+Node: Array Sorting771159
+Node: Controlling Array Traversal771856
+Ref: Controlling Array Traversal-Footnote-1780187
+Node: Array Sorting Functions780305
+Ref: Array Sorting Functions-Footnote-1784197
+Node: Two-way I/O784391
+Ref: Two-way I/O-Footnote-1789335
+Ref: Two-way I/O-Footnote-2789521
+Node: TCP/IP Networking789603
+Node: Profiling792444
+Node: Advanced Features Summary799995
+Node: Internationalization801856
+Node: I18N and L10N803336
+Node: Explaining gettext804022
+Ref: Explaining gettext-Footnote-1809048
+Ref: Explaining gettext-Footnote-2809232
+Node: Programmer i18n809397
+Ref: Programmer i18n-Footnote-1814191
+Node: Translator i18n814240
+Node: String Extraction815034
+Ref: String Extraction-Footnote-1816167
+Node: Printf Ordering816253
+Ref: Printf Ordering-Footnote-1819035
+Node: I18N Portability819099
+Ref: I18N Portability-Footnote-1821548
+Node: I18N Example821611
+Ref: I18N Example-Footnote-1824317
+Node: Gawk I18N824389
+Node: I18N Summary825027
+Node: Debugger826366
+Node: Debugging827388
+Node: Debugging Concepts827829
+Node: Debugging Terms829685
+Node: Awk Debugging832282
+Node: Sample Debugging Session833174
+Node: Debugger Invocation833694
+Node: Finding The Bug835030
+Node: List of Debugger Commands841509
+Node: Breakpoint Control842841
+Node: Debugger Execution Control846505
+Node: Viewing And Changing Data849865
+Node: Execution Stack853223
+Node: Debugger Info854736
+Node: Miscellaneous Debugger Commands858730
+Node: Readline Support863914
+Node: Limitations864806
+Node: Debugging Summary867079
+Node: Arbitrary Precision Arithmetic868247
+Node: Computer Arithmetic869734
+Ref: Computer Arithmetic-Footnote-1874121
+Node: Math Definitions874178
+Ref: table-ieee-formats877467
+Ref: Math Definitions-Footnote-1878007
+Node: MPFR features878110
+Node: FP Math Caution879727
+Ref: FP Math Caution-Footnote-1880777
+Node: Inexactness of computations881146
+Node: Inexact representation882094
+Node: Comparing FP Values883449
+Node: Errors accumulate884413
+Node: Getting Accuracy885846
+Node: Try To Round888505
+Node: Setting precision889404
+Ref: table-predefined-precision-strings890086
+Node: Setting the rounding mode891879
+Ref: table-gawk-rounding-modes892243
+Ref: Setting the rounding mode-Footnote-1895697
+Node: Arbitrary Precision Integers895876
+Ref: Arbitrary Precision Integers-Footnote-1899649
+Node: POSIX Floating Point Problems899798
+Ref: POSIX Floating Point Problems-Footnote-1903674
+Node: Floating point summary903712
+Node: Dynamic Extensions905916
+Node: Extension Intro907468
+Node: Plugin License908733
+Node: Extension Mechanism Outline909418
+Ref: figure-load-extension909842
+Ref: figure-load-new-function911327
+Ref: figure-call-new-function912329
+Node: Extension API Description914313
+Node: Extension API Functions Introduction915763
+Node: General Data Types920630
+Ref: General Data Types-Footnote-1926323
+Node: Requesting Values926622
+Ref: table-value-types-returned927359
+Node: Memory Allocation Functions928317
+Ref: Memory Allocation Functions-Footnote-1931064
+Node: Constructor Functions931160
+Node: Registration Functions932918
+Node: Extension Functions933603
+Node: Exit Callback Functions935905
+Node: Extension Version String937153
+Node: Input Parsers937803
+Node: Output Wrappers947617
+Node: Two-way processors952133
+Node: Printing Messages954337
+Ref: Printing Messages-Footnote-1955414
+Node: Updating `ERRNO'955566
+Node: Accessing Parameters956305
+Node: Symbol Table Access957535
+Node: Symbol table by name958049
+Node: Symbol table by cookie960025
+Ref: Symbol table by cookie-Footnote-1964158
+Node: Cached values964221
+Ref: Cached values-Footnote-1967725
+Node: Array Manipulation967816
+Ref: Array Manipulation-Footnote-1968914
+Node: Array Data Types968953
+Ref: Array Data Types-Footnote-1971656
+Node: Array Functions971748
+Node: Flattening Arrays975622
+Node: Creating Arrays982474
+Node: Extension API Variables987205
+Node: Extension Versioning987841
+Node: Extension API Informational Variables989742
+Node: Extension API Boilerplate990828
+Node: Finding Extensions994632
+Node: Extension Example995192
+Node: Internal File Description995922
+Node: Internal File Ops1000013
+Ref: Internal File Ops-Footnote-11011445
+Node: Using Internal File Ops1011585
+Ref: Using Internal File Ops-Footnote-11013932
+Node: Extension Samples1014200
+Node: Extension Sample File Functions1015724
+Node: Extension Sample Fnmatch1023292
+Node: Extension Sample Fork1024774
+Node: Extension Sample Inplace1025987
+Node: Extension Sample Ord1027662
+Node: Extension Sample Readdir1028498
+Ref: table-readdir-file-types1029354
+Node: Extension Sample Revout1030153
+Node: Extension Sample Rev2way1030744
+Node: Extension Sample Read write array1031485
+Node: Extension Sample Readfile1033364
+Node: Extension Sample API Tests1034464
+Node: Extension Sample Time1034989
+Node: gawkextlib1036304
+Node: Extension summary1039117
+Node: Extension Exercises1042810
+Node: Language History1043532
+Node: V7/SVR3.11045175
+Node: SVR41047495
+Node: POSIX1048937
+Node: BTL1050323
+Node: POSIX/GNU1051057
+Node: Feature History1056833
+Node: Common Extensions1069924
+Node: Ranges and Locales1071236
+Ref: Ranges and Locales-Footnote-11075853
+Ref: Ranges and Locales-Footnote-21075880
+Ref: Ranges and Locales-Footnote-31076114
+Node: Contributors1076335
+Node: History summary1081760
+Node: Installation1083129
+Node: Gawk Distribution1084080
+Node: Getting1084564
+Node: Extracting1085388
+Node: Distribution contents1087030
+Node: Unix Installation1092800
+Node: Quick Installation1093417
+Node: Additional Configuration Options1095859
+Node: Configuration Philosophy1097597
+Node: Non-Unix Installation1099948
+Node: PC Installation1100406
+Node: PC Binary Installation1101717
+Node: PC Compiling1103565
+Ref: PC Compiling-Footnote-11106564
+Node: PC Testing1106669
+Node: PC Using1107845
+Node: Cygwin1111997
+Node: MSYS1112806
+Node: VMS Installation1113304
+Node: VMS Compilation1114100
+Ref: VMS Compilation-Footnote-11115322
+Node: VMS Dynamic Extensions1115380
+Node: VMS Installation Details1116753
+Node: VMS Running1119005
+Node: VMS GNV1121839
+Node: VMS Old Gawk1122562
+Node: Bugs1123032
+Node: Other Versions1127036
+Node: Installation summary1133260
+Node: Notes1134316
+Node: Compatibility Mode1135181
+Node: Additions1135963
+Node: Accessing The Source1136888
+Node: Adding Code1138324
+Node: New Ports1144502
+Node: Derived Files1148983
+Ref: Derived Files-Footnote-11154458
+Ref: Derived Files-Footnote-21154492
+Ref: Derived Files-Footnote-31155088
+Node: Future Extensions1155202
+Node: Implementation Limitations1155808
+Node: Extension Design1157056
+Node: Old Extension Problems1158210
+Ref: Old Extension Problems-Footnote-11159727
+Node: Extension New Mechanism Goals1159784
+Ref: Extension New Mechanism Goals-Footnote-11163144
+Node: Extension Other Design Decisions1163333
+Node: Extension Future Growth1165439
+Node: Old Extension Mechanism1166275
+Node: Notes summary1168037
+Node: Basic Concepts1169223
+Node: Basic High Level1169904
+Ref: figure-general-flow1170176
+Ref: figure-process-flow1170775
+Ref: Basic High Level-Footnote-11174004
+Node: Basic Data Typing1174189
+Node: Glossary1177517
+Node: Copying1202669
+Node: GNU Free Documentation License1240225
+Node: Index1265361

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 17972c7a..2e7efca5 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -53,11 +53,16 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH August, 2014
+@set UPDATE-MONTH September, 2014
@set VERSION 4.1
-@set PATCHLEVEL 1
+@set PATCHLEVEL 2
+@ifset FOR_PRINT
+@set TITLE Effective AWK Programming
+@end ifset
+@ifclear FOR_PRINT
@set TITLE GAWK: Effective AWK Programming
+@end ifclear
@set SUBTITLE A User's Guide for GNU Awk
@set EDITION 4.1
@@ -560,8 +565,8 @@ particular records in a file and perform operations upon them.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate
field.
-* Command Line Field Separator:: Setting @code{FS} from the
- command line.
+* Command Line Field Separator:: Setting @code{FS} from the command
+ line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -605,10 +610,12 @@ particular records in a file and perform operations upon them.
* Printf Examples:: Several examples.
* Redirection:: How to redirect output to multiple
files and pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in
@command{gawk}. @command{gawk} allows
access to inherited file descriptors.
-* Special FD:: Special files for I/O.
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
* Special Network:: Special files for network
communications.
* Special Caveats:: Things to watch out for.
@@ -721,12 +728,12 @@ particular records in a file and perform operations upon them.
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The @code{delete} statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using Uninitialized variables as
subscripts.
+* Delete:: The @code{delete} statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
@@ -1088,7 +1095,7 @@ books on Unix, I found the gray AWK book, a.k.a.@: Aho, Kernighan and
Weinberger, @cite{The AWK Programming Language}, Addison-Wesley,
1988. AWK's simple programming paradigm---find a pattern in the
input and then perform an action---often reduced complex or tedious
-data manipulations to few lines of code. I was excited to try my
+data manipulations to a few lines of code. I was excited to try my
hand at programming in AWK.
Alas, the @command{awk} on my computer was a limited version of the
@@ -1222,7 +1229,7 @@ March, 2001
<affiliation><jobtitle>Nof Ayalon</jobtitle></affiliation>
<affiliation><jobtitle>ISRAEL</jobtitle></affiliation>
</author>
- <date>June, 2014</date>
+ <date>December, 2014</date>
</prefaceinfo>
@end docbook
@@ -1244,7 +1251,7 @@ and with the Unix version of @command{awk} maintained
by Brian Kernighan.
This means that all
properly written @command{awk} programs should work with @command{gawk}.
-Thus, we usually don't distinguish between @command{gawk} and other
+So most of the time, we don't distinguish between @command{gawk} and other
@command{awk} implementations.
@cindex @command{awk}, POSIX and, See Also POSIX @command{awk}
@@ -1291,15 +1298,15 @@ Sort data
Perform simple network communications
@item
-Profile and debug @command{awk} programs.
+Profile and debug @command{awk} programs
@item
-Extend the language with functions written in C or C++.
+Extend the language with functions written in C or C++
@end itemize
This @value{DOCUMENT} teaches you about the @command{awk} language and
how you can use it effectively. You should already be familiar with basic
-system commands, such as @command{cat} and @command{ls},@footnote{These commands
+system commands, such as @command{cat} and @command{ls},@footnote{These utilities
are available on POSIX-compliant systems, as well as on traditional
Unix-based systems. If you are using some other operating system, you still need to
be familiar with the ideas of I/O redirection and pipes.} as well as basic shell
@@ -1321,10 +1328,9 @@ Microsoft Windows
@ifclear FOR_PRINT
(all versions) and OS/2 PCs,
@end ifclear
-and OpenVMS.
-(Some other, obsolete systems to which @command{gawk} was once ported
-are no longer supported and the code for those systems
-has been removed.)
+and OpenVMS.@footnote{Some other, obsolete systems to which @command{gawk}
+was once ported are no longer supported and the code for those systems
+has been removed.}
@menu
* History:: The history of @command{gawk} and
@@ -1516,7 +1522,7 @@ All appear in the index, under the heading ``sidebar.''
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
-program that illustrates the concept currently being described.
+program that illustrates the concept being described.
While this @value{DOCUMENT} is aimed principally at people who have not been
exposed
@@ -1574,9 +1580,9 @@ sorting arrays in @command{gawk}. It also describes how @command{gawk}
provides arrays of arrays.
@ref{Functions},
-describes the built-in functions @command{awk} and
-@command{gawk} provide, as well as how to define
-your own functions.
+describes the built-in functions @command{awk} and @command{gawk} provide,
+as well as how to define your own functions. It also discusses how
+@command{gawk} lets you call functions indirectly.
Part II shows how to use @command{awk} and @command{gawk} for problem solving.
There is lots of code here for you to read and learn from.
@@ -1649,9 +1655,10 @@ printed edition. You may find them online, as follows:
@uref{http://www.gnu.org/software/gawk/manual/html_node/Notes.html,
The appendix on implementation notes}
-describes how to disable @command{gawk}'s extensions, as
-well as how to contribute new code to @command{gawk},
-and some possible future directions for @command{gawk} development.
+describes how to disable @command{gawk}'s extensions, how to contribute
+new code to @command{gawk}, where to find information on some possible
+future directions for @command{gawk} development, and the design decisions
+behind the extension API.
@uref{http://www.gnu.org/software/gawk/manual/html_node/Basic-Concepts.html,
The appendix on basic concepts}
@@ -1669,7 +1676,7 @@ The GNU FDL}
is the license that covers this @value{DOCUMENT}.
Some of the chapters have exercise sections; these have also been
-omitted from the print edition.
+omitted from the print edition but are available online.
@end ifset
@ifclear FOR_PRINT
@@ -1892,7 +1899,7 @@ The FSF published the first two editions under
the title @cite{The GNU Awk User's Guide}.
@ifset FOR_PRINT
SSC published two editions of the @value{DOCUMENT} under the
-title @cite{Effective awk Programming}, and in O'Reilly published
+title @cite{Effective awk Programming}, and O'Reilly published
the third edition in 2001.
@end ifset
@@ -1924,7 +1931,7 @@ for information on submitting problem reports electronically.
@unnumberedsec How to Stay Current
It may be you have a version of @command{gawk} which is newer than the
-one described in this @value{DOCUMENT}. To find out what has changed,
+one described here. To find out what has changed,
you should first look at the @file{NEWS} file in the @command{gawk}
distribution, which provides a high level summary of what changed in
each release.
@@ -2146,7 +2153,7 @@ take advantage of those opportunities.
Arnold Robbins @*
Nof Ayalon @*
ISRAEL @*
-May, 2014
+December, 2014
@end iftex
@ifnotinfo
@@ -2365,7 +2372,7 @@ to keep you from worrying about the complexities of computer
programming:
@example
-$ @kbd{awk "BEGIN @{ print "Don\47t Panic!" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
@@ -2373,11 +2380,11 @@ $ @kbd{awk "BEGIN @{ print "Don\47t Panic!" @}"}
reading any input. If there are no other statements in your program,
as is the case here, @command{awk} just stops, instead of trying to read
input it doesn't know how to process.
-The @samp{\47} is a magic way of getting a single quote into
+The @samp{\47} is a magic way (explained later) of getting a single quote into
the program, without having to engage in ugly shell quoting tricks.
@quotation NOTE
-As a side note, if you use Bash as your shell, you should execute the
+If you use Bash as your shell, you should execute the
command @samp{set +H} before running this program interactively, to
disable the C shell-style command history, which treats @samp{!} as a
special character. We recommend putting this command into your personal
@@ -2407,7 +2414,7 @@ $ @kbd{awk '@{ print @}'}
@cindex @command{awk} programs, running
@cindex @command{awk} programs, lengthy
@cindex files, @command{awk} programs in
-Sometimes your @command{awk} programs can be very long. In this case, it is
+Sometimes @command{awk} programs are very long. In these cases, it is
more convenient to put the program into a separate file. In order to tell
@command{awk} to use that file for its program, you type:
@@ -2437,7 +2444,7 @@ awk -f advice
does the same thing as this one:
@example
-awk "BEGIN @{ print \"Don't Panic!\" @}"
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
@end example
@cindex quoting in @command{gawk} command lines
@@ -2449,6 +2456,8 @@ specify with @option{-f}, because most @value{FN}s don't contain any of the shel
special characters. Notice that in @file{advice}, the @command{awk}
program did not have single quotes around it. The quotes are only needed
for programs that are provided on the @command{awk} command line.
+(Also, placing the program in a file allows us to use a literal single quote in the program
+text, instead of the magic @samp{\47}.)
@c STARTOFRANGE sq1x
@cindex single quote (@code{'}) in @command{gawk} command lines
@@ -2512,7 +2521,7 @@ written in @command{awk}.
according to the instructions in your program. (This is different
from a @dfn{compiled} language such as C, where your program is first
compiled into machine code that is executed directly by your system's
-hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+processor.) The @command{awk} utility is thus termed an @dfn{interpreter}.
Many modern languages are interpreted.
The line beginning with @samp{#!} lists the full @value{FN} of an
@@ -2521,9 +2530,9 @@ to pass to that interpreter. The operating system then runs the
interpreter with the given argument and the full argument list of the
executed program. The first argument in the list is the full @value{FN}
of the @command{awk} program. The rest of the argument list contains
-either options to @command{awk}, or @value{DF}s, or both. Note that on
+either options to @command{awk}, or @value{DF}s, or both. (Note that on
many systems @command{awk} may be found in @file{/usr/bin} instead of
-in @file{/bin}. Caveat Emptor.
+in @file{/bin}.)
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
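+As a sketch, an executable script using this mechanism (assuming
+@command{awk} lives in @file{/usr/bin} on your system) might look
+like this:
+
+@example
+#! /usr/bin/awk -f
+# Count the lines in whatever files are named on the command line.
+@{ nlines++ @}
+END @{ print nlines + 0, "lines" @}
+@end example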
@@ -2562,7 +2571,7 @@ to provide your script name.
according to the instructions in your program. (This is different
from a @dfn{compiled} language such as C, where your program is first
compiled into machine code that is executed directly by your system's
-hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+processor.) The @command{awk} utility is thus termed an @dfn{interpreter}.
Many modern languages are interpreted.
The line beginning with @samp{#!} lists the full @value{FN} of an
@@ -2571,9 +2580,9 @@ to pass to that interpreter. The operating system then runs the
interpreter with the given argument and the full argument list of the
executed program. The first argument in the list is the full @value{FN}
of the @command{awk} program. The rest of the argument list contains
-either options to @command{awk}, or @value{DF}s, or both. Note that on
+either options to @command{awk}, or @value{DF}s, or both. (Note that on
many systems @command{awk} may be found in @file{/usr/bin} instead of
-in @file{/bin}. Caveat Emptor.
+in @file{/bin}.)
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2752,8 +2761,14 @@ Thus, the example seen
@ifnotinfo
previously
@end ifnotinfo
-in @ref{Read Terminal},
-is applicable:
+in @ref{Read Terminal}:
+
+@example
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
+@end example
+
+@noindent
+could instead be written this way:
@example
$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
@@ -2848,6 +2863,9 @@ $ awk -v sq="'" 'BEGIN @{ print "Here is a single quote <" sq ">" @}'
@print{} Here is a single quote <'>
@end example
+(Here, the two string constants and the value of @code{sq} are concatenated
+into a single string which is printed by @code{print}.)
+
If you really need both single and double quotes in your @command{awk}
program, it is probably best to move it into a separate file, where
the shell won't be part of the picture, and you can say what you mean.
@@ -2911,7 +2929,7 @@ The second @value{DF}, called @file{inventory-shipped}, contains
information about monthly shipments. In both files,
each line is considered to be one @dfn{record}.
-In the @value{DF} @file{mail-list}, each record contains the name of a person,
+In @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email-address, and a code for their relationship
with the author of the list.
The columns are aligned using spaces.
@@ -3071,7 +3089,7 @@ Print the length of the longest line in @file{data}:
@example
expand data | awk '@{ if (x < length($0)) x = length($0) @}
- END @{ print "maximum line length is " x @}'
+ END @{ print "maximum line length is " x @}'
@end example
This example differs slightly from the previous one:
@@ -3103,7 +3121,7 @@ Print the total number of bytes used by @var{files}:
@example
ls -l @var{files} | awk '@{ x += $5 @}
- END @{ print "total bytes: " x @}'
+ END @{ print "total bytes: " x @}'
@end example
@item
@@ -3147,7 +3165,7 @@ the program would print the odd-numbered lines.
@cindex @command{awk} programs
The @command{awk} utility reads the input files one line at a
-time. For each line, @command{awk} tries the patterns of each of the rules.
+time. For each line, @command{awk} tries the patterns of each rule.
If several patterns match, then several actions execute in the order in
which they appear in the @command{awk} program. If no patterns match, then
no actions run.
@@ -3155,7 +3173,7 @@ no actions run.
After processing all the rules that match the line (and perhaps there are none),
@command{awk} reads the next line. (However,
@pxref{Next Statement},
-and also @pxref{Nextfile Statement}).
+and also @pxref{Nextfile Statement}.)
This continues until the program reaches the end of the file.
For example, the following @command{awk} program contains two rules:
@@ -3229,13 +3247,12 @@ the file was last modified. Its output looks like this:
@noindent
@cindex line continuations, with C shell
The first field contains read-write permissions, the second field contains
-the number of links to the file, and the third field identifies the owner of
-the file. The fourth field identifies the group of the file.
-The fifth field contains the size of the file in bytes. The
+the number of links to the file, and the third field identifies the file's owner.
+The fourth field identifies the file's group.
+The fifth field contains the file's size in bytes. The
sixth, seventh, and eighth fields contain the month, day, and time,
respectively, that the file was last modified. Finally, the ninth field
-contains the @value{FN}.@footnote{The @samp{LC_ALL=C} is
-needed to produce this traditional-style output from @command{ls}.}
+contains the @value{FN}.
@c @cindex automatic initialization
@cindex initialization, automatic
@@ -3645,7 +3662,7 @@ more than once, setting another variable each time, like this:
Using @option{-v} to set the values of the built-in
variables may lead to surprising results. @command{awk} will reset the
values of those variables as it needs to, possibly ignoring any
-predefined value you may have given.
+initial value you may have given.
@end quotation
@item -W @var{gawk-opt}
@@ -3728,7 +3745,7 @@ Print the short version of the General Public License and then exit.
@cindex variables, global, printing list of
Print a sorted list of global variables, their types, and final values
to @var{file}. If no @var{file} is provided, print this
-list to the file named @file{awkvars.out} in the current directory.
+list to a file named @file{awkvars.out} in the current directory.
No space is allowed between the @option{-d} and @var{file}, if
@var{file} is supplied.
@@ -3824,7 +3841,7 @@ that @command{gawk} accepts and then exit.
@cindex @option{-i} option
@cindex @option{--include} option
@cindex @command{awk} programs, location of
-Read @command{awk} source library from @var{source-file}. This option
+Read an @command{awk} source library from @var{source-file}. This option
is completely equivalent to using the @code{@@include} directive inside
your program. This option is very similar to the @option{-f} option,
but there are two important differences. First, when @option{-i} is
@@ -3848,7 +3865,7 @@ environment variable. The correct library suffix for your platform will be
supplied by default, so it need not be specified in the extension name.
The extension initialization routine should be named @code{dl_load()}.
An alternative is to use the @code{@@load} keyword inside the program to load
-a shared library. This feature is described in detail in @ref{Dynamic Extensions}.
+a shared library. This advanced feature is described in detail in @ref{Dynamic Extensions}.
@item @option{-L}[@var{value}]
@itemx @option{--lint}[@code{=}@var{value}]
@@ -3897,6 +3914,8 @@ values in input data
@quotation CAUTION
This option can severely break old programs.
Use with care.
+
+This option may disappear in a future version of @command{gawk}.
@end quotation
@item @option{-N}
@@ -4060,6 +4079,7 @@ if they had been concatenated together into one big file. This is
useful for creating libraries of @command{awk} functions. These functions
can be written once and then retrieved from a standard place, instead
of having to be included into each individual program.
+The @option{-i} option is similar in this regard.
(As mentioned in
@ref{Definition Syntax},
function names must be unique.)
@@ -4133,15 +4153,18 @@ Any additional arguments on the command line are normally treated as
input files to be processed in the order specified. However, an
argument that has the form @code{@var{var}=@var{value}}, assigns
the value @var{value} to the variable @var{var}---it does not specify a
-file at all.
-(See
-@ref{Assignment Options}.)
+file at all. (See @ref{Assignment Options}.) In the following example,
+@var{count=1} is a variable assignment, not a @value{FN}:
+
+@example
+awk -f program.awk file1 count=1 file2
+@end example
@cindex @command{gawk}, @code{ARGIND} variable in
@cindex @code{ARGIND} variable, command-line arguments
@cindex @code{ARGV} array, indexing into
@cindex @code{ARGC}/@code{ARGV} variables, command-line arguments
-All these arguments are made available to your @command{awk} program in the
+All the command-line arguments are made available to your @command{awk} program in the
@code{ARGV} array (@pxref{Built-in Variables}). Command-line options
and the program text (if present) are omitted from @code{ARGV}.
All other arguments, including variable assignments, are
@@ -4272,15 +4295,15 @@ separated by colons@footnote{Semicolons on MS-Windows and MS-DOS.}. @command{ga
@samp{.:/usr/local/share/awk}.@footnote{Your version of @command{gawk}
may use a different directory; it
will depend upon how @command{gawk} was built and installed. The actual
-directory is the value of @samp{$(datadir)} generated when
+directory is the value of @code{$(datadir)} generated when
@command{gawk} was configured. You probably don't need to worry about this,
though.}
The search path feature is particularly helpful for building libraries
of useful @command{awk} functions. The library files can be placed in a
standard directory in the default path and then specified on
-the command line with a short @value{FN}. Otherwise, the full @value{FN}
-would have to be typed for each file.
+the command line with a short @value{FN}. Otherwise, you would have to
+type the full @value{FN} for each file.
By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
@@ -4289,25 +4312,23 @@ Path searching is not done if @command{gawk} is in compatibility mode.
This is true for both @option{--traditional} and @option{--posix}.
@xref{Options}.
-If the source code is not found after the initial search, the path is searched
+If the source code file is not found after the initial search, the path is searched
again after adding the default @samp{.awk} suffix to the @value{FN}.
-@quotation NOTE
-@c 4/2014:
-@c using @samp{.} to get quotes, since @file{} no longer supplies them.
-To include
-the current directory in the path, either place
-@samp{.} explicitly in the path or write a null entry in the
-path. (A null entry is indicated by starting or ending the path with a
-colon or by placing two colons next to each other [@samp{::}].)
-This path search mechanism is similar
+@command{gawk}'s path search mechanism is similar
to the shell's.
(See @uref{http://www.gnu.org/software/bash/manual/,
-@cite{The Bourne-Again SHell manual}.})
+@cite{The Bourne-Again SHell manual}}.)
+It treats a null entry in the path as indicating the current
+directory.
+(A null entry is indicated by starting or ending the path with a
+colon or by placing two colons next to each other [@samp{::}].)
-However, @command{gawk} always looks in the current directory @emph{before}
-searching @env{AWKPATH}, so there is no real reason to include
-the current directory in the search path.
+@quotation NOTE
+@command{gawk} always looks in the current directory @emph{before}
+searching @env{AWKPATH}. Thus, while you can include the current directory
+in the search path, either explicitly or with a null entry, there is no
+real reason to do so.
@c Prior to 4.0, gawk searched the current directory after the
@c path search, but it's not worth documenting it.
@end quotation
@@ -4348,16 +4369,6 @@ behavior, but they are more specialized. Those in the following
list are meant to be used by regular users.
@table @env
-@item POSIXLY_CORRECT
-Causes @command{gawk} to switch to POSIX compatibility
-mode, disabling all traditional and GNU extensions.
-@xref{Options}.
-
-@item GAWK_SOCK_RETRIES
-Controls the number of times @command{gawk} attempts to
-retry a two-way TCP/IP (socket) connection before giving up.
-@xref{TCP/IP Networking}.
-
@item GAWK_MSEC_SLEEP
Specifies the interval between connection retries,
in milliseconds. On systems that do not support
@@ -4368,6 +4379,16 @@ the value is rounded up to an integral number of seconds.
Specifies the time, in milliseconds, for @command{gawk} to
wait for input before returning with an error.
@xref{Read Timeout}.
+
+@item GAWK_SOCK_RETRIES
+Controls the number of times @command{gawk} attempts to
+retry a two-way TCP/IP (socket) connection before giving up.
+@xref{TCP/IP Networking}.
+
+@item POSIXLY_CORRECT
+Causes @command{gawk} to switch to POSIX compatibility
+mode, disabling all traditional and GNU extensions.
+@xref{Options}.
@end table
The environment variables in the following list are meant
@@ -4382,7 +4403,7 @@ file as the size of the memory buffer to allocate for I/O. Otherwise,
the value should be a number, and @command{gawk} uses that number as
the size of the buffer to allocate. (When this variable is not set,
@command{gawk} uses the smaller of the file's size and the ``default''
-blocksize, which is usually the filesystems I/O blocksize.)
+blocksize, which is usually the filesystem's I/O blocksize.)
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
@@ -4397,10 +4418,11 @@ for debugging problems on filesystems on non-POSIX operating systems
where I/O is performed in records, not in blocks.
@item GAWK_MSG_SRC
-If this variable exists, @command{gawk} includes the source file
-name and line number from which warning and/or fatal messages
+If this variable exists, @command{gawk} includes the file
+name and line number within the @command{gawk} source code
+from which warning and/or fatal messages
are generated. Its purpose is to help isolate the source of a
-message, since there can be multiple places which produce the
+message, since there are multiple places which produce the
same warning or error message.
@item GAWK_NO_DFA
@@ -4613,6 +4635,7 @@ that requires access to an extension.
@ref{Dynamic Extensions}, describes how to write extensions (in C or C++)
that can be loaded with either @code{@@load} or the @option{-l} option.
+It also describes the @code{ordchr} extension.
@node Obsolete
@section Obsolete Options and/or Features
@@ -4681,15 +4704,15 @@ awk '@{ sum += $1 @} END @{ print sum @}'
@end example
@command{gawk} actually supports this but it is purposely undocumented
-because it is considered bad style. The correct way to write such a program
-is either
+because it is bad style. The correct way to write such a program
+is either:
@example
awk '@{ sum += $1 @} ; END @{ print sum @}'
@end example
@noindent
-or
+or:
@example
awk '@{ sum += $1 @}
@@ -4697,8 +4720,7 @@ awk '@{ sum += $1 @}
@end example
@noindent
-@xref{Statements/Lines}, for a fuller
-explanation.
+@xref{Statements/Lines}, for a fuller explanation.
You can insert newlines after the @samp{;} in @code{for} loops.
This seems to have been a long-undocumented feature in Unix @command{awk}.
@@ -4738,7 +4760,8 @@ affects how @command{awk} processes input.
@item
You can use a single minus sign (@samp{-}) to refer to standard input
-on the command line.
+on the command line. @command{gawk} also lets you use the special
+@value{FN} @file{/dev/stdin}.
@item
@command{gawk} pays attention to a number of environment variables.
@@ -4927,7 +4950,7 @@ such as TAB or newline. While there is nothing to stop you from entering most
unprintable characters directly in a string constant or regexp constant,
they may look ugly.
-The following table lists
+The following list presents
all the escape sequences used in @command{awk} and
what they represent. Unless noted otherwise, all these escape
sequences apply to both string constants and regexp constants:
@@ -5043,13 +5066,13 @@ characters @samp{a+b}.
@cindex @code{\} (backslash), in escape sequences
@cindex portability
For complete portability, do not use a backslash before any character not
-shown in the previous list.
+shown in the previous list, unless that character is an operator.
To summarize:
@itemize @value{BULLET}
@item
-The escape sequences in the table above are always processed first,
+The escape sequences in the list above are always processed first,
for both string constants and regexp constants. This happens very early,
as soon as @command{awk} reads your program.
@@ -5222,7 +5245,7 @@ are recognized and converted into corresponding real characters as
the very first step in processing regexps.
Here is a list of metacharacters. All characters that are not escape
-sequences and that are not listed in the table stand for themselves:
+sequences and that are not listed in the following stand for themselves:
@c Use @asis so the docbook comes out ok. Sigh.
@table @asis
@@ -5479,7 +5502,7 @@ characters to be matched.
@cindex Extended Regular Expressions (EREs)
@cindex EREs (Extended Regular Expressions)
@cindex @command{egrep} utility
-This treatment of @samp{\} in bracket expressions
+The treatment of @samp{\} in bracket expressions
is compatible with other @command{awk}
implementations and is also mandated by POSIX.
The regular expressions in @command{awk} are a superset
@@ -5596,11 +5619,11 @@ Consider the following:
echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
@end example
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
+This example uses the @code{sub()} function to make a change to the input
+record. (@code{sub()} replaces the first instance of any text matched
+by the first argument with the string provided as the second argument;
+@pxref{String Functions}). Here, the regexp @code{/a+/} indicates ``one
+or more @samp{a} characters,'' and the replacement text is @samp{<A>}.
The input contains four @samp{a} characters.
@command{awk} (and POSIX) regular expressions always match
@@ -5716,7 +5739,7 @@ intend a regexp match.
@cindex regular expressions, dynamic, with embedded newlines
@cindex newlines, in dynamic regexps
-Some versions of @command{awk} do not allow the newline
+Some older versions of @command{awk} do not allow the newline
character to be used inside a bracket expression for a dynamic regexp:
@example
@@ -5725,7 +5748,7 @@ $ @kbd{awk '$0 ~ "[ \t\n]"'}
@error{} ]...
@error{} source line number 1
@error{} context is
-@error{} >>> <<<
+@error{} $0 ~ "[ >>> \t\n]" <<<
@end example
@cindex newlines, in regexp constants
@@ -5754,7 +5777,7 @@ occur often in practice, but it's worth noting for future reference.
@cindex regular expressions, dynamic, with embedded newlines
@cindex newlines, in dynamic regexps
-Some versions of @command{awk} do not allow the newline
+Some older versions of @command{awk} do not allow the newline
character to be used inside a bracket expression for a dynamic regexp:
@example
@@ -5763,7 +5786,7 @@ $ @kbd{awk '$0 ~ "[ \t\n]"'}
@error{} ]...
@error{} source line number 1
@error{} context is
-@error{} >>> <<<
+@error{} $0 ~ "[ >>> \t\n]" <<<
@end example
@cindex newlines, in regexp constants
@@ -6087,11 +6110,6 @@ Within bracket expressions, POSIX character classes let you specify
certain groups of characters in a locale-independent fashion.
@item
-@command{gawk}'s @code{IGNORECASE} variable lets you control the
-case sensitivity of regexp matching. In other @command{awk}
-versions, use @code{tolower()} or @code{toupper()}.
-
-@item
Regular expressions match the leftmost longest text in the string being
matched. This matters for cases where you need to know the extent of
the match, such as for text substitution and when the record separator
@@ -6101,6 +6119,11 @@ is a regexp.
Matching expressions may use dynamic regexps, that is, string values
treated as regular expressions.
+@item
+@command{gawk}'s @code{IGNORECASE} variable lets you control the
+case sensitivity of regexp matching. In other @command{awk}
+versions, use @code{tolower()} or @code{toupper()}.
+
@end itemize
@c ENDOFRANGE regexp
@@ -6168,7 +6191,7 @@ used with it do not have to be named on the @command{awk} command line
@command{awk} divides the input for your program into records and fields.
It keeps track of the number of records that have been read so far from
the current input file. This value is stored in a built-in variable
-called @code{FNR} which is reset to zero when a new file is started.
+called @code{FNR} which is reset to zero every time a new file is started.
Another built-in variable, @code{NR}, records the total number of input
records read so far from all @value{DF}s. It starts at zero, but is
never automatically reset to zero.
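+For example, a quick sketch that shows the difference between the two,
+run on the sample @value{DF}s introduced earlier, is:
+
+@example
+$ @kbd{awk '@{ print FILENAME, FNR, NR @}' mail-list inventory-shipped}
+@end example
+
+@noindent
+@code{FNR} starts over at one when @file{inventory-shipped} is reached,
+while @code{NR} keeps counting across both files.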
@@ -6298,7 +6321,8 @@ Using an unusual character such as @samp{/} is more likely to
produce correct behavior in the majority of cases, but there
are no guarantees. The moral is: Know Your Data.
-There is one unusual case, that occurs when @command{gawk} is
+When using regular characters as the record separator,
+there is one unusual case that occurs when @command{gawk} is
being fully POSIX-compliant (@pxref{Options}).
Then, the following (extreme) pipeline prints a surprising @samp{1}:
@@ -6387,7 +6411,7 @@ $ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
@noindent
The square brackets delineate the contents of @code{RT}, letting you
-see the leading and trailing whitespace. The final value of @code{RT}
+see the leading and trailing whitespace. The final value of
@code{RT} is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -6406,7 +6430,7 @@ metacharacters match the beginning and end of a @emph{string}, and not
the beginning and end of a @emph{line}. As a result, something like
@samp{RS = "^[[:upper:]]"} can only match at the beginning of a file.
This is because @command{gawk} views the input file as one long string
-that happens to contain newline characters in it.
+that happens to contain newline characters.
It is thus best to avoid anchor characters in the value of @code{RS}.
@end quotation
@@ -6416,7 +6440,7 @@ variable are @command{gawk} extensions; they are not available in
compatibility mode
(@pxref{Options}).
In compatibility mode, only the first character of the value of
-@code{RS} is used to determine the end of the record.
+@code{RS} determines the end of the record.
@cindex sidebar, @code{RS = "\0"} Is Not Portable
@ifdocbook
@@ -6457,10 +6481,11 @@ about.} store strings internally as C-style strings. C strings use the
It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
@command{mawk} does not allow embedded @value{NUL} characters in strings.
+(This may change in a future version of @command{mawk}.)
@cindex records, treating files as
@cindex treating files, as single records
-@xref{Readfile Function}, for an interesting, portable way to read
+@xref{Readfile Function}, for an interesting way to read
whole files. If you are using @command{gawk}, see @ref{Extension Sample
Readfile}, for another option.
@@ -6507,10 +6532,11 @@ about.} store strings internally as C-style strings. C strings use the
It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
@command{mawk} does not allow embedded @value{NUL} characters in strings.
+(This may change in a future version of @command{mawk}.)
@cindex records, treating files as
@cindex treating files, as single records
-@xref{Readfile Function}, for an interesting, portable way to read
+@xref{Readfile Function}, for an interesting way to read
whole files. If you are using @command{gawk}, see @ref{Extension Sample
Readfile}, for another option.
@end cartouche
@@ -6592,15 +6618,11 @@ $ @kbd{awk '$1 ~ /li/ @{ print $0 @}' mail-list}
@noindent
This example prints each record in the file @file{mail-list} whose first
-field contains the string @samp{li}. The operator @samp{~} is called a
-@dfn{matching operator}
-(@pxref{Regexp Usage});
-it tests whether a string (here, the field @code{$1}) matches a given regular
-expression.
+field contains the string @samp{li}.
-By contrast, the following example
-looks for @samp{li} in @emph{the entire record} and prints the first
-field and the last field for each matching input record:
+By contrast, the following example looks for @samp{li} in @emph{the
+entire record} and prints the first and last fields for each matching
+input record:
@example
$ @kbd{awk '/li/ @{ print $1, $NF @}' mail-list}
@@ -6723,8 +6745,8 @@ It is also possible to also assign contents to fields that are out
of range. For example:
@example
-$ awk '@{ $6 = ($5 + $4 + $3 + $2)
-> print $6 @}' inventory-shipped
+$ @kbd{awk '@{ $6 = ($5 + $4 + $3 + $2)}
+> @kbd{ print $6 @}' inventory-shipped}
@print{} 168
@print{} 297
@print{} 301
@@ -6813,7 +6835,7 @@ Here is an example:
@example
$ echo a b c d e f | awk '@{ print "NF =", NF;
-> NF = 3; print $0 @}'
+> NF = 3; print $0 @}'
@print{} NF = 6
@print{} a b c
@end example
@@ -6821,7 +6843,7 @@ $ echo a b c d e f | awk '@{ print "NF =", NF;
@cindex portability, @code{NF} variable@comma{} decrementing
@quotation CAUTION
Some versions of @command{awk} don't
-rebuild @code{$0} when @code{NF} is decremented. Caveat emptor.
+rebuild @code{$0} when @code{NF} is decremented.
@end quotation
Finally, there are times when it is convenient to force
@@ -6857,7 +6879,7 @@ record, exactly as it was read from the input. This includes
any leading or trailing whitespace, and the exact whitespace (or other
characters) that separate the fields.
-It is a not-uncommon error to try to change the field separators
+It is a common error to try to change the field separators
in a record simply by setting @code{FS} and @code{OFS}, and then
expecting a plain @samp{print} or @samp{print $0} to print the
modified record.
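+A sketch of the usual workaround is to assign a field to itself, which
+forces @command{awk} to rebuild @code{$0} using the value of @code{OFS}:
+
+@example
+$ @kbd{echo 'a:b:c' | awk 'BEGIN @{ FS = ":"; OFS = "-" @} @{ $1 = $1; print @}'}
+@print{} a-b-c
+@end example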
@@ -6882,7 +6904,7 @@ record, exactly as it was read from the input. This includes
any leading or trailing whitespace, and the exact whitespace (or other
characters) that separate the fields.
-It is a not-uncommon error to try to change the field separators
+It is a common error to try to change the field separators
in a record simply by setting @code{FS} and @code{OFS}, and then
expecting a plain @samp{print} or @samp{print $0} to print the
modified record.
@@ -7086,9 +7108,10 @@ $ @kbd{echo ' a b c d' | awk '@{ print; $2 = $2; print @}'}
The first @code{print} statement prints the record as it was read,
with leading whitespace intact. The assignment to @code{$2} rebuilds
@code{$0} by concatenating @code{$1} through @code{$NF} together,
-separated by the value of @code{OFS}. Because the leading whitespace
-was ignored when finding @code{$1}, it is not part of the new @code{$0}.
-Finally, the last @code{print} statement prints the new @code{$0}.
+separated by the value of @code{OFS} (which is a space by default).
+Because the leading whitespace was ignored when finding @code{$1},
+it is not part of the new @code{$0}. Finally, the last @code{print}
+statement prints the new @code{$0}.
@cindex @code{FS}, containing @code{^}
@cindex @code{^} (caret), in @code{FS}
@@ -7110,7 +7133,7 @@ also works this way. For example:
@example
$ @kbd{echo 'xxAA xxBxx C' |}
> @kbd{gawk -F '(^x+)|( +)' '@{ for (i = 1; i <= NF; i++)}
-> @kbd{printf "-->%s<--\n", $i @}'}
+> @kbd{ printf "-->%s<--\n", $i @}'}
@print{} --><--
@print{} -->AA<--
@print{} -->xxBxx<--
@@ -7173,12 +7196,7 @@ awk -F, '@var{program}' @var{input-files}
@noindent
sets @code{FS} to the @samp{,} character. Notice that the option uses
an uppercase @samp{F} instead of a lowercase @samp{f}. The latter
-option (@option{-f}) specifies a file
-containing an @command{awk} program. Case is significant in command-line
-options:
-the @option{-F} and @option{-f} options have nothing to do with each other.
-You can use both options at the same time to set the @code{FS} variable
-@emph{and} get an @command{awk} program from a file.
+option (@option{-f}) specifies a file containing an @command{awk} program.
The value used for the argument to @option{-F} is processed in exactly the
same way as assignments to the built-in variable @code{FS}.
@@ -7292,7 +7310,7 @@ to @code{FS} (the backslash is stripped). This creates a regexp meaning
If instead you want fields to be separated by a literal period followed
by any single character, use @samp{FS = "\\.."}.
-The following table summarizes how fields are split, based on the value
+The following list summarizes how fields are split, based on the value
of @code{FS} (@samp{==} means ``is equal to''):
@table @code
@@ -7313,8 +7331,7 @@ Leading and trailing matches of @var{regexp} delimit empty fields.
@item FS == ""
Each individual character in the record becomes a separate field.
-(This is a @command{gawk} extension; it is not specified by the
-POSIX standard.)
+(This is a common extension; it is not specified by the POSIX standard.)
@end table
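+As a brief sketch of the last case:
+
+@example
+$ @kbd{echo abc | awk 'BEGIN @{ FS = "" @} @{ print NF, $2 @}'}
+@print{} 3 b
+@end example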
@cindex sidebar, Changing @code{FS} Does Not Affect the Fields
@@ -7861,7 +7878,7 @@ BEGIN @{ RS = "" ; FS = "\n" @}
Running the program produces the following output:
@example
-$ awk -f addrs.awk addresses
+$ @kbd{awk -f addrs.awk addresses}
@print{} Name is: Jane Doe
@print{} Address is: 123 Main Street
@print{} City and State are: Anywhere, SE 12345-6789
@@ -7873,12 +7890,9 @@ $ awk -f addrs.awk addresses
@dots{}
@end example
-@xref{Labels Program}, for a more realistic
-program that deals with address lists.
-The following
-table
-summarizes how records are split, based on the
-value of
+@xref{Labels Program}, for a more realistic program that deals with
+address lists. The following list summarizes how records are split,
+based on the value of
@ifinfo
@code{RS}.
(@samp{==} means ``is equal to.'')
@@ -7913,8 +7927,8 @@ POSIX standard.)
@cindex @command{gawk}, @code{RT} variable in
@cindex @code{RT} variable
-In all cases, @command{gawk} sets @code{RT} to the input text that matched the
-value specified by @code{RS}.
+If not in compatibility mode (@pxref{Options}), @command{gawk} sets
+@code{RT} to the input text that matched the value specified by @code{RS}.
But if the input file ended without any text that matches @code{RS},
then @command{gawk} sets @code{RT} to the null string.
@c ENDOFRANGE recm
@@ -8012,9 +8026,7 @@ processing on the next record @emph{right now}. For example:
while (j == 0) @{
# get more text
if (getline <= 0) @{
- m = "unexpected EOF or error"
- m = (m ": " ERRNO)
- print m > "/dev/stderr"
+ print("unexpected EOF or error:", ERRNO) > "/dev/stderr"
exit
@}
# build up the line using string concatenation
@@ -8283,7 +8295,7 @@ bletch
@end example
@noindent
-Notice that this program ran the command @command{who} and printed the previous result.
+Notice that this program ran the command @command{who} and printed the result.
(If you try this program yourself, you will of course get different results,
depending upon who is logged in on your system.)
@@ -8308,7 +8320,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it at as
@samp{@w{("echo "} "date") | getline}.
-(This how BWK @command{awk} behaves.)
+(This is also how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
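+To sidestep the ambiguity entirely, a sketch that parenthesizes the
+piped command explicitly, reading its output into a variable (here
+arbitrarily named @code{line}), behaves the same way everywhere:
+
+@example
+if ((("echo " "date") | getline line) > 0)
+    print "got:", line
+@end example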
@@ -8336,7 +8348,7 @@ BEGIN @{
@end example
In this version of @code{getline}, none of the built-in variables are
-changed and the record is not split into fields.
+changed and the record is not split into fields. However, @code{RT} is set.
@ifinfo
@c Thanks to Paul Eggert for initial wording here
@@ -8444,7 +8456,7 @@ causes @command{awk} to set the value of @code{FILENAME}. Normally,
@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you
have not yet started to process the command-line @value{DF}s.
@value{DARKCORNER}
-(@xref{BEGIN/END},
+(See @ref{BEGIN/END};
also @pxref{Auto-set}.)
@item
@@ -8491,7 +8503,7 @@ end of file is encountered, before the element in @code{a} is assigned?
@command{gawk} treats @code{getline} like a function call, and evaluates
the expression @samp{a[++c]} before attempting to read from @file{f}.
However, some versions of @command{awk} only evaluate the expression once they
-know that there is a string value to be assigned. Caveat Emptor.
+know that there is a string value to be assigned.
@end itemize
@node Getline Summary
@@ -8507,15 +8519,15 @@ Note: for each variant, @command{gawk} sets the @code{RT} built-in variable.
@float Table,table-getline-variants
@caption{@code{getline} Variants and What They Set}
@multitable @columnfractions .33 .38 .27
-@headitem Variant @tab Effect @tab Standard / Extension
-@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR}, @code{NR}, and @code{RT} @tab Standard
-@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR}, @code{NR}, and @code{RT} @tab Standard
-@item @code{getline <} @var{file} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Standard
-@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} and @code{RT} @tab Standard
-@item @var{command} @code{| getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Standard
-@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} and @code{RT} @tab Standard
-@item @var{command} @code{|& getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Extension
-@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} and @code{RT} @tab Extension
+@headitem Variant @tab Effect @tab @command{awk} / @command{gawk}
+@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline <} @var{file} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{|& getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{gawk}
+@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{gawk}
@end multitable
@end float
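+As a small sketch of the @samp{getline @var{var} < @var{file}} form
+(assuming a file named @file{notes.txt} exists), the main input stream,
+@code{NR}, and @code{FNR} are left untouched:
+
+@example
+BEGIN @{
+    # Each line of notes.txt goes into aux; $0 and NF are not changed.
+    while ((getline aux < "notes.txt") > 0)
+        print "note:", aux
+    close("notes.txt")
+@}
+@end example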
@c ENDOFRANGE getl
@@ -8532,7 +8544,7 @@ This @value{SECTION} describes a feature that is specific to @command{gawk}.
You may specify a timeout in milliseconds for reading input from the keyboard,
a pipe, or two-way communication, including TCP/IP sockets. This can be done
on a per input, command or connection basis, by setting a special element
-in the @code{PROCINFO} (@pxref{Auto-set}) array:
+in the @code{PROCINFO} array (@pxref{Auto-set}):
@example
PROCINFO["input_name", "READ_TIMEOUT"] = @var{timeout in milliseconds}
@@ -8564,7 +8576,7 @@ while ((getline < "/dev/stdin") > 0)
@command{gawk} terminates the read operation if input does not
arrive after waiting for the timeout period, returns failure
-and sets the @code{ERRNO} variable to an appropriate string value.
+and sets @code{ERRNO} to an appropriate string value.
A negative or zero value for the timeout is the same as specifying
no timeout at all.
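+As a sketch, the following waits at most two seconds for each line of
+keyboard input before giving up:
+
+@example
+BEGIN @{
+    PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 2000
+    while ((getline line < "/dev/stdin") > 0)
+        print "you typed:", line
+@}
+@end example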
@@ -8671,6 +8683,10 @@ The possibilities are as follows:
@end multitable
@item
+@code{FNR} indicates how many records have been read from the current input file;
+@code{NR} indicates how many records have been read in total.
+
+@item
@command{gawk} sets @code{RT} to the text matched by @code{RS}.
@item
@@ -8681,7 +8697,7 @@ fields there are. The default way to split fields is between whitespace
characters.
@item
-Fields may be referenced using a variable, as in @samp{$NF}. Fields
+Fields may be referenced using a variable, as in @code{$NF}. Fields
may also be assigned values, which causes the value of @code{$0} to be
recomputed when it is later referenced. Assigning to a field with a number
greater than @code{NF} creates the field and rebuilds the record, using
@@ -8691,16 +8707,17 @@ thing. Decrementing @code{NF} throws away fields and rebuilds the record.
@item
Field splitting is more complicated than record splitting.
-@multitable @columnfractions .40 .40 .20
+@multitable @columnfractions .40 .45 .15
@headitem Field separator value @tab Fields are split @dots{} @tab @command{awk} / @command{gawk}
@item @code{FS == " "} @tab On runs of whitespace @tab @command{awk}
@item @code{FS == @var{any single character}} @tab On that character @tab @command{awk}
@item @code{FS == @var{regexp}} @tab On text matching the regexp @tab @command{awk}
@item @code{FS == ""} @tab Each individual character is a separate field @tab @command{gawk}
@item @code{FIELDWIDTHS == @var{list of columns}} @tab Based on character position @tab @command{gawk}
-@item @code{FPAT == @var{regexp}} @tab On text around text matching the regexp @tab @command{gawk}
+@item @code{FPAT == @var{regexp}} @tab On the text surrounding text matching the regexp @tab @command{gawk}
@end multitable
+@item
Using @samp{FS = "\n"} causes the entire record to be a single field
(assuming that newlines separate records).
@@ -8709,11 +8726,11 @@ Using @samp{FS = "\n"} causes the entire record to be a single field
This can also be done using command-line variable assignment.
@item
-@code{PROCINFO["FS"]} can be used to see how fields are being split.
+Use @code{PROCINFO["FS"]} to see how fields are being split.
@item
Use @code{getline} in its various forms to read additional records,
-from the default input stream, from a file, or from a pipe or co-process.
+from the default input stream, from a file, or from a pipe or coprocess.
@item
Use @code{PROCINFO[@var{file}, "READ_TIMEOUT"]} to cause reads to timeout
@@ -8782,6 +8799,7 @@ and discusses the @code{close()} built-in function.
* Printf:: The @code{printf} statement.
* Redirection:: How to redirect output to multiple files and
pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in @command{gawk}.
@command{gawk} allows access to inherited file
descriptors.
@@ -8793,7 +8811,7 @@ and discusses the @code{close()} built-in function.
@node Print
@section The @code{print} Statement
-The @code{print} statement is used for producing output with simple, standardized
+Use the @code{print} statement to produce output with simple, standardized
formatting. You specify only the strings or numbers to print, in a
list separated by commas. They are output, separated by single spaces,
followed by a newline. The statement looks like this:
@@ -8817,7 +8835,7 @@ expression. Numeric values are converted to strings and then printed.
@cindex text, printing
The simple statement @samp{print} with no items is equivalent to
@samp{print $0}: it prints the entire current record. To print a blank
-line, use @samp{print ""}, where @code{""} is the empty string.
+line, use @samp{print ""}.
To print a fixed piece of text, use a string constant, such as
@w{@code{"Don't Panic"}}, as one item. If you forget to use the
double-quote characters, your text is taken as an @command{awk}
@@ -8825,8 +8843,8 @@ expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
Note that the @code{print} statement is a statement and not an
-expression---you can't use it the pattern part of a pattern-action
-statement, for example.
+expression---you can't use it in the pattern part of a
+@var{pattern}-@var{action} statement, for example.
@node Print Examples
@section @code{print} Statement Examples
@@ -8837,9 +8855,22 @@ newline, the newline is output along with the rest of the string. A
single @code{print} statement can make any number of lines this way.
@cindex newlines, printing
-The following is an example of printing a string that contains embedded newlines
+The following is an example of printing a string that contains embedded
+@ifinfo
+newlines
(the @samp{\n} is an escape sequence, used to represent the newline
character; @pxref{Escape Sequences}):
+@end ifinfo
+@ifhtml
+newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; @pxref{Escape Sequences}):
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+newlines:
+@end ifnothtml
+@end ifnotinfo
@example
$ @kbd{awk 'BEGIN @{ print "line one\nline two\nline three" @}'}
@@ -9019,13 +9050,13 @@ more fully in
@cindexawkfunc{sprintf}
@cindex @code{OFMT} variable
@cindex output, format specifier@comma{} @code{OFMT}
-The built-in variable @code{OFMT} contains the default format specification
+The built-in variable @code{OFMT} contains the format specification
that @code{print} uses with @code{sprintf()} when it wants to convert a
number to a string for printing.
The default value of @code{OFMT} is @code{"%.6g"}.
The way @code{print} prints numbers can be changed
-by supplying different format specifications
-as the value of @code{OFMT}, as shown in the following example:
+by supplying a different format specification
+for the value of @code{OFMT}, as shown in the following example:
@example
$ @kbd{awk 'BEGIN @{}
@@ -9055,9 +9086,7 @@ With @code{printf} you can
specify the width to use for each item, as well as various
formatting choices for numbers (such as what output base to use, whether to
print an exponent, whether to print a sign, and how many digits to print
-after the decimal point). You do this by supplying a string, called
-the @dfn{format string}, that controls how and where to print the other
-arguments.
+after the decimal point).
@menu
* Basic Printf:: Syntax of the @code{printf} statement.
@@ -9077,10 +9106,10 @@ printf @var{format}, @var{item1}, @var{item2}, @dots{}
@end example
@noindent
-The entire list of arguments may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions use the @samp{>}
-relational operator; otherwise, it can be confused with an output redirection
-(@pxref{Redirection}).
+As with @code{print}, the entire list of arguments may optionally be
+enclosed in parentheses. Here too, the parentheses are necessary if any
+of the item expressions use the @samp{>} relational operator; otherwise,
+it can be confused with an output redirection (@pxref{Redirection}).
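+For instance, in a sketch like the following (for some numeric variable
+@code{n}), the parentheses keep the @samp{>} comparison from being taken
+as a redirection:
+
+@example
+printf("%d is %s\n", n, (n > 100) ? "large" : "small")
+@end example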
@cindex format specifiers
The difference between @code{printf} and @code{print} is the @var{format}
@@ -9103,10 +9132,10 @@ on @code{printf} statements. For example:
@example
$ @kbd{awk 'BEGIN @{}
> @kbd{ORS = "\nOUCH!\n"; OFS = "+"}
-> @kbd{msg = "Dont Panic!"}
+> @kbd{msg = "Don\47t Panic!"}
> @kbd{printf "%s\n", msg}
> @kbd{@}'}
-@print{} Dont Panic!
+@print{} Don't Panic!
@end example
@noindent
@@ -9128,7 +9157,7 @@ the field width. Here is a list of the format-control letters:
@c @asis for docbook to come out right
@table @asis
@item @code{%c}
-Print a number as an ASCII character; thus, @samp{printf "%c",
+Print a number as a character; thus, @samp{printf "%c",
65} outputs the letter @samp{A}. The output for a string value is
the first character of the string.
@@ -9154,7 +9183,7 @@ a single byte (0--255).
@item @code{%d}, @code{%i}
Print a decimal integer.
The two control letters are equivalent.
-(The @samp{%i} specification is for compatibility with ISO C.)
+(The @code{%i} specification is for compatibility with ISO C.)
@item @code{%e}, @code{%E}
Print a number in scientific (exponential) notation;
@@ -9169,7 +9198,7 @@ prints @samp{1.950e+03}, with a total of four significant figures, three of
which follow the decimal point.
(The @samp{4.3} represents two modifiers,
discussed in the next @value{SUBSECTION}.)
-@samp{%E} uses @samp{E} instead of @samp{e} in the output.
+@code{%E} uses @samp{E} instead of @samp{e} in the output.
@item @code{%f}
Print a number in floating-point notation.
@@ -9195,16 +9224,16 @@ The special ``not a number'' value formats as @samp{-nan} or @samp{nan}
(@pxref{Math Definitions}).
@item @code{%F}
-Like @samp{%f} but the infinity and ``not a number'' values are spelled
+Like @code{%f} but the infinity and ``not a number'' values are spelled
using uppercase letters.
-The @samp{%F} format is a POSIX extension to ISO C; not all systems
-support it. On those that don't, @command{gawk} uses @samp{%f} instead.
+The @code{%F} format is a POSIX extension to ISO C; not all systems
+support it. On those that don't, @command{gawk} uses @code{%f} instead.
@item @code{%g}, @code{%G}
Print a number in either scientific notation or in floating-point
notation, whichever uses fewer characters; if the result is printed in
-scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
+scientific notation, @code{%G} uses @samp{E} instead of @samp{e}.
@item @code{%o}
Print an unsigned octal integer
@@ -9220,7 +9249,7 @@ are floating-point; it is provided primarily for compatibility with C.)
@item @code{%x}, @code{%X}
Print an unsigned hexadecimal integer;
-@samp{%X} uses the letters @samp{A} through @samp{F}
+@code{%X} uses the letters @samp{A} through @samp{F}
instead of @samp{a} through @samp{f}
(@pxref{Nondecimal-numbers}).
@@ -9235,7 +9264,7 @@ argument and it ignores any modifiers.
@quotation NOTE
When using the integer format-control letters for values that are
outside the range of the widest C integer type, @command{gawk} switches to
-the @samp{%g} format specifier. If @option{--lint} is provided on the
+the @code{%g} format specifier. If @option{--lint} is provided on the
command line (@pxref{Options}), @command{gawk}
warns about this. Other versions of @command{awk} may print invalid
values or do something else entirely.
@@ -9251,7 +9280,7 @@ values or do something else entirely.
A format specification can also include @dfn{modifiers} that can control
how much of the item's value is printed, as well as how much space it gets.
The modifiers come between the @samp{%} and the format-control letter.
-We will use the bullet symbol ``@bullet{}'' in the following examples to
+We use the bullet symbol ``@bullet{}'' in the following examples to
represent
spaces in the output. Here are the possible modifiers, in the order in
which they may appear:
@@ -9282,7 +9311,7 @@ It is in fact a @command{gawk} extension, intended for use in translating
messages at runtime.
@xref{Printf Ordering},
which describes how and why to use positional specifiers.
-For now, we will not use them.
+For now, we ignore them.
@item -
The minus sign, used before the width modifier (see later on in
@@ -9310,15 +9339,15 @@ to format is positive. The @samp{+} overrides the space modifier.
@item #
Use an ``alternate form'' for certain control letters.
-For @samp{%o}, supply a leading zero.
-For @samp{%x} and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for
+For @code{%o}, supply a leading zero.
+For @code{%x} and @code{%X}, supply a leading @code{0x} or @code{0X} for
a nonzero result.
-For @samp{%e}, @samp{%E}, @samp{%f}, and @samp{%F}, the result always
+For @code{%e}, @code{%E}, @code{%f}, and @code{%F}, the result always
contains a decimal point.
-For @samp{%g} and @samp{%G}, trailing zeros are not removed from the result.
+For @code{%g} and @code{%G}, trailing zeros are not removed from the result.
@item 0
-A leading @samp{0} (zero) acts as a flag that indicates that output should be
+A leading @samp{0} (zero) acts as a flag indicating that output should be
padded with zeros instead of spaces.
This applies only to the numeric output formats.
This flag only has an effect when the field width is wider than the
@@ -9504,7 +9533,7 @@ the @command{awk} program:
@example
awk 'BEGIN @{ print "Name Number"
print "---- ------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
@end example
The above example mixes @code{print} and @code{printf} statements in
@@ -9514,7 +9543,7 @@ same results:
@example
awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
printf "%-10s %s\n", "----", "------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
@end example
@noindent
@@ -9529,7 +9558,7 @@ emphasized by storing it in a variable, like this:
awk 'BEGIN @{ format = "%-10s %s\n"
printf format, "Name", "Number"
printf format, "----", "------" @}
- @{ printf format, $1, $2 @}' mail-list
+ @{ printf format, $1, $2 @}' mail-list
@end example
@c ENDOFRANGE printfs
@@ -9550,7 +9579,7 @@ This is called @dfn{redirection}.
@quotation NOTE
When @option{--sandbox} is specified (@pxref{Options}),
-redirecting output to files and pipes is disabled.
+redirecting output to files, pipes, and coprocesses is disabled.
@end quotation
A redirection appears after the @code{print} or @code{printf} statement.
@@ -9647,17 +9676,11 @@ in an @command{awk} script run periodically for system maintenance:
@example
report = "mail bug-system"
-print "Awk script failed:", $0 | report
-m = ("at record number " FNR " of " FILENAME)
-print m | report
+print("Awk script failed:", $0) | report
+print("at record number", FNR, "of", FILENAME) | report
close(report)
@end example
-The message is built using string concatenation and saved in the variable
-@code{m}. It's then sent down the pipeline to the @command{mail} program.
-(The parentheses group the items to concatenate---see
-@ref{Concatenation}.)
-
The @code{close()} function is called here because it's a good idea to close
the pipe as soon as all the intended output has been sent to it.
@xref{Close Files And Pipes},
@@ -9800,23 +9823,8 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE outre
@c ENDOFRANGE reout
-@node Special Files
-@section Special @value{FFN}s in @command{gawk}
-@c STARTOFRANGE gfn
-@cindex @command{gawk}, file names in
-
-@command{gawk} provides a number of special @value{FN}s that it interprets
-internally. These @value{FN}s provide access to standard file descriptors
-and TCP/IP networking.
-
-@menu
-* Special FD:: Special files for I/O.
-* Special Network:: Special files for network communications.
-* Special Caveats:: Things to watch out for.
-@end menu
-
@node Special FD
-@subsection Special Files for Standard Descriptors
+@section Special Files for Standard Pre-Opened Data Streams
@cindex standard input
@cindex input, standard
@cindex standard output
@@ -9827,9 +9835,12 @@ and TCP/IP networking.
@cindex files, descriptors, See file descriptors
Running programs conventionally have three input and output streams
-already available to them for reading and writing. These are known as
-the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
-output}. These streams are, by default, connected to your keyboard and screen, but
+already available to them for reading and writing. These are known
+as the @dfn{standard input}, @dfn{standard output}, and @dfn{standard
+error output}. These open streams (and any other open file or pipe)
+are often referred to by the technical term @dfn{file descriptors}.
+
+These streams are, by default, connected to your keyboard and screen, but
they are often redirected with the shell, via the @samp{<}, @samp{<<},
@samp{>}, @samp{>>}, @samp{>&}, and @samp{|} operators. Standard error
is typically used for writing error messages; the reason there are two separate
@@ -9838,7 +9849,7 @@ redirected separately.
@cindex differences in @command{awk} and @command{gawk}, error messages
@cindex error handling
-In other implementations of @command{awk}, the only way to write an error
+In traditional implementations of @command{awk}, the only way to write an error
message to standard error in an @command{awk} program is as follows:
@example
@@ -9864,19 +9875,19 @@ that is connected to your keyboard and screen. It represents the
``terminal,''@footnote{The ``tty'' in @file{/dev/tty} stands for
``Teletype,'' a serial terminal.} which on modern systems is a keyboard
and screen, not a serial console.)
-This usually has the same effect but not always: although the
+This generally has the same effect but not always: although the
standard error stream is usually the screen, it can be redirected; when
that happens, writing to the screen is not correct. In fact, if
@command{awk} is run from a background job, it may not have a
terminal at all.
Then opening @file{/dev/tty} fails.
-@command{gawk} provides special @value{FN}s for accessing the three standard
-streams. @value{COMMONEXT} It also provides syntax for accessing
-any other inherited open files. If the @value{FN} matches
-one of these special names when @command{gawk} redirects input or output,
-then it directly uses the stream that the @value{FN} stands for.
-These special @value{FN}s work for all operating systems that @command{gawk}
+@command{gawk}, BWK @command{awk}, and @command{mawk} provide
+special @value{FN}s for accessing the three standard streams.
+If the @value{FN} matches one of these special names when @command{gawk}
+(or one of the others) redirects input or output, then it directly uses
+the descriptor that the @value{FN} stands for. These special
+@value{FN}s work for all operating systems that @command{gawk}
has been ported to, not just those that are POSIX-compliant:
@cindex common extensions, @code{/dev/stdin} special file
@@ -9898,19 +9909,10 @@ The standard output (file descriptor 1).
@item /dev/stderr
The standard error output (file descriptor 2).
-
-@item /dev/fd/@var{N}
-The file associated with file descriptor @var{N}. Such a file must
-be opened by the program initiating the @command{awk} execution (typically
-the shell). Unless special pains are taken in the shell from which
-@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
@end table
-The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
-are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
-respectively. However, they are more self-explanatory.
-The proper way to write an error message in a @command{gawk} program
-is to use @file{/dev/stderr}, like this:
+With these facilities,
+the proper way to write an error message then becomes:
@example
print "Serious error detected!" > "/dev/stderr"
@@ -9922,14 +9924,51 @@ Like any other redirection, the value must be a string.
It is a common error to omit the quotes, which leads
to confusing results.
-Finally, using the @code{close()} function on a @value{FN} of the
+@command{gawk} does not treat these @value{FN}s as special when
+in POSIX compatibility mode. However, since BWK @command{awk}
+supports them, @command{gawk} does support them even when
+invoked with the @option{--traditional} option (@pxref{Options}).
+
+@node Special Files
+@section Special @value{FFN}s in @command{gawk}
+@c STARTOFRANGE gfn
+@cindex @command{gawk}, file names in
+
+Besides access to standard input, standard output, and standard error,
+@command{gawk} provides access to any open file descriptor.
+Additionally, there are special @value{FN}s reserved for
+TCP/IP networking.
+
+@menu
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
+* Special Network:: Special files for network communications.
+* Special Caveats:: Things to watch out for.
+@end menu
+
+@node Other Inherited Files
+@subsection Accessing Other Open Files With @command{gawk}
+
+Besides the @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+special @value{FN}s mentioned earlier, @command{gawk} provides syntax
+for accessing any other inherited open file:
+
+@table @file
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must
+be opened by the program initiating the @command{awk} execution (typically
+the shell). Unless special pains are taken in the shell from which
+@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
+@end table
+
+The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are essentially aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and
+@file{/dev/fd/2}, respectively. However, those names are more self-explanatory.
+
+Note that using @code{close()} on a @value{FN} of the
form @code{"/dev/fd/@var{N}"}, for file descriptor numbers
above two, does actually close the given file descriptor.
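
As a rough sketch, assuming the shell has opened descriptor 3 on some
file before starting @command{gawk} (the descriptor number and the
@value{FN} here are only illustrative), a program can read that
descriptor and then close it:

@example
# Run as:  gawk -f fd3.awk 3< extra-data.txt
BEGIN @{
    while ((getline line < "/dev/fd/3") > 0)
        print "from descriptor 3:", line
    close("/dev/fd/3")    # really closes descriptor 3
@}
@end example
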
-The @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
-special files are also recognized internally by several other
-versions of @command{awk}.
-
@node Special Network
@subsection Special Files for Network Communications
@cindex networks, support for
@@ -9958,15 +9997,20 @@ Full discussion is delayed until
@node Special Caveats
@subsection Special @value{FFN} Caveats
-Here is a list of things to bear in mind when using the
+Here are some things to bear in mind when using the
special @value{FN}s that @command{gawk} provides:
@itemize @value{BULLET}
@cindex compatibility mode (@command{gawk}), file names
@cindex file names, in compatibility mode
@item
-Recognition of these special @value{FN}s is disabled if @command{gawk} is in
-compatibility mode (@pxref{Options}).
+Recognition of the @value{FN}s for the three standard pre-opened
+files is disabled only in POSIX mode.
+
+@item
+Recognition of the other special @value{FN}s is disabled if @command{gawk} is in
+compatibility mode (either @option{--traditional} or @option{--posix};
+@pxref{Options}).
@item
@command{gawk} @emph{always}
@@ -10136,7 +10180,8 @@ to a string indicating the error.
Note also that @samp{close(FILENAME)} has no ``magic'' effects on the
implicit loop that reads through the files named on the command line.
It is, more likely, a close of a file that was never opened with a
-redirection, so @command{awk} silently does nothing.
+redirection, so @command{awk} silently does nothing, except return
+a negative value.
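
For example, a short sketch of checking the return value (the
@value{FN} here is made up for illustration):

@example
BEGIN @{
    print "hello" > "results.txt"
    if (close("results.txt") < 0)       # negative means failure
        print "close failed:", ERRNO > "/dev/stderr"   # ERRNO is gawk-specific
@}
@end example
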
@cindex @code{|} (vertical bar), @code{|&} operator (I/O), pipes@comma{} closing
When using the @samp{|&} operator to communicate with a coprocess,
@@ -10148,10 +10193,10 @@ the first argument is the name of the command or special file used
to start the coprocess.
The second argument should be a string, with either of the values
@code{"to"} or @code{"from"}. Case does not matter.
-As this is an advanced feature, a more complete discussion is
+As this is an advanced feature, discussion is
delayed until
@ref{Two-way I/O},
-which discusses it in more detail and gives an example.
+which describes it in more detail and gives an example.
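
Still, as a brief preview (the command here is only an example),
closing the @code{"to"} end signals end-of-file to the coprocess,
after which its output can still be read:

@example
BEGIN @{
    cmd = "sort"
    print "banana" |& cmd
    print "apple"  |& cmd
    close(cmd, "to")                 # send end-of-file to the coprocess
    while ((cmd |& getline line) > 0)
        print "sorted:", line
    close(cmd)
@}
@end example
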
@cindex sidebar, Using @code{close()}'s Return Value
@ifdocbook
@@ -10285,15 +10330,15 @@ that modify the behavior of the format control letters.
@item
Output from both @code{print} and @code{printf} may be redirected to
-files, pipes, and co-processes.
+files, pipes, and coprocesses.
@item
@command{gawk} provides special file names for access to standard input,
output and error, and for network communications.
@item
-Use @code{close()} to close open file, pipe and co-process redirections.
-For co-processes, it is possible to close only one direction of the
+Use @code{close()} to close open file, pipe and coprocess redirections.
+For coprocesses, it is possible to close only one direction of the
communications.
@end itemize
@@ -10607,7 +10652,7 @@ if (/barfly/ || /camelot/)
@noindent
are exactly equivalent.
One rather bizarre consequence of this rule is that the following
-Boolean expression is valid, but does not do what the user probably
+Boolean expression is valid, but does not do what its author probably
intended:
@example
@@ -10653,10 +10698,9 @@ Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
older implementations do not.
@value{DARKCORNER}
-This can lead to confusion when attempting to use regexp constants
-as arguments to user-defined functions
-(@pxref{User-defined}).
-For example:
+Because some built-in functions accept regexp constants as arguments,
+it can be confusing when attempting to use regexp constants as arguments
+to user-defined functions (@pxref{User-defined}). For example:
@example
function mysub(pat, repl, str, global)
@@ -10724,8 +10768,8 @@ variable's current value. Variables are given new values with
@dfn{decrement operators}.
@xref{Assignment Ops}.
In addition, the @code{sub()} and @code{gsub()} functions can
-change a variable's value, and the @code{match()}, @code{patsplit()}
-and @code{split()} functions can change the contents of their
+change a variable's value, and the @code{match()}, @code{split()}
+and @code{patsplit()} functions can change the contents of their
array parameters. @xref{String Functions}.
@cindex variables, built-in
@@ -10741,7 +10785,7 @@ Variables in @command{awk} can be assigned either numeric or string values.
The kind of value a variable holds can change over the life of a program.
By default, variables are initialized to the empty string, which
is zero if converted to a number. There is no need to explicitly
-``initialize'' a variable in @command{awk},
+initialize a variable in @command{awk},
which is what you would do in C and in most other traditional languages.
@node Assignment Options
@@ -10978,7 +11022,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@noindent
The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
-treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
+treats @samp{4,321} as 4, while in the Danish locale, it's treated
as the full number, 4.321.
Some earlier versions of @command{gawk} fully complied with this aspect
@@ -11535,7 +11579,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@docbook
</sidebar>
@@ -11581,7 +11625,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@end cartouche
@end ifnotdocbook
@c ENDOFRANGE exas
@@ -11893,7 +11937,7 @@ attribute.
@item
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
-@code{patsplit()}, @code{split()} and @code{match()} that are numeric
+@code{match()}, @code{split()} and @code{patsplit()} that are numeric
strings have the @var{strnum} attribute. Otherwise, they have
the @var{string} attribute. Uninitialized variables also have the
@var{strnum} attribute.
@@ -12048,22 +12092,23 @@ Thus, the six-character input string @w{@samp{ +3.14}} receives the
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
+@c 22.9.2014: Tested with mawk and BWK awk, got same results.
@example
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == " +3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == " +3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "+3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "+3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == 3.14) @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == " +3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == " +3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "+3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "+3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
@@ -12137,9 +12182,8 @@ part of the test always succeeds. Because the operators are
so similar, this kind of error is very difficult to spot when
scanning the source code.
-@cindex @command{gawk}, comparison operators and
-The following table of expressions illustrates the kind of comparison
-@command{gawk} performs, as well as what the result of the comparison is:
+The following list of expressions illustrates the kinds of comparisons
+@command{awk} performs, as well as what the result of each comparison is:
@table @code
@item 1.5 <= 2.0
@@ -12212,7 +12256,7 @@ dynamic regexp (@pxref{Regexp Usage}; also
@cindex @command{awk}, regexp constants and
@cindex regexp constants
-In modern implementations of @command{awk}, a constant regular
+A constant regular
expression in slashes by itself is also an expression. The regexp
@code{/@var{regexp}/} is an abbreviation for the following comparison expression:
@@ -12232,7 +12276,7 @@ where this is discussed in more detail.
The POSIX standard says that string comparison is performed based
on the locale's @dfn{collating order}. This is the order in which
characters sort, as defined by the locale (for more discussion,
-@pxref{Ranges and Locales}). This order is usually very different
+@pxref{Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -12312,7 +12356,7 @@ no substring @samp{foo} in the record.
True if at least one of @var{boolean1} or @var{boolean2} is true.
For example, the following statement prints all records in the input
that contain @emph{either} @samp{edu} or
-@samp{li} or both:
+@samp{li}:
@example
if ($0 ~ /edu/ || $0 ~ /li/) print
@@ -12321,6 +12365,9 @@ if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is false. This can make a difference when @var{boolean2} contains
expressions that have side effects.
+(Thus, this test never really distinguishes records that contain both
+@samp{edu} and @samp{li}---as soon as @samp{edu} is matched,
+the full test succeeds.)
@item ! @var{boolean}
True if @var{boolean} is false. For example,
@@ -12330,7 +12377,7 @@ variable is not defined:
@example
BEGIN @{ if (! ("HOME" in ENVIRON))
- print "no home!" @}
+ print "no home!" @}
@end example
(The @code{in} operator is described in
@@ -12629,7 +12676,7 @@ expression because the first @samp{$} has higher precedence than the
@samp{++}; to avoid the problem the expression can be rewritten as
@samp{$($0++)--}.
-This table presents @command{awk}'s operators, in order of highest
+This list presents @command{awk}'s operators, in order of highest
to lowest precedence:
@c @asis for docbook to come out right
@@ -12786,8 +12833,8 @@ system about the local character set and language. The ISO C standard
defines a default @code{"C"} locale, which is an environment that is
typical of what many C programmers are used to.
-Once upon a time, the locale setting used to affect regexp matching
-(@pxref{Ranges and Locales}), but this is no longer true.
+Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (@pxref{Ranges and Locales}).
Locales can affect record splitting. For the normal case of @samp{RS =
"\n"}, the locale is largely irrelevant. For other single-character
@@ -12799,7 +12846,7 @@ character}, to find the record terminator.
Locales can affect how dates and times are formatted (@pxref{Time
Functions}). For example, a common way to abbreviate the date September
4, 2015 in the United States is ``9/4/15.'' In many countries in
-Europe, however, it is abbreviated ``4.9.15.'' Thus, the @samp{%x}
+Europe, however, it is abbreviated ``4.9.15.'' Thus, the @code{%x}
specification in a @code{"US"} locale might produce @samp{9/4/15},
while in a @code{"EUROPE"} locale, it might produce @samp{4.9.15}.
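
As a small sketch (the exact output depends on the current date and on
which locales are installed on your system):

@example
LC_ALL=en_DK.utf-8 gawk 'BEGIN @{ print strftime("%x") @}'
@end example
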
@@ -12841,7 +12888,8 @@ Locales can influence the conversions.
@item
@command{awk} provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus and minus.
-It also provides comparison operators, boolean operators, and regexp
+It also provides comparison operators, boolean operators, array membership
+testing, and regexp
matching operators. String concatenation is accomplished by placing
two expressions next to each other; there is no explicit operator.
The three-operand @samp{?:} operator provides an ``if-else'' test within
@@ -12856,7 +12904,7 @@ In @command{awk}, a value is considered to be true if it is non-zero
@emph{or} non-null. Otherwise, the value is false.
@item
-A value's type is set upon each assignment and may change over its
+A variable's type is set upon each assignment and may change over its
lifetime. The type determines how it behaves in comparisons (string
or numeric).
@@ -12936,7 +12984,7 @@ is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
@item @var{begpat}, @var{endpat}
-A pair of patterns separated by a comma, specifying a range of records.
+A pair of patterns separated by a comma, specifying a @dfn{range} of records.
The range includes both the initial record that matches @var{begpat} and
the final record that matches @var{endpat}.
(@xref{Ranges}.)
@@ -13026,8 +13074,8 @@ $ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@cindex regexp constants, as patterns
@cindex patterns, regexp constants as
A regexp constant as a pattern is also a special case of an expression
-pattern. The expression @code{/li/} has the value one if @samp{li}
-appears in the current input record. Thus, as a pattern, @code{/li/}
+pattern. The expression @samp{/li/} has the value one if @samp{li}
+appears in the current input record. Thus, as a pattern, @samp{/li/}
matches any record containing @samp{li}.
@cindex Boolean expressions, as patterns
@@ -13209,7 +13257,7 @@ input is read. For example:
@example
$ @kbd{awk '}
> @kbd{BEGIN @{ print "Analysis of \"li\"" @}}
-> @kbd{/li/ @{ ++n @}}
+> @kbd{/li/ @{ ++n @}}
> @kbd{END @{ print "\"li\" appears in", n, "records." @}' mail-list}
@print{} Analysis of "li"
@print{} "li" appears in 4 records.
@@ -13289,9 +13337,10 @@ The POSIX standard specifies that @code{NF} is available in an @code{END}
rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
-In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
-other implementations, do not.
+In fact, all of BWK @command{awk}, @command{mawk}, and @command{gawk}
+preserve the value of @code{$0} for use in @code{END} rules. Be aware,
+however, that some other implementations and many older versions
+of Unix @command{awk} do not.
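
For example, with @command{gawk} (a quick illustrative test):

@example
$ @kbd{echo "a b c" | gawk 'END @{ print NF, $0 @}'}
@print{} 3 a b c
@end example
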
The third point follows from the first two. The meaning of @samp{print}
inside a @code{BEGIN} or @code{END} rule is the same as always:
@@ -13386,8 +13435,8 @@ level of the @command{awk} program.
@cindex @code{next} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{next} statement (@pxref{Next Statement}) is not allowed inside
-either a @code{BEGINFILE} or and @code{ENDFILE} rule. The @code{nextfile}
-statement (@pxref{Nextfile Statement}) is allowed only inside a
+either a @code{BEGINFILE} or an @code{ENDFILE} rule. The @code{nextfile}
+statement is allowed only inside a
@code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
@@ -13451,7 +13500,7 @@ There are two ways to get the value of the shell variable
into the body of the @command{awk} program.
@cindex shells, quoting
-The most common method is to use shell quoting to substitute
+A common method is to use shell quoting to substitute
the variable's value into the program inside the script.
For example, consider the following program:
@@ -13708,20 +13757,21 @@ If the @var{condition} is true, it executes the statement @var{body}.
is not zero and not a null string.)
@end ifinfo
After @var{body} has been executed,
-@var{condition} is tested again, and if it is still true, @var{body} is
-executed again. This process repeats until the @var{condition} is no longer
-true. If the @var{condition} is initially false, the body of the loop is
-never executed and @command{awk} continues with the statement following
+@var{condition} is tested again, and if it is still true, @var{body}
+executes again. This process repeats until the @var{condition} is no longer
+true. If the @var{condition} is initially false, the body of the loop
+never executes and @command{awk} continues with the statement following
the loop.
This example prints the first three fields of each record, one per line:
@example
-awk '@{
- i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
+awk '
+@{
+ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
@}' inventory-shipped
@end example
@@ -13755,14 +13805,14 @@ do
while (@var{condition})
@end example
-Even if the @var{condition} is false at the start, the @var{body} is
-executed at least once (and only once, unless executing @var{body}
+Even if the @var{condition} is false at the start, the @var{body}
+executes at least once (and only once, unless executing @var{body}
makes @var{condition} true). Contrast this with the corresponding
@code{while} statement:
@example
while (@var{condition})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -13772,11 +13822,11 @@ The following is an example of a @code{do} statement:
@example
@{
- i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
+ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
@}
@end example
@@ -13813,9 +13863,10 @@ compares it against the desired number of iterations.
For example:
@example
-awk '@{
- for (i = 1; i <= 3; i++)
- print $i
+awk '
+@{
+ for (i = 1; i <= 3; i++)
+ print $i
@}' inventory-shipped
@end example
@@ -13843,7 +13894,7 @@ between 1 and 100:
@example
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
@end example
If there is nothing to be done, any of the three expressions in the
@@ -14163,7 +14214,7 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
According to the POSIX standard, the behavior is undefined if the
@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error. Although POSIX permits it,
+@command{gawk} treats it as a syntax error. Although POSIX does not disallow it,
most other @command{awk} implementations don't allow the @code{next}
statement inside function bodies (@pxref{User-defined}). Just as with any
other @code{next} statement, a @code{next} statement inside a function
@@ -14218,7 +14269,7 @@ opened with redirections. It is not related to the main processing that
@quotation NOTE
For many years, @code{nextfile} was a
-@command{gawk} extension. As of September, 2012, it was accepted for
+common extension. In September, 2012, it was accepted for
inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@end quotation
@@ -14227,8 +14278,8 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
-Versions}) also support @code{nextfile}. However, they don't allow the
+The current version of BWK @command{awk}, and @command{mawk}
+also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
next record and starts processing it with the first rule in the program,
@@ -14260,8 +14311,8 @@ the program to stop immediately.
An @code{exit} statement that is not part of a @code{BEGIN} or @code{END}
rule stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-@code{END} rule if there is one.
-Any @code{ENDFILE} rules are also skipped; they are not executed.
+@code{END} rule if there is one. @command{gawk} also skips
+any @code{ENDFILE} rules; they do not execute.
In such a case,
if you don't want the @code{END} rule to do its job, set a variable
@@ -14369,7 +14420,7 @@ respectively, should use binary I/O. A string value of @code{"rw"} or
@code{"wr"} indicates that all files should use binary I/O. Any other
string value is treated the same as @code{"rw"}, but causes @command{gawk}
to generate a warning message. @code{BINMODE} is described in more
-detail in @ref{PC Using}. @command{mawk} @pxref{Other Versions}),
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}),
also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@@ -14496,7 +14547,7 @@ printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
(@pxref{String Functions}).
Its default value is @code{"%.6g"}. Earlier versions of @command{awk}
-also used @code{OFMT} to specify the format for converting numbers to
+used @code{OFMT} to specify the format for converting numbers to
strings in general expressions; this is now done by @code{CONVFMT}.
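
For example, a one-line sketch of @code{OFMT} at work:

@example
$ @kbd{gawk 'BEGIN @{ OFMT = "%.2f"; print 17.23456789 @}'}
@print{} 17.23
@end example
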
@cindex @code{sprintf()} function, @code{OFMT} variable and
@@ -14648,8 +14699,8 @@ successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
-program, @command{gawk} automatically sets it to a new value when the
-next file is opened.
+program, @command{gawk} automatically sets it to a new value when it
+opens the next file.
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
@@ -14714,10 +14765,10 @@ can give @code{FILENAME} a value.
@cindex @code{FNR} variable
@item @code{FNR}
-The current record number in the current file. @code{FNR} is
-incremented each time a new record is read
-(@pxref{Records}). It is reinitialized
-to zero each time a new input file is started.
+The current record number in the current file. @command{awk} increments
+@code{FNR} each time it reads a new record (@pxref{Records}).
+@command{awk} resets @code{FNR} to zero each time it starts a new
+input file.
@cindex @code{NF} variable
@item @code{NF}
@@ -14749,7 +14800,7 @@ array causes a fatal error. Any attempt to assign to an element of
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
-@code{NR} is incremented each time a new record is read.
+@command{awk} increments @code{NR} each time it reads a new record.
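
For example, a sketch run over two (hypothetical) @value{DF}s shows
@code{FNR} restarting for each file while @code{NR} keeps counting:

@example
gawk '@{ print FILENAME, FNR, NR @}' file1 file2
@end example
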
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@@ -14829,7 +14880,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (@var{index} in @var{array})} loops.
+@samp{for (@var{indx} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -14850,7 +14901,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
-if your version of @command{gawk} supports arbitrary precision numbers
+if your version of @command{gawk} supports arbitrary precision arithmetic
(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@@ -14899,14 +14950,14 @@ The @code{PROCINFO} array has the following additional uses:
@itemize @value{BULLET}
@item
-It may be used to cause coprocesses to communicate over pseudo-ttys
-instead of through two-way pipes; this is discussed further in
-@ref{Two-way I/O}.
-
-@item
It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
+
+@item
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@end itemize
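
As an illustrative sketch of both uses (the timeout value and the
command are arbitrary choices):

@example
BEGIN @{
    # Give up if standard input yields nothing for three seconds
    PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 3000

    # Ask for a pseudo-tty when the coprocess "sort" is started later
    PROCINFO["sort", "pty"] = 1
@}
@end example
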
@cindex @code{RLENGTH} variable
@@ -15194,6 +15245,12 @@ following @option{-v} are passed on to the @command{awk} program.
(@xref{Getopt Function}, for an @command{awk} library function that
parses command-line options.)
+When designing your program, you should choose options that don't
+conflict with @command{gawk}'s, since it will process any options
+that it accepts before passing the rest of the command line on to
+your program. Using @samp{#!} with the @option{-E} option may help
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
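For instance, a sketch of such a script (the path to @command{gawk} is
system-dependent):

@example
#! /usr/local/bin/gawk -E
# With -E, gawk stops processing its own options here, so the
# program's users cannot sneak in gawk options or variable assignments.
BEGIN @{
    for (i = 1; i < ARGC; i++)
        print "argument", i, "is", ARGV[i]
@}
@end example
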
@node Pattern Action Summary
@section Summary
@@ -15228,7 +15285,7 @@ input and output statements, and deletion statements.
The control statements in @command{awk} are @code{if}-@code{else},
@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
adds the @code{switch} statement. There are two flavors of @code{for}
-statement: one for for performing general looping, and the other iterating
+statement: one for performing general looping, and the other for iterating
through an array.
@item
@@ -15245,12 +15302,17 @@ The @code{exit} statement terminates your program. When executed
from an action (or function body) it transfers control to the
@code{END} statements. From an @code{END} statement body, it exits
immediately. You may pass an optional numeric value to be used
-at @command{awk}'s exit status.
+as @command{awk}'s exit status.
@item
Some built-in variables provide control over @command{awk}, mainly for I/O.
Other variables convey information from @command{awk} to your program.
+@item
+@code{ARGC} and @code{ARGV} make the command-line arguments available
+to your program. Manipulating them from a @code{BEGIN} rule lets you
+control how @command{awk} will process the provided @value{DF}s.
+
@end itemize
@node Arrays
@@ -15271,24 +15333,13 @@ The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
ability to support true arrays of arrays.
-@cindex variables, names of
-@cindex functions, names of
-@cindex arrays, names of, and names of functions/variables
-@cindex names, arrays/variables
-@cindex namespace issues
-@command{awk} maintains a single set
-of names that may be used for naming variables, arrays, and functions
-(@pxref{User-defined}).
-Thus, you cannot have a variable and an array with the same name in the
-same @command{awk} program.
-
@menu
* Array Basics:: The basics of arrays.
-* Delete:: The @code{delete} statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
@@ -15716,14 +15767,14 @@ begin with a number:
@example
@c file eg/misc/arraymax.awk
@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
@}
END @{
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
@}
@c endfile
@end example
@@ -15763,9 +15814,9 @@ program's @code{END} rule, as follows:
@example
END @{
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
@}
@end example
@@ -15787,7 +15838,7 @@ an array:
@example
for (@var{var} in @var{array})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -15860,7 +15911,7 @@ BEGIN @{
@}
@end example
-Here is what happens when run with @command{gawk}:
+Here is what happens when run with @command{gawk} (and @command{mawk}):
@example
$ @kbd{gawk -f loopcheck.awk}
@@ -15978,7 +16029,8 @@ does not affect the loop.
For example:
@example
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
> @kbd{ for (i in a)}
@@ -15986,7 +16038,8 @@ $ @kbd{gawk 'BEGIN @{}
> @kbd{@}'}
@print{} 4 4
@print{} 3 3
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
@@ -16035,118 +16088,6 @@ the @code{delete} statement.
In addition, @command{gawk} provides built-in functions for
sorting arrays; see @ref{Array Sorting Functions}.
-@node Delete
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex deleting elements in arrays
-@cindex arrays, elements, deleting
-@cindex elements in arrays, deleting
-
-To remove an individual element of an array, use the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index-expression}]
-@end example
-
-Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never
-been referred to or been given a value.
-The following is an example of deleting elements in an array:
-
-@example
-for (i in frequencies)
- delete frequencies[i]
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-Once an element is deleted, a subsequent @code{for} statement to scan the array
-does not report that element and the @code{in} operator to check for
-the presence of that element returns zero (i.e., false):
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-@cindex null strings, and deleting array elements
-It is important to note that deleting an element is @emph{not} the
-same as assigning it a null value (the empty string, @code{""}).
-For example:
-
-@example
-foo[4] = ""
-if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-@end example
-
-@cindex lint checking, array elements
-It is not an error to delete an element that does not exist.
-However, if @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} issues a warning message when an element that
-is not in the array is deleted.
-
-@cindex common extensions, @code{delete} to delete entire arrays
-@cindex extensions, common@comma{} @code{delete} to delete entire arrays
-@cindex arrays, deleting entire contents
-@cindex deleting entire arrays
-@cindex @code{delete} @var{array}
-@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
-All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the @code{delete} statement,
-as follows:
-
-
-@example
-delete @var{array}
-@end example
-
-Using this version of the @code{delete} statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
-@cindex Brian Kernighan's @command{awk}
-@quotation NOTE
-For many years,
-using @code{delete} without a subscript was a @command{gawk} extension.
-As of September, 2012, it was accepted for
-inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
-the Austin Group website}. This form of the @code{delete} statement is also supported
-by BWK @command{awk} and @command{mawk}, as well as
-by a number of other implementations (@pxref{Other Versions}).
-@end quotation
-
-@cindex portability, deleting array elements
-@cindex Brennan, Michael
-The following statement provides a portable but nonobvious way to clear
-out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
-
-@example
-split("", array)
-@end example
-
-@cindex @code{split()} function, array elements@comma{} deleting
-The @code{split()} function
-(@pxref{String Functions})
-clears out the target array first. This call asks it to split
-apart the null string. Because there is no data to split out, the
-function simply clears the array and then returns.
-
-@quotation CAUTION
-Deleting an array does not change its type; you cannot
-delete an array and then use the array's name as a scalar
-(i.e., a regular variable). For example, the following does not work:
-
-@example
-a[1] = 3
-delete a
-a = 3
-@end example
-@end quotation
-
@node Numeric Array Subscripts
@section Using Numbers to Subscript Arrays
@@ -16187,7 +16128,7 @@ since @code{"12.15"} is different from @code{"12.153"}.
@cindex integer array indices
According to the rules for conversions
(@pxref{Conversion}), integer
-values are always converted to strings as integers, no matter what the
+values always convert to strings as integers, no matter what the
value of @code{CONVFMT} may happen to be. So the usual case of
the following works:
@@ -16210,7 +16151,7 @@ and
all refer to the same element!
As with many things in @command{awk}, the majority of the time
-things work as one would expect them to. But it is useful to have a precise
+things work as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.
@@ -16274,6 +16215,119 @@ Even though it is somewhat unusual, the null string
if @option{--lint} is provided
on the command line (@pxref{Options}).
+@node Delete
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements in arrays
+@cindex arrays, elements, deleting
+@cindex elements in arrays, deleting
+
+To remove an individual element of an array, use the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index-expression}]
+@end example
+
+Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never
+been referred to or been given a value.
+The following is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+Once an element is deleted, a subsequent @code{for} statement to scan the array
+does not report that element and the @code{in} operator to check for
+the presence of that element returns zero (i.e., false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+@cindex null strings, and deleting array elements
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+For example:
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+@cindex lint checking, array elements
+It is not an error to delete an element that does not exist.
+However, if @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} issues a warning message when an element that
+is not in the array is deleted.
+
+@cindex common extensions, @code{delete} to delete entire arrays
+@cindex extensions, common@comma{} @code{delete} to delete entire arrays
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex @code{delete} @var{array}
+@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
+All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the @code{delete} statement,
+as follows:
+
+
+@example
+delete @var{array}
+@end example
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+This form of the @code{delete} statement is also supported
+by BWK @command{awk} and @command{mawk}, as well as
+by a number of other implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@quotation NOTE
+For many years, using @code{delete} without a subscript was a common
+extension. In September, 2012, it was accepted for inclusion into the
+POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
+the Austin Group website}.
+@end quotation
+
+@cindex portability, deleting array elements
+@cindex Brennan, Michael
+The following statement provides a portable but nonobvious way to clear
+out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
+
+@example
+split("", array)
+@end example
+
+@cindex @code{split()} function, array elements@comma{} deleting
+The @code{split()} function
+(@pxref{String Functions})
+clears out the target array first. This call asks it to split
+apart the null string. Because there is no data to split out, the
+function simply clears the array and then returns.
+
+@quotation CAUTION
+Deleting all the elements from an array does not change its type; you cannot
+clear an array and then use the array's name as a scalar
+(i.e., a regular variable). For example, the following does not work:
+
+@example
+a[1] = 3
+delete a
+a = 3
+@end example
+@end quotation
+
@node Multidimensional
@section Multidimensional Arrays
@@ -16285,7 +16339,7 @@ on the command line (@pxref{Options}).
@cindex arrays, multidimensional
A multidimensional array is an array in which an element is identified
by a sequence of indices instead of a single index. For example, a
-two-dimensional array requires two indices. The usual way (in most
+two-dimensional array requires two indices. The usual way (in many
languages, including @command{awk}) to refer to an element of a
two-dimensional array named @code{grid} is with
@code{grid[@var{x},@var{y}]}.
@@ -16460,8 +16514,9 @@ a[1][3][1, "name"] = "barney"
Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
-non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @code{4} of the main array @code{a}:
+non-rectangular, or jagged in structure. You can assign a scalar value to
+the index @code{4} of the main array @code{a}, even though @code{a[1]}
+is itself an array and not a scalar:
@example
a[4] = "An element in a jagged array"
@@ -16543,6 +16598,8 @@ for (i in array) @{
print array[i][j]
@}
@}
+ else
+ print array[i]
@}
@end example
@@ -16827,8 +16884,9 @@ Often random integers are needed instead. Following is a user-defined function
that can be used to obtain a random non-negative integer less than @var{n}:
@example
-function randint(n) @{
- return int(n * rand())
+function randint(n)
+@{
+ return int(n * rand())
@}
@end example
@@ -16848,8 +16906,7 @@ function roll(n) @{ return 1 + int(rand() * n) @}
# Roll 3 six-sided dice and
# print total number of points.
@{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
@}
@end example
@@ -16938,7 +16995,7 @@ doing index calculations, particularly if you are used to C.
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
-towards the end since the list is presented in alphabetic order.
+towards the end since the list is presented alphabetically.
Those functions that are specific to @command{gawk} are marked with a
pound sign (@samp{#}). They are not available in compatibility mode
@@ -16982,6 +17039,7 @@ When comparing strings, @code{IGNORECASE} affects the sorting
(@pxref{Array Sorting Functions}). If the
@var{source} array contains subarrays as values (@pxref{Arrays of
Arrays}), they will come last, after all scalar values.
+Subarrays are @emph{not} recursively sorted.
For example, if the contents of @code{a} are as follows:
@@ -17118,7 +17176,10 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-It is a fatal error to use a regexp constant for @var{find}.
+With BWK @command{awk} and @command{gawk},
+it is a fatal error to use a regexp constant for @var{find}.
+Other implementations allow it, simply treating the regexp
+constant as an expression meaning @samp{$0 ~ /regexp/}.
@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@@ -17232,13 +17293,12 @@ For example:
@example
@c file eg/misc/findpat.awk
@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
@}
@}
@c endfile
@@ -17334,7 +17394,7 @@ Any leading separator will be in @code{@var{seps}[0]}.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
-(@pxref{Splitting By Content}.
+(@pxref{Splitting By Content}).
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
@@ -17347,8 +17407,7 @@ and store the pieces in @var{array} and the separator strings in the
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
forth. The string value of the third argument, @var{fieldsep}, is
a regexp describing where to split @var{string} (much as @code{FS} can
-be a regexp describing where to split input records;
-@pxref{Regexp Field Splitting}).
+be a regexp describing where to split input records).
If @var{fieldsep} is omitted, the value of @code{FS} is used.
@code{split()} returns the number of elements created.
@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]}
@@ -17643,6 +17702,59 @@ Nonalphabetic characters are left unchanged. For example,
@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
@end table
+@cindex sidebar, Matching the Null String
+@ifdocbook
+@docbook
+<sidebar><title>Matching the Null String</title>
+@end docbook
+
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Matching the Null String}
+
+
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+@end cartouche
+@end ifnotdocbook
+
+
@node Gory Details
@subsubsection More About @samp{\} and @samp{&} with @code{sub()}, @code{gsub()}, and @code{gensub()}
@@ -17656,7 +17768,7 @@ Nonalphabetic characters are left unchanged. For example,
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@quotation CAUTION
-This section has been known to cause headaches.
+This subsubsection has been reported to cause headaches.
You might want to skip it upon first reading.
@end quotation
@@ -17947,58 +18059,6 @@ and the special cases for @code{sub()} and @code{gsub()},
we recommend the use of @command{gawk} and @code{gensub()} when you have
to do substitutions.
-@cindex sidebar, Matching the Null String
-@ifdocbook
-@docbook
-<sidebar><title>Matching the Null String</title>
-@end docbook
-
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Matching the Null String}
-
-
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-@end cartouche
-@end ifnotdocbook
-
@node I/O Functions
@subsection Input/Output Functions
@cindex input/output functions
@@ -18051,10 +18111,9 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to BWK @command{awk} in
-April of 1992. For two decades, it was not part of the POSIX standard.
-As of December, 2012, it was accepted for inclusion into the POSIX
-standard.
+Brian Kernighan added @code{fflush()} to his @command{awk} in April
+of 1992. For two decades, it was a common extension. In December,
+2012, it was accepted for inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=634, the Austin Group website}.
POSIX standardizes @code{fflush()} as follows: If there
@@ -18451,7 +18510,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item @code{strftime(} [@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -18557,7 +18616,7 @@ of its ISO week number is 2013, even though its year is 2012.
The full year of the ISO week number, as a decimal number.
@item %h
-Equivalent to @samp{%b}.
+Equivalent to @code{%b}.
@item %H
The hour (24-hour clock) as a decimal number (00--23).
@@ -18626,7 +18685,7 @@ The locale's ``appropriate'' date representation.
@item %X
The locale's ``appropriate'' time representation.
-(This is @samp{%T} in the @code{"C"} locale.)
+(This is @code{%T} in the @code{"C"} locale.)
@item %y
The year modulo 100 as a decimal number (00--99).
@@ -18647,7 +18706,7 @@ no time zone is determinable.
@item %Ec %EC %Ex %EX %Ey %EY %Od %Oe %OH
@itemx %OI %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
``Alternate representations'' for the specifications
-that use only the second letter (@samp{%c}, @samp{%C},
+that use only the second letter (@code{%c}, @code{%C},
and so on).@footnote{If you don't understand any of this, don't worry about
it; these facilities are meant to make it easier to ``internationalize''
programs.
@@ -18718,7 +18777,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Monday, May 05, 2014.
+@print{} Today is Monday, September 22, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -18910,19 +18969,18 @@ For example, if you have a bit string @samp{10111001} and you shift it
right by three bits, you end up with @samp{00010111}.@footnote{This example
shows that 0's come in on the left side. For @command{gawk}, this is
always true, but in some languages, it's possible to have the left side
-fill with 1's. Caveat emptor.}
+fill with 1's.}
@c Purposely decided to use 0's and 1's here. 2/2001.
-If you start over
-again with @samp{10111001} and shift it left by three bits, you end up
-with @samp{11001000}.
-@command{gawk} provides built-in functions that implement the
-bitwise operations just described. They are:
+If you start over again with @samp{10111001} and shift it left by three
+bits, you end up with @samp{11001000}. The following list describes
+@command{gawk}'s built-in functions that implement the bitwise operations.
+Optional parameters are enclosed in square brackets ([ ]):
@cindex @command{gawk}, bitwise operations in
@table @code
@cindexgawkfunc{and}
@cindex bitwise AND
-@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{and(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@@ -18937,7 +18995,7 @@ Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{or(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@@ -18947,7 +19005,7 @@ Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{xor(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
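
For example, a brief sketch:

@example
$ @kbd{gawk 'BEGIN @{}
> @kbd{    printf "and: %d, or: %d, xor: %d\n",}
> @kbd{           and(12, 10), or(12, 10), xor(12, 10)}
> @kbd{@}'}
@print{} and: 8, or: 14, xor: 6
@end example
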
@@ -19070,7 +19128,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of an array of arrays.
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -19086,12 +19144,14 @@ an array or not. The second is inside the body of a user-defined function
(not discussed yet; @pxref{User-defined}), to test if a parameter is an
array or not.
-Note, however, that using @code{isarray()} at the global level to test
+@quotation NOTE
+Using @code{isarray()} at the global level to test
variables makes no sense. Since you are the one writing the program, you
are supposed to know if your variables are arrays or not. And in fact,
due to the way @command{gawk} works, if you pass the name of a variable
that has not been previously used to @code{isarray()}, @command{gawk}
-will end up turning it into a scalar.
+ends up turning it into a scalar.
+@end quotation
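
As a small sketch, a function that accepts either kind of argument
might look like this (the function name is invented for illustration):

@example
function describe(x,    i)
@{
    if (isarray(x)) @{
        for (i in x)
            print "element", i, "=", x[i]
    @} else
        print "scalar value:", x
@}
@end example
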
@node I18N Functions
@subsection String-Translation Functions
@@ -19352,7 +19412,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
@{
for (i in a)
- delete a[i]
+ delete a[i]
@}
@end example
@@ -19363,7 +19423,7 @@ Instead of having
to repeat this loop everywhere that you need to clear out
an array, your program can just call @code{delarray}.
(This guarantees portability. The use of @samp{delete @var{array}} to delete
-the contents of an entire array is a recent@footnote{Late in 2012.}
+the contents of an entire array is a relatively recent@footnote{Late in 2012.}
addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
@@ -19393,7 +19453,7 @@ $ @kbd{echo "Don't Panic!" |}
@print{} !cinaP t'noD
@end example
-The C @code{ctime()} function takes a timestamp and returns it in a string,
+The C @code{ctime()} function takes a timestamp and returns it as a string,
formatted in a well-known fashion.
The following example uses the built-in @code{strftime()} function
(@pxref{Time Functions})
@@ -19408,13 +19468,19 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@}
@c endfile
@end example
+
+You might think that @code{ctime()} could use @code{PROCINFO["strftime"]}
+for its format string. That would be a mistake, since @code{ctime()} is
+supposed to return the time formatted in a standard fashion, and user-level
+code could have changed @code{PROCINFO["strftime"]}.
@c ENDOFRANGE fdef
@node Function Caveats
@@ -19986,7 +20052,7 @@ saving it in @code{start}.
The last part of the code loops through each function name (from @code{$2} up to
the marker, @samp{data:}), calling the function named by the field. The indirect
function call itself occurs as a parameter in the call to @code{printf}.
-(The @code{printf} format string uses @samp{%s} as the format specifier so that we
+(The @code{printf} format string uses @code{%s} as the format specifier so that we
can use functions that return strings, as well as numbers. Note that the result
from the indirect call is concatenated with the empty string, in order to force
it to be a string value.)
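
A stripped-down sketch of the same technique, with an invented
function name, looks like this:

@example
function double(x) @{ return x + x @}

BEGIN @{
    the_func = "double"
    printf "%s\n", (@@the_func(7) "")     # prints 14
@}
@end example
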
@@ -20063,7 +20129,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
@{
temp = data[i]
data[i] = data[j]
@@ -20214,10 +20280,11 @@ functions.
@item
POSIX @command{awk} provides three kinds of built-in functions: numeric,
-string, and I/O. @command{gawk} provides functions that work with values
-representing time, do bit manipulation, sort arrays, and internationalize
-and localize programs. @command{gawk} also provides several extensions to
-some of standard functions, typically in the form of additional arguments.
+string, and I/O. @command{gawk} provides functions that sort arrays, work
+with values representing time, do bit manipulation, determine variable
+type (array vs.@: scalar), and internationalize and localize programs.
+@command{gawk} also provides several extensions to some of the standard
+functions, typically in the form of additional arguments.
@item
Functions accept zero or more arguments and return a value. The
@@ -20468,8 +20535,9 @@ are very difficult to track down:
function lib_func(x, y, l1, l2)
@{
@dots{}
- @var{use variable} some_var # some_var should be local
- @dots{} # but is not by oversight
+ # some_var should be local but by oversight is not
+ @var{use variable} some_var
+ @dots{}
@}
@end example
@@ -20580,7 +20648,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -20591,9 +20659,12 @@ function mystrtonum(str, ret, n, i, k, c)
The function first looks for C-style octal numbers (base 8).
If the input string matches a regular expression describing octal
numbers, then @code{mystrtonum()} loops through each character in the
-string. It sets @code{k} to the index in @code{"01234567"} of the current
-octal digit. Since the return value is one-based, the @samp{k--}
-adjusts @code{k} so it can be used in computing the return value.
+string. It sets @code{k} to the index in @code{"1234567"} of the current
+octal digit.
+The value returned by @code{index()} is either the same number as the
+digit, or zero if the character is not there, which is the case for a @samp{0}.
+This is safe, since the regexp test in the @code{if} ensures that
+only octal values are converted.
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with @samp{0x} or @samp{0X}.
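
The following is a bare sketch of just that digit-conversion idea (it
is not the library function itself):

@example
BEGIN @{
    str = "0527"                    # an octal string, for illustration
    n = 0
    for (i = 2; i <= length(str); i++) @{
        c = substr(str, i, 1)
        k = index("1234567", c)     # zero when c is "0"
        n = n * 8 + k
    @}
    print n                         # prints 343
@}
@end example
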
@@ -20626,7 +20697,7 @@ that a condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to be
the case. Such a statement is known as an
@dfn{assertion}. The C language provides an @code{<assert.h>} header file
-and corresponding @code{assert()} macro that the programmer can use to make
+and corresponding @code{assert()} macro that a programmer can use to make
assertions. If an assertion fails, the @code{assert()} macro arranges to
print a diagnostic message describing the condition that should have
been true but was not, and then it kills the program. In C, using
@@ -21096,7 +21167,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -21211,6 +21282,9 @@ if (length(contents) == 0)
This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
+@xref{Extension Sample Readfile}, for an extension function that
+also reads an entire file into memory.
+
@node Data File Management
@section @value{DDF} Management
@@ -21268,15 +21342,14 @@ Besides solving the problem in only nine(!) lines of code, it does so
@c # Arnold Robbins, arnold@@skeeve.com, Public Domain
@c # January 1992
-FILENAME != _oldfilename \
-@{
+FILENAME != _oldfilename @{
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
@}
-END @{ endfile(FILENAME) @}
+END @{ endfile(FILENAME) @}
@end example
This file must be loaded before the user's ``main'' program, so that the
@@ -21329,7 +21402,7 @@ FNR == 1 @{
beginfile(FILENAME)
@}
-END @{ endfile(_filename_) @}
+END @{ endfile(_filename_) @}
@c endfile
@end example
@@ -21428,24 +21501,12 @@ function rewind( i)
@c endfile
@end example
-This code relies on the @code{ARGIND} variable
-(@pxref{Auto-set}),
-which is specific to @command{gawk}.
-If you are not using
-@command{gawk}, you can use ideas presented in
-@ifnotinfo
-the previous @value{SECTION}
-@end ifnotinfo
-@ifinfo
-@ref{Filetrans Function},
-@end ifinfo
-to either update @code{ARGIND} on your own
-or modify this code as appropriate.
-
-The @code{rewind()} function also relies on the @code{nextfile} keyword
-(@pxref{Nextfile Statement}). Because of this, you should not call it
-from an @code{ENDFILE} rule. (This isn't necessary anyway, since as soon
-as an @code{ENDFILE} rule finishes @command{gawk} goes to the next file!)
+The @code{rewind()} function relies on the @code{ARGIND} variable
+(@pxref{Auto-set}), which is specific to @command{gawk}. It also
+relies on the @code{nextfile} keyword (@pxref{Nextfile Statement}).
+Because of this, you should not call it from an @code{ENDFILE} rule.
+(This isn't necessary anyway, since as soon as an @code{ENDFILE} rule
+finishes @command{gawk} goes to the next file!)
@node File Checking
@subsection Checking for Readable @value{DDF}s
@@ -21478,7 +21539,7 @@ the following program to your @command{awk} program:
BEGIN @{
for (i = 1; i < ARGC; i++) @{
- if (ARGV[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/ \
+ if (ARGV[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
|| ARGV[i] == "-" || ARGV[i] == "/dev/stdin")
continue # assignment or standard input
else if ((getline junk < ARGV[i]) < 0) # unreadable
@@ -21496,6 +21557,11 @@ Removing the element from @code{ARGV} with @code{delete}
skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
+The regular expression check purposely does not use character classes
+such as @samp{[:alpha:]} and @samp{[:alnum:]}
+(@pxref{Bracket Expressions})
+since @command{awk} variable names allow only the English letters.
+
@node Empty Files
@subsection Checking for Zero-length Files
@@ -21592,7 +21658,7 @@ a library file does the trick:
function disable_assigns(argc, argv, i)
@{
for (i = 1; i < argc; i++)
- if (argv[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/)
+ if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
argv[i] = ("./" argv[i])
@}
@@ -21964,12 +22030,18 @@ In both runs, the first @option{--} terminates the arguments to
etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the
-user level code to clear out all the elements of @code{ARGV} from 1
+After @code{getopt()} is through,
+user-level code must clear out all the elements of @code{ARGV} from 1
to @code{Optind}, so that @command{awk} does not try to process the
command-line options as @value{FN}s.
@end quotation
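
As a concrete sketch of this requirement (the option letters @samp{v}
and @samp{o} and the @code{usage()} function here are hypothetical;
@code{getopt()} sets @code{Optarg} and @code{Optind}):

@example
BEGIN @{
    while ((c = getopt(ARGC, ARGV, "vo:")) != -1) @{
        if (c == "v")
            verbose++
        else if (c == "o")
            outfile = Optarg
        else
            usage()
    @}
    for (i = 1; i < Optind; i++)    # clear processed options
        ARGV[i] = ""
@}
@end example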
+Using @samp{#!} with the @option{-E} option may help avoid
+conflicts between your program's options and @command{gawk}'s options,
+since @option{-E} causes @command{gawk} to abandon processing of
+further options
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
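For example, such a script might start with a line like the following
(the path to @command{gawk} varies from system to system):

@example
#! /usr/local/bin/gawk -E
@end example

@noindent
Everything after the script name on the command line is then left for
the script itself to interpret.
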
Several of the sample programs presented in
@ref{Sample Programs},
use @code{getopt()} to process their arguments.
@@ -22214,13 +22286,14 @@ The @code{BEGIN} rule sets a private variable to the directory where
routine, we have chosen to put it in @file{/usr/local/libexec/awk};
however, you might want it to be in a different directory on your system.
-The function @code{_pw_init()} keeps three copies of the user information
-in three associative arrays. The arrays are indexed by username
+The function @code{_pw_init()} fills three copies of the user information
+into three associative arrays. The arrays are indexed by username
(@code{_pw_byname}), by user ID number (@code{_pw_byuid}), and by order of
occurrence (@code{_pw_bycount}).
The variable @code{_pw_inited} is used for efficiency, since @code{_pw_init()}
needs to be called only once.
+@cindex @code{PROCINFO} array, testing the field splitting
@cindex @code{getline} command, @code{_pw_init()} function
Because this function uses @code{getline} to read information from
@command{pwcat}, it first saves the values of @code{FS}, @code{RS}, and @code{$0}.
@@ -22228,13 +22301,8 @@ It notes in the variable @code{using_fw} whether field splitting
with @code{FIELDWIDTHS} is in effect or not.
Doing so is necessary, since these functions could be called
from anywhere within a user's program, and the user may have his
-or her
-own way of splitting records and fields.
-
-@cindex @code{PROCINFO} array, testing the field splitting
-The @code{using_fw} variable checks @code{PROCINFO["FS"]}, which
-is @code{"FIELDWIDTHS"} if field splitting is being done with
-@code{FIELDWIDTHS}. This makes it possible to restore the correct
+or her own way of splitting records and fields.
+This makes it possible to restore the correct
field-splitting mechanism later. The test can only be true for
@command{gawk}. It is false if using @code{FS} or @code{FPAT},
or on some other @command{awk} implementation.
@@ -22548,8 +22616,7 @@ function _gr_init( oldfs, oldrs, olddol0, grcat,
n = split($4, a, "[ \t]*,[ \t]*")
for (i = 1; i <= n; i++)
if (a[i] in _gr_groupsbyuser)
- _gr_groupsbyuser[a[i]] = \
- _gr_groupsbyuser[a[i]] " " $1
+                _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
else
_gr_groupsbyuser[a[i]] = $1
@@ -22776,8 +22843,8 @@ $ @kbd{gawk -f walk_array.awk}
@itemize @value{BULLET}
@item
Reading programs is an excellent way to learn Good Programming.
-The functions provided in this @value{CHAPTER} and the next are intended
-to serve that purpose.
+The functions and programs provided in this @value{CHAPTER} and the next
+are intended to serve that purpose.
@item
When writing general-purpose library functions, put some thought into how
@@ -23064,22 +23131,16 @@ supplied:
# Requires getopt() and join() library functions
@group
-function usage( e1, e2)
+function usage()
@{
- e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
- e2 = "usage: cut [-c list] [files...]"
- print e1 > "/dev/stderr"
- print e2 > "/dev/stderr"
+ print("usage: cut [-f list] [-d c] [-s] [files...]") > "/dev/stderr"
+ print("usage: cut [-c list] [files...]") > "/dev/stderr"
exit 1
@}
@end group
@c endfile
@end example
-@noindent
-The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the @value{PAGE}.
-
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
Next comes a @code{BEGIN} rule that parses the command-line options.
@@ -23580,19 +23641,15 @@ and then exits:
@example
@c file eg/prog/egrep.awk
-function usage( e)
+function usage()
@{
- e = "Usage: egrep [-csvil] [-e pat] [files ...]"
- e = e "\n\tegrep [-csvil] pat [files ...]"
- print e > "/dev/stderr"
+ print("Usage: egrep [-csvil] [-e pat] [files ...]") > "/dev/stderr"
+ print("\n\tegrep [-csvil] pat [files ...]") > "/dev/stderr"
exit 1
@}
@c endfile
@end example
-The variable @code{e} is used so that the function fits nicely
-on the printed page.
-
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -23650,6 +23707,7 @@ numbers:
# May 1993
# Revised February 1996
# Revised May 2014
+# Revised September 2014
@c endfile
@end ignore
@@ -23668,26 +23726,22 @@ BEGIN @{
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -23696,8 +23750,7 @@ BEGIN @{
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
@@ -23707,8 +23760,10 @@ BEGIN @{
function pr_first_field(str, a)
@{
- split(str, a, ":")
- printf("(%s)", a[1])
+ if (str != "") @{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ @}
@}
@c endfile
@end example
@@ -23731,7 +23786,8 @@ tested, and the loop body never executes.
The @code{pr_first_field()} function simply isolates out some
code that is used repeatedly, making the whole program
-slightly shorter and cleaner.
+shorter and cleaner. In particular, moving the check for
+the empty string into this function saves several lines of code.
@c ENDOFRANGE id
@@ -23858,19 +23914,14 @@ The @code{usage()} function simply prints an error message and exits:
@example
@c file eg/prog/split.awk
-function usage( e)
+function usage()
@{
- e = "usage: split [-num] [file] [outname]"
- print e > "/dev/stderr"
+ print("usage: split [-num] [file] [outname]") > "/dev/stderr"
exit 1
@}
@c endfile
@end example
-@noindent
-The variable @code{e} is used so that the function
-fits nicely on the @value{PAGE}.
-
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
@@ -24029,10 +24080,10 @@ The options for @command{uniq} are:
@table @code
@item -d
-Print only repeated lines.
+Print only repeated (duplicated) lines.
@item -u
-Print only nonrepeated lines.
+Print only nonrepeated (unique) lines.
@item -c
Count lines. This option overrides @option{-d} and @option{-u}. Both repeated
@@ -24101,10 +24152,9 @@ standard output, @file{/dev/stdout}:
@end ignore
@c file eg/prog/uniq.awk
-function usage( e)
+function usage()
@{
- e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
- print e > "/dev/stderr"
+ print("Usage: uniq [-udc [-n]] [+n] [ in [ out ]]") > "/dev/stderr"
exit 1
@}
@@ -24158,22 +24208,20 @@ BEGIN @{
@end example
The following function, @code{are_equal()}, compares the current line,
-@code{$0}, to the
-previous line, @code{last}. It handles skipping fields and characters.
-If no field count and no character count are specified, @code{are_equal()}
-simply returns one or zero depending upon the result of a simple string
-comparison of @code{last} and @code{$0}. Otherwise, things get more
-complicated.
-If fields have to be skipped, each line is broken into an array using
-@code{split()}
-(@pxref{String Functions});
-the desired fields are then joined back into a line using @code{join()}.
-The joined lines are stored in @code{clast} and @code{cline}.
-If no fields are skipped, @code{clast} and @code{cline} are set to
-@code{last} and @code{$0}, respectively.
-Finally, if characters are skipped, @code{substr()} is used to strip off the
-leading @code{charcount} characters in @code{clast} and @code{cline}. The
-two strings are then compared and @code{are_equal()} returns the result:
+@code{$0}, to the previous line, @code{last}. It handles skipping fields
+and characters. If no field count and no character count are specified,
+@code{are_equal()} returns one or zero depending upon the result of a
+simple string comparison of @code{last} and @code{$0}.
+
+Otherwise, things get more complicated. If fields have to be skipped,
+each line is broken into an array using @code{split()} (@pxref{String
+Functions}); the desired fields are then joined back into a line
+using @code{join()}. The joined lines are stored in @code{clast} and
+@code{cline}. If no fields are skipped, @code{clast} and @code{cline}
+are set to @code{last} and @code{$0}, respectively. Finally, if
+characters are skipped, @code{substr()} is used to strip off the leading
+@code{charcount} characters in @code{clast} and @code{cline}. The two
+strings are then compared and @code{are_equal()} returns the result:
@example
@c file eg/prog/uniq.awk
@@ -24264,6 +24312,13 @@ END @{
@c endfile
@end example
+@c FIXME: Include this?
+@ignore
+This program does not follow our recommended convention of naming
+global variables with a leading capital letter. Doing that would
+make the program a little easier to follow.
+@end ignore
+
@ifset FOR_PRINT
The logic for choosing which lines to print represents a @dfn{state
machine}, which is ``a device that can be in one of a set number of stable
@@ -24309,7 +24364,7 @@ one or more input files. Its usage is as follows:
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
-the files. The options and their meanings are shown in the following list:
+the files. The options and their meanings are as follows:
@table @code
@item -l
@@ -24961,7 +25016,7 @@ of lines on the page
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
have to print horizontally; @code{line[1]} next to @code{line[6]},
-@code{line[2]} next to @code{line[7]}, and so on. Two loops are used to
+@code{line[2]} next to @code{line[7]}, and so on. Two loops
accomplish this. The outer loop, controlled by @code{i}, steps through
every 10 lines of data; this is each row of labels. The inner loop,
controlled by @code{j}, goes through the lines within the row.
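
In outline, the pairing looks like this (a sketch of the idea only, not
the actual @code{printpage()} source; @code{count} stands for the number
of saved lines):

@example
for (i = 0; i < count; i += 10)       # each row of labels
    for (j = 1; j <= 5; j++)          # lines within the row
        printf("%-41s%s\n", line[i+j], line[i+j+5])
@end example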
@@ -25075,7 +25130,7 @@ in a useful format.
At first glance, a program like this would seem to do the job:
@example
-# Print list of word frequencies
+# wordfreq-first-try.awk --- print list of word frequencies
@{
for (i = 1; i <= NF; i++)
@@ -25292,16 +25347,16 @@ Texinfo input file into separate files.
This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo},
the GNU project's document formatting language.
A single Texinfo source file can be used to produce both
-printed and online documentation.
+printed documentation, with @TeX{}, and online documentation.
@ifnotinfo
-Texinfo is fully documented in the book
+(Texinfo is fully documented in the book
@cite{Texinfo---The GNU Documentation Format},
available from the Free Software Foundation,
-and also available @uref{http://www.gnu.org/software/texinfo/manual/texinfo/, online}.
+and also available @uref{http://www.gnu.org/software/texinfo/manual/texinfo/, online}.)
@end ifnotinfo
@ifinfo
-The Texinfo language is described fully, starting with
-@inforef{Top, , Texinfo, texinfo,Texinfo---The GNU Documentation Format}.
+(The Texinfo language is described fully, starting with
+@inforef{Top, , Texinfo, texinfo,Texinfo---The GNU Documentation Format}.)
@end ifinfo
For our purposes, it is enough to know three things about Texinfo input
@@ -25379,8 +25434,7 @@ exits with a zero exit status, signifying OK:
@cindex @code{extract.awk} program
@example
@c file eg/prog/extract.awk
-# extract.awk --- extract files and run programs
-# from texinfo files
+# extract.awk --- extract files and run programs from texinfo files
@c endfile
@ignore
@c file eg/prog/extract.awk
@@ -25394,8 +25448,7 @@ exits with a zero exit status, signifying OK:
BEGIN @{ IGNORECASE = 1 @}
-/^@@c(omment)?[ \t]+system/ \
-@{
+/^@@c(omment)?[ \t]+system/ @{
if (NF < 3) @{
e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
@@ -25452,8 +25505,7 @@ line. That line is then printed to the output file:
@example
@c file eg/prog/extract.awk
-/^@@c(omment)?[ \t]+file/ \
-@{
+/^@@c(omment)?[ \t]+file/ @{
if (NF != 3) @{
e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
@@ -25513,7 +25565,7 @@ The @code{END} rule handles the final cleanup, closing the open file:
function unexpected_eof()
@{
printf("extract: %s:%d: unexpected EOF or error\n",
- FILENAME, FNR) > "/dev/stderr"
+ FILENAME, FNR) > "/dev/stderr"
exit 1
@}
@end group
@@ -25773,6 +25825,7 @@ should be the @command{awk} program. If there are no command-line
arguments left, @command{igawk} prints an error message and exits.
Otherwise, the first argument is appended to @code{program}.
In any case, after the arguments have been processed,
+the shell variable
@code{program} contains the complete text of the original @command{awk}
program.
@@ -25895,8 +25948,8 @@ the path, and an attempt is made to open the generated @value{FN}.
The only way to test if a file can be read in @command{awk} is to go
ahead and try to read it with @code{getline}; this is what @code{pathto()}
does.@footnote{On some very old versions of @command{awk}, the test
-@samp{getline junk < t} can loop forever if the file exists but is empty.
-Caveat emptor.} If the file can be read, it is closed and the @value{FN}
+@samp{getline junk < t} can loop forever if the file exists but is empty.}
+If the file can be read, it is closed and the @value{FN}
is returned:
@ignore
@@ -26096,12 +26149,10 @@ in C or C++, and it is frequently easier to do certain kinds of string
and argument manipulation using the shell than it is in @command{awk}.
Finally, @command{igawk} shows that it is not always necessary to add new
-features to a program; they can often be layered on top.
-@ignore
-With @command{igawk},
-there is no real reason to build @code{@@include} processing into
-@command{gawk} itself.
-@end ignore
+features to a program; they can often be layered on top.@footnote{@command{gawk}
+does @code{@@include} processing itself in order to support the use
+of @command{awk} programs as Web CGI scripts.}
+
@c ENDOFRANGE libfex
@c ENDOFRANGE flibex
@c ENDOFRANGE awkpex
@@ -26119,12 +26170,11 @@ One word is an anagram of another if both words contain
the same letters
(for example, ``babbling'' and ``blabbing'').
-An elegant algorithm is presented in Column 2, Problem C of
-Jon Bentley's @cite{Programming Pearls}, second edition.
-The idea is to give words that are anagrams a common signature,
-sort all the words together by their signature, and then print them.
-Dr.@: Bentley observes that taking the letters in each word and
-sorting them produces that common signature.
+Column 2, Problem C of Jon Bentley's @cite{Programming Pearls}, second
+edition, presents an elegant algorithm. The idea is to give words that
+are anagrams a common signature, sort all the words together by their
+signature, and then print them. Dr.@: Bentley observes that taking the
+letters in each word and sorting them produces that common signature.
The following program uses arrays of arrays to bring together
words with the same signature and array sorting to print the words
@@ -26358,7 +26408,7 @@ BEGIN {
@itemize @value{BULLET}
@item
-The functions provided in this @value{CHAPTER} and the previous one
+The programs provided in this @value{CHAPTER}
continue on the theme that reading programs is an excellent way to learn
Good Programming.
@@ -26635,13 +26685,11 @@ discusses the ability to dynamically add new built-in functions to
@cindex constants, nondecimal
If you run @command{gawk} with the @option{--non-decimal-data} option,
-you can have nondecimal constants in your input data:
+you can have nondecimal values in your input data:
-@c line break here for small book format
@example
$ @kbd{echo 0123 123 0x123 |}
-> @kbd{gawk --non-decimal-data '@{ printf "%d, %d, %d\n",}
-> @kbd{$1, $2, $3 @}'}
+> @kbd{gawk --non-decimal-data '@{ printf "%d, %d, %d\n", $1, $2, $3 @}'}
@print{} 83, 123, 291
@end example
@@ -26682,6 +26730,8 @@ Instead, use the @code{strtonum()} function to convert your data
(@pxref{String Functions}).
This makes your programs easier to write and easier to read, and
leads to less surprising results.
+
+This option may disappear in a future version of @command{gawk}.
@end quotation
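
For example, an explicit conversion works without any special
command-line options (@code{strtonum()} is a @command{gawk} extension):

@example
$ @kbd{echo 0x11 | gawk '@{ print strtonum($1) @}'}
@print{} 17
@end example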
@node Array Sorting
@@ -26716,7 +26766,9 @@ pre-defined values to @code{PROCINFO["sorted_in"]} in order to
control the order in which @command{gawk} traverses an array
during a @code{for} loop.
-In addition, the value of @code{PROCINFO["sorted_in"]} can be a function name.
+In addition, the value of @code{PROCINFO["sorted_in"]} can be a
+function name.@footnote{This is why the predefined sorting orders
+start with an @samp{@@} character, which cannot be part of an identifier.}
This lets you traverse an array based on any custom criterion.
The array elements are ordered according to the return value of this
function. The comparison function should be defined with at least
@@ -26848,7 +26900,7 @@ according to login name. The following program sorts records
by a specific field position and can be used for this purpose:
@example
-# sort.awk --- simple program to sort by field position
+# passwd-sort.awk --- simple program to sort by field position
# field position is specified by the global variable POS
function cmp_field(i1, v1, i2, v2)
@@ -26907,7 +26959,7 @@ As mentioned above, the order of the indices is arbitrary if two
elements compare equal. This is usually not a problem, but letting
the tied elements come out in arbitrary order can be an issue, especially
when comparing item values. The partial ordering of the equal elements
-may change during the next loop traversal, if other elements are added or
+may change the next time the array is traversed, if other elements are added or
removed from the array. One way to resolve ties when comparing elements
with otherwise equal values is to include the indices in the comparison
rules. Note that doing this may make the loop traversal less efficient,
@@ -27076,7 +27128,6 @@ come into play; comparisons are based on character values only.@footnote{This
is true because locale-based comparison occurs only when in POSIX
compatibility mode, and since @code{asort()} and @code{asorti()} are
@command{gawk} extensions, they are not available in that case.}
-Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
@@ -27142,7 +27193,7 @@ for example, @file{/tmp} will not do, as another user might happen
to be using a temporary file with the same name.@footnote{Michael
Brennan suggests the use of @command{rand()} to generate unique
@value{FN}s. This is a valid point; nevertheless, temporary files
-remain more difficult than two-way pipes.} @c 8/2014
+remain more difficult to use than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -27285,7 +27336,7 @@ using regular pipes.
@ @ @ @ @i{A host is a host from coast to coast,@*
@ @ @ @ and no-one can talk to host that's close,@*
@ @ @ @ unless the host that isn't close@*
-@ @ @ @ is busy hung or dead.}
+@ @ @ @ is busy, hung, or dead.}
@end quotation
@end ifnotdocbook
@@ -27295,7 +27346,7 @@ using regular pipes.
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
-&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>is busy hung or dead.</emphasis></literallayout>
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>is busy, hung, or dead.</emphasis></literallayout>
</blockquote>
@end docbook
@@ -27326,7 +27377,7 @@ the system default, most likely IPv4.
@item protocol
The protocol to use over IP. This must be either @samp{tcp}, or
@samp{udp}, for a TCP or UDP IP connection,
-respectively. The use of TCP is recommended for most applications.
+respectively. TCP should be used for most applications.
@item local-port
@cindex @code{getaddrinfo()} function (C library)
@@ -27359,10 +27410,10 @@ Consider the following very simple example:
@example
BEGIN @{
- Service = "/inet/tcp/0/localhost/daytime"
- Service |& getline
- print $0
- close(Service)
+ Service = "/inet/tcp/0/localhost/daytime"
+ Service |& getline
+ print $0
+ close(Service)
@}
@end example
@@ -27727,9 +27778,9 @@ those functions sort arrays. Or you may provide one of the predefined control
strings that work for @code{PROCINFO["sorted_in"]}.
@item
-You can use the @samp{|&} operator to create a two-way pipe to a co-process.
-You read from the co-process with @code{getline} and write to it with @code{print}
-or @code{printf}. Use @code{close()} to close off the co-process completely, or
+You can use the @samp{|&} operator to create a two-way pipe to a coprocess.
+You read from the coprocess with @code{getline} and write to it with @code{print}
+or @code{printf}. Use @code{close()} to close off the coprocess completely, or
optionally, close off one side of the two-way communications.
@item
@@ -35169,7 +35220,7 @@ for case translation
(@pxref{String Functions}).
@item
-A cleaner specification for the @samp{%c} format-control letter in the
+A cleaner specification for the @code{%c} format-control letter in the
@code{printf} function
(@pxref{Control Letters}).
@@ -37572,7 +37623,7 @@ need to use the @code{BINMODE} variable.
This can cause problems with other Unix-like components that have
been ported to MS-Windows that expect @command{gawk} to do automatic
-translation of @code{"\r\n"}, since it won't. Caveat Emptor!
+translation of @code{"\r\n"}, since it won't.
@node VMS Installation
@appendixsubsec How to Compile and Install @command{gawk} on Vax/VMS and OpenVMS
@@ -38041,10 +38092,8 @@ Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT)
@docbook
<blockquote><attribution>Michael Brennan</attribution>
-<literallayout>
-<emphasis>It's kind of fun to put comments like this in your awk code.</emphasis>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<literal>// Do C++ comments work? answer: yes! of course</literal>
-</literallayout>
+<literallayout><emphasis>It's kind of fun to put comments like this in your awk code.</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<literal>// Do C++ comments work? answer: yes! of course</literal></literallayout>
</blockquote>
@end docbook
@@ -41580,6 +41629,7 @@ Consistency issues:
Use --foo, not -Wfoo when describing long options
Use "Bell Laboratories", but not "Bell Labs".
Use "behavior" instead of "behaviour".
+ Use "coprocess" instead of "co-process".
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
@@ -41684,4 +41734,3 @@ But to use it you have to say
which sorta sucks.
TODO:
------
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 004157f0..d026a3b1 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -48,11 +48,16 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH August, 2014
+@set UPDATE-MONTH September, 2014
@set VERSION 4.1
-@set PATCHLEVEL 1
+@set PATCHLEVEL 2
+@ifset FOR_PRINT
+@set TITLE Effective AWK Programming
+@end ifset
+@ifclear FOR_PRINT
@set TITLE GAWK: Effective AWK Programming
+@end ifclear
@set SUBTITLE A User's Guide for GNU Awk
@set EDITION 4.1
@@ -555,8 +560,8 @@ particular records in a file and perform operations upon them.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate
field.
-* Command Line Field Separator:: Setting @code{FS} from the
- command line.
+* Command Line Field Separator:: Setting @code{FS} from the command
+ line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -600,10 +605,12 @@ particular records in a file and perform operations upon them.
* Printf Examples:: Several examples.
* Redirection:: How to redirect output to multiple
files and pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in
@command{gawk}. @command{gawk} allows
access to inherited file descriptors.
-* Special FD:: Special files for I/O.
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
* Special Network:: Special files for network
communications.
* Special Caveats:: Things to watch out for.
@@ -716,12 +723,12 @@ particular records in a file and perform operations upon them.
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The @code{delete} statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using Uninitialized variables as
subscripts.
+* Delete:: The @code{delete} statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
@@ -1083,7 +1090,7 @@ books on Unix, I found the gray AWK book, a.k.a.@: Aho, Kernighan and
Weinberger, @cite{The AWK Programming Language}, Addison-Wesley,
1988. AWK's simple programming paradigm---find a pattern in the
input and then perform an action---often reduced complex or tedious
-data manipulations to few lines of code. I was excited to try my
+data manipulations to a few lines of code. I was excited to try my
hand at programming in AWK.
Alas, the @command{awk} on my computer was a limited version of the
@@ -1217,7 +1224,7 @@ March, 2001
<affiliation><jobtitle>Nof Ayalon</jobtitle></affiliation>
<affiliation><jobtitle>ISRAEL</jobtitle></affiliation>
</author>
- <date>June, 2014</date>
+ <date>December, 2014</date>
</prefaceinfo>
@end docbook
@@ -1239,7 +1246,7 @@ and with the Unix version of @command{awk} maintained
by Brian Kernighan.
This means that all
properly written @command{awk} programs should work with @command{gawk}.
-Thus, we usually don't distinguish between @command{gawk} and other
+So most of the time, we don't distinguish between @command{gawk} and other
@command{awk} implementations.
@cindex @command{awk}, POSIX and, See Also POSIX @command{awk}
@@ -1286,15 +1293,15 @@ Sort data
Perform simple network communications
@item
-Profile and debug @command{awk} programs.
+Profile and debug @command{awk} programs
@item
-Extend the language with functions written in C or C++.
+Extend the language with functions written in C or C++
@end itemize
This @value{DOCUMENT} teaches you about the @command{awk} language and
how you can use it effectively. You should already be familiar with basic
-system commands, such as @command{cat} and @command{ls},@footnote{These commands
+system commands, such as @command{cat} and @command{ls},@footnote{These utilities
are available on POSIX-compliant systems, as well as on traditional
Unix-based systems. If you are using some other operating system, you still need to
be familiar with the ideas of I/O redirection and pipes.} as well as basic shell
@@ -1316,10 +1323,9 @@ Microsoft Windows
@ifclear FOR_PRINT
(all versions) and OS/2 PCs,
@end ifclear
-and OpenVMS.
-(Some other, obsolete systems to which @command{gawk} was once ported
-are no longer supported and the code for those systems
-has been removed.)
+and OpenVMS.@footnote{Some other, obsolete systems to which @command{gawk}
+was once ported are no longer supported and the code for those systems
+has been removed.}
@menu
* History:: The history of @command{gawk} and
@@ -1483,7 +1489,7 @@ All appear in the index, under the heading ``sidebar.''
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
-program that illustrates the concept currently being described.
+program that illustrates the concept being described.
While this @value{DOCUMENT} is aimed principally at people who have not been
exposed
@@ -1541,9 +1547,9 @@ sorting arrays in @command{gawk}. It also describes how @command{gawk}
provides arrays of arrays.
@ref{Functions},
-describes the built-in functions @command{awk} and
-@command{gawk} provide, as well as how to define
-your own functions.
+describes the built-in functions @command{awk} and @command{gawk} provide,
+as well as how to define your own functions. It also discusses how
+@command{gawk} lets you call functions indirectly.
Part II shows how to use @command{awk} and @command{gawk} for problem solving.
There is lots of code here for you to read and learn from.
@@ -1616,9 +1622,10 @@ printed edition. You may find them online, as follows:
@uref{http://www.gnu.org/software/gawk/manual/html_node/Notes.html,
The appendix on implementation notes}
-describes how to disable @command{gawk}'s extensions, as
-well as how to contribute new code to @command{gawk},
-and some possible future directions for @command{gawk} development.
+describes how to disable @command{gawk}'s extensions, how to contribute
+new code to @command{gawk}, where to find information on some possible
+future directions for @command{gawk} development, and the design decisions
+behind the extension API.
@uref{http://www.gnu.org/software/gawk/manual/html_node/Basic-Concepts.html,
The appendix on basic concepts}
@@ -1636,7 +1643,7 @@ The GNU FDL}
is the license that covers this @value{DOCUMENT}.
Some of the chapters have exercise sections; these have also been
-omitted from the print edition.
+omitted from the print edition but are available online.
@end ifset
@ifclear FOR_PRINT
@@ -1859,7 +1866,7 @@ The FSF published the first two editions under
the title @cite{The GNU Awk User's Guide}.
@ifset FOR_PRINT
SSC published two editions of the @value{DOCUMENT} under the
-title @cite{Effective awk Programming}, and in O'Reilly published
+title @cite{Effective awk Programming}, and O'Reilly published
the third edition in 2001.
@end ifset
@@ -1891,7 +1898,7 @@ for information on submitting problem reports electronically.
@unnumberedsec How to Stay Current
It may be you have a version of @command{gawk} which is newer than the
-one described in this @value{DOCUMENT}. To find out what has changed,
+one described here. To find out what has changed,
you should first look at the @file{NEWS} file in the @command{gawk}
distribution, which provides a high level summary of what changed in
each release.
@@ -2113,7 +2120,7 @@ take advantage of those opportunities.
Arnold Robbins @*
Nof Ayalon @*
ISRAEL @*
-May, 2014
+December, 2014
@end iftex
@ifnotinfo
@@ -2332,7 +2339,7 @@ to keep you from worrying about the complexities of computer
programming:
@example
-$ @kbd{awk "BEGIN @{ print "Don\47t Panic!" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
@@ -2340,11 +2347,11 @@ $ @kbd{awk "BEGIN @{ print "Don\47t Panic!" @}"}
reading any input. If there are no other statements in your program,
as is the case here, @command{awk} just stops, instead of trying to read
input it doesn't know how to process.
-The @samp{\47} is a magic way of getting a single quote into
+The @samp{\47} is a magic way (explained later) of getting a single quote into
the program, without having to engage in ugly shell quoting tricks.
@quotation NOTE
-As a side note, if you use Bash as your shell, you should execute the
+If you use Bash as your shell, you should execute the
command @samp{set +H} before running this program interactively, to
disable the C shell-style command history, which treats @samp{!} as a
special character. We recommend putting this command into your personal
@@ -2374,7 +2381,7 @@ $ @kbd{awk '@{ print @}'}
@cindex @command{awk} programs, running
@cindex @command{awk} programs, lengthy
@cindex files, @command{awk} programs in
-Sometimes your @command{awk} programs can be very long. In this case, it is
+Sometimes @command{awk} programs are very long. In these cases, it is
more convenient to put the program into a separate file. In order to tell
@command{awk} to use that file for its program, you type:
@@ -2404,7 +2411,7 @@ awk -f advice
does the same thing as this one:
@example
-awk "BEGIN @{ print \"Don't Panic!\" @}"
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
@end example
@cindex quoting in @command{gawk} command lines
@@ -2416,6 +2423,8 @@ specify with @option{-f}, because most @value{FN}s don't contain any of the shel
special characters. Notice that in @file{advice}, the @command{awk}
program did not have single quotes around it. The quotes are only needed
for programs that are provided on the @command{awk} command line.
+(Also, placing the program in a file allows us to use a literal single quote in the program
+text, instead of the magic @samp{\47}.)
@c STARTOFRANGE sq1x
@cindex single quote (@code{'}) in @command{gawk} command lines
@@ -2474,7 +2483,7 @@ written in @command{awk}.
according to the instructions in your program. (This is different
from a @dfn{compiled} language such as C, where your program is first
compiled into machine code that is executed directly by your system's
-hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+processor.) The @command{awk} utility is thus termed an @dfn{interpreter}.
Many modern languages are interpreted.
The line beginning with @samp{#!} lists the full @value{FN} of an
@@ -2483,9 +2492,9 @@ to pass to that interpreter. The operating system then runs the
interpreter with the given argument and the full argument list of the
executed program. The first argument in the list is the full @value{FN}
of the @command{awk} program. The rest of the argument list contains
-either options to @command{awk}, or @value{DF}s, or both. Note that on
+either options to @command{awk}, or @value{DF}s, or both. (Note that on
many systems @command{awk} may be found in @file{/usr/bin} instead of
-in @file{/bin}. Caveat Emptor.
+in @file{/bin}.)
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2663,8 +2672,14 @@ Thus, the example seen
@ifnotinfo
previously
@end ifnotinfo
-in @ref{Read Terminal},
-is applicable:
+in @ref{Read Terminal}:
+
+@example
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
+@end example
+
+@noindent
+could instead be written this way:
@example
$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
@@ -2759,6 +2774,9 @@ $ awk -v sq="'" 'BEGIN @{ print "Here is a single quote <" sq ">" @}'
@print{} Here is a single quote <'>
@end example
+(Here, the two string constants and the value of @code{sq} are concatenated
+into a single string which is printed by @code{print}.)
+
If you really need both single and double quotes in your @command{awk}
program, it is probably best to move it into a separate file, where
the shell won't be part of the picture, and you can say what you mean.
@@ -2822,7 +2840,7 @@ The second @value{DF}, called @file{inventory-shipped}, contains
information about monthly shipments. In both files,
each line is considered to be one @dfn{record}.
-In the @value{DF} @file{mail-list}, each record contains the name of a person,
+In @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email-address, and a code for their relationship
with the author of the list.
The columns are aligned using spaces.
@@ -2982,7 +3000,7 @@ Print the length of the longest line in @file{data}:
@example
expand data | awk '@{ if (x < length($0)) x = length($0) @}
- END @{ print "maximum line length is " x @}'
+ END @{ print "maximum line length is " x @}'
@end example
This example differs slightly from the previous one:
@@ -3014,7 +3032,7 @@ Print the total number of bytes used by @var{files}:
@example
ls -l @var{files} | awk '@{ x += $5 @}
- END @{ print "total bytes: " x @}'
+ END @{ print "total bytes: " x @}'
@end example
@item
@@ -3058,7 +3076,7 @@ the program would print the odd-numbered lines.
@cindex @command{awk} programs
The @command{awk} utility reads the input files one line at a
-time. For each line, @command{awk} tries the patterns of each of the rules.
+time. For each line, @command{awk} tries the patterns of each rule.
If several patterns match, then several actions execute in the order in
which they appear in the @command{awk} program. If no patterns match, then
no actions run.
@@ -3066,7 +3084,7 @@ no actions run.
After processing all the rules that match the line (and perhaps there are none),
@command{awk} reads the next line. (However,
@pxref{Next Statement},
-and also @pxref{Nextfile Statement}).
+and also @pxref{Nextfile Statement}.)
This continues until the program reaches the end of the file.
For example, the following @command{awk} program contains two rules:
@@ -3140,13 +3158,12 @@ the file was last modified. Its output looks like this:
@noindent
@cindex line continuations, with C shell
The first field contains read-write permissions, the second field contains
-the number of links to the file, and the third field identifies the owner of
-the file. The fourth field identifies the group of the file.
-The fifth field contains the size of the file in bytes. The
+the number of links to the file, and the third field identifies the file's owner.
+The fourth field identifies the file's group.
+The fifth field contains the file's size in bytes. The
sixth, seventh, and eighth fields contain the month, day, and time,
respectively, that the file was last modified. Finally, the ninth field
-contains the @value{FN}.@footnote{The @samp{LC_ALL=C} is
-needed to produce this traditional-style output from @command{ls}.}
+contains the @value{FN}.
@c @cindex automatic initialization
@cindex initialization, automatic
@@ -3556,7 +3573,7 @@ more than once, setting another variable each time, like this:
Using @option{-v} to set the values of the built-in
variables may lead to surprising results. @command{awk} will reset the
values of those variables as it needs to, possibly ignoring any
-predefined value you may have given.
+initial value you may have given.
@end quotation
@item -W @var{gawk-opt}
@@ -3639,7 +3656,7 @@ Print the short version of the General Public License and then exit.
@cindex variables, global, printing list of
Print a sorted list of global variables, their types, and final values
to @var{file}. If no @var{file} is provided, print this
-list to the file named @file{awkvars.out} in the current directory.
+list to a file named @file{awkvars.out} in the current directory.
No space is allowed between the @option{-d} and @var{file}, if
@var{file} is supplied.
@@ -3735,7 +3752,7 @@ that @command{gawk} accepts and then exit.
@cindex @option{-i} option
@cindex @option{--include} option
@cindex @command{awk} programs, location of
-Read @command{awk} source library from @var{source-file}. This option
+Read an @command{awk} source library from @var{source-file}. This option
is completely equivalent to using the @code{@@include} directive inside
your program. This option is very similar to the @option{-f} option,
but there are two important differences. First, when @option{-i} is
@@ -3759,7 +3776,7 @@ environment variable. The correct library suffix for your platform will be
supplied by default, so it need not be specified in the extension name.
The extension initialization routine should be named @code{dl_load()}.
An alternative is to use the @code{@@load} keyword inside the program to load
-a shared library. This feature is described in detail in @ref{Dynamic Extensions}.
+a shared library. This advanced feature is described in detail in @ref{Dynamic Extensions}.
@item @option{-L}[@var{value}]
@itemx @option{--lint}[@code{=}@var{value}]
@@ -3808,6 +3825,8 @@ values in input data
@quotation CAUTION
This option can severely break old programs.
Use with care.
+
+This option may disappear in a future version of @command{gawk}.
@end quotation
@item @option{-N}
@@ -3971,6 +3990,7 @@ if they had been concatenated together into one big file. This is
useful for creating libraries of @command{awk} functions. These functions
can be written once and then retrieved from a standard place, instead
of having to be included into each individual program.
+The @option{-i} option is similar in this regard.
(As mentioned in
@ref{Definition Syntax},
function names must be unique.)
@@ -4044,15 +4064,18 @@ Any additional arguments on the command line are normally treated as
input files to be processed in the order specified. However, an
argument that has the form @code{@var{var}=@var{value}}, assigns
the value @var{value} to the variable @var{var}---it does not specify a
-file at all.
-(See
-@ref{Assignment Options}.)
+file at all. (See @ref{Assignment Options}.) In the following example,
+@var{count=1} is a variable assignment, not a @value{FN}:
+
+@example
+awk -f program.awk file1 count=1 file2
+@end example
@cindex @command{gawk}, @code{ARGIND} variable in
@cindex @code{ARGIND} variable, command-line arguments
@cindex @code{ARGV} array, indexing into
@cindex @code{ARGC}/@code{ARGV} variables, command-line arguments
-All these arguments are made available to your @command{awk} program in the
+All the command-line arguments are made available to your @command{awk} program in the
@code{ARGV} array (@pxref{Built-in Variables}). Command-line options
and the program text (if present) are omitted from @code{ARGV}.
All other arguments, including variable assignments, are
@@ -4183,15 +4206,15 @@ separated by colons@footnote{Semicolons on MS-Windows and MS-DOS.}. @command{ga
@samp{.:/usr/local/share/awk}.@footnote{Your version of @command{gawk}
may use a different directory; it
will depend upon how @command{gawk} was built and installed. The actual
-directory is the value of @samp{$(datadir)} generated when
+directory is the value of @code{$(datadir)} generated when
@command{gawk} was configured. You probably don't need to worry about this,
though.}
The search path feature is particularly helpful for building libraries
of useful @command{awk} functions. The library files can be placed in a
standard directory in the default path and then specified on
-the command line with a short @value{FN}. Otherwise, the full @value{FN}
-would have to be typed for each file.
+the command line with a short @value{FN}. Otherwise, you would have to
+type the full @value{FN} for each file.
By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
@@ -4200,25 +4223,23 @@ Path searching is not done if @command{gawk} is in compatibility mode.
This is true for both @option{--traditional} and @option{--posix}.
@xref{Options}.
-If the source code is not found after the initial search, the path is searched
+If the source code file is not found after the initial search, the path is searched
again after adding the default @samp{.awk} suffix to the @value{FN}.
-@quotation NOTE
-@c 4/2014:
-@c using @samp{.} to get quotes, since @file{} no longer supplies them.
-To include
-the current directory in the path, either place
-@samp{.} explicitly in the path or write a null entry in the
-path. (A null entry is indicated by starting or ending the path with a
-colon or by placing two colons next to each other [@samp{::}].)
-This path search mechanism is similar
+@command{gawk}'s path search mechanism is similar
to the shell's.
(See @uref{http://www.gnu.org/software/bash/manual/,
-@cite{The Bourne-Again SHell manual}.})
+@cite{The Bourne-Again SHell manual}}.)
+It treats a null entry in the path as indicating the current
+directory.
+(A null entry is indicated by starting or ending the path with a
+colon or by placing two colons next to each other [@samp{::}].)
-However, @command{gawk} always looks in the current directory @emph{before}
-searching @env{AWKPATH}, so there is no real reason to include
-the current directory in the search path.
+@quotation NOTE
+@command{gawk} always looks in the current directory @emph{before}
+searching @env{AWKPATH}. Thus, while you can include the current directory
+in the search path, either explicitly or with a null entry, there is no
+real reason to do so.
@c Prior to 4.0, gawk searched the current directory after the
@c path search, but it's not worth documenting it.
@end quotation
@@ -4259,16 +4280,6 @@ behavior, but they are more specialized. Those in the following
list are meant to be used by regular users.
@table @env
-@item POSIXLY_CORRECT
-Causes @command{gawk} to switch to POSIX compatibility
-mode, disabling all traditional and GNU extensions.
-@xref{Options}.
-
-@item GAWK_SOCK_RETRIES
-Controls the number of times @command{gawk} attempts to
-retry a two-way TCP/IP (socket) connection before giving up.
-@xref{TCP/IP Networking}.
-
@item GAWK_MSEC_SLEEP
Specifies the interval between connection retries,
in milliseconds. On systems that do not support
@@ -4279,6 +4290,16 @@ the value is rounded up to an integral number of seconds.
Specifies the time, in milliseconds, for @command{gawk} to
wait for input before returning with an error.
@xref{Read Timeout}.
+
+@item GAWK_SOCK_RETRIES
+Controls the number of times @command{gawk} attempts to
+retry a two-way TCP/IP (socket) connection before giving up.
+@xref{TCP/IP Networking}.
+
+@item POSIXLY_CORRECT
+Causes @command{gawk} to switch to POSIX compatibility
+mode, disabling all traditional and GNU extensions.
+@xref{Options}.
@end table
The environment variables in the following list are meant
@@ -4293,7 +4314,7 @@ file as the size of the memory buffer to allocate for I/O. Otherwise,
the value should be a number, and @command{gawk} uses that number as
the size of the buffer to allocate. (When this variable is not set,
@command{gawk} uses the smaller of the file's size and the ``default''
-blocksize, which is usually the filesystems I/O blocksize.)
+blocksize, which is usually the filesystem's I/O blocksize.)
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
@@ -4308,10 +4329,11 @@ for debugging problems on filesystems on non-POSIX operating systems
where I/O is performed in records, not in blocks.
@item GAWK_MSG_SRC
-If this variable exists, @command{gawk} includes the source file
-name and line number from which warning and/or fatal messages
+If this variable exists, @command{gawk} includes the file
+name and line number within the @command{gawk} source code
+from which warning and/or fatal messages
are generated. Its purpose is to help isolate the source of a
-message, since there can be multiple places which produce the
+message, since there are multiple places which produce the
same warning or error message.
@item GAWK_NO_DFA
@@ -4524,6 +4546,7 @@ that requires access to an extension.
@ref{Dynamic Extensions}, describes how to write extensions (in C or C++)
that can be loaded with either @code{@@load} or the @option{-l} option.
+It also describes the @code{ordchr} extension.
@node Obsolete
@section Obsolete Options and/or Features
@@ -4592,15 +4615,15 @@ awk '@{ sum += $1 @} END @{ print sum @}'
@end example
@command{gawk} actually supports this but it is purposely undocumented
-because it is considered bad style. The correct way to write such a program
-is either
+because it is bad style. The correct way to write such a program
+is either:
@example
awk '@{ sum += $1 @} ; END @{ print sum @}'
@end example
@noindent
-or
+or:
@example
awk '@{ sum += $1 @}
@@ -4608,8 +4631,7 @@ awk '@{ sum += $1 @}
@end example
@noindent
-@xref{Statements/Lines}, for a fuller
-explanation.
+@xref{Statements/Lines}, for a fuller explanation.
You can insert newlines after the @samp{;} in @code{for} loops.
This seems to have been a long-undocumented feature in Unix @command{awk}.
@@ -4649,7 +4671,8 @@ affects how @command{awk} processes input.
@item
You can use a single minus sign (@samp{-}) to refer to standard input
-on the command line.
+on the command line. @command{gawk} also lets you use the special
+@value{FN} @file{/dev/stdin}.
@item
@command{gawk} pays attention to a number of environment variables.
@@ -4838,7 +4861,7 @@ such as TAB or newline. While there is nothing to stop you from entering most
unprintable characters directly in a string constant or regexp constant,
they may look ugly.
-The following table lists
+The following list presents
all the escape sequences used in @command{awk} and
what they represent. Unless noted otherwise, all these escape
sequences apply to both string constants and regexp constants:
@@ -4954,13 +4977,13 @@ characters @samp{a+b}.
@cindex @code{\} (backslash), in escape sequences
@cindex portability
For complete portability, do not use a backslash before any character not
-shown in the previous list.
+shown in the previous list and that is not an operator.
To summarize:
@itemize @value{BULLET}
@item
-The escape sequences in the table above are always processed first,
+The escape sequences in the list above are always processed first,
for both string constants and regexp constants. This happens very early,
as soon as @command{awk} reads your program.
@@ -5050,7 +5073,7 @@ are recognized and converted into corresponding real characters as
the very first step in processing regexps.
Here is a list of metacharacters. All characters that are not escape
-sequences and that are not listed in the table stand for themselves:
+sequences and that are not listed in the following stand for themselves:
@c Use @asis so the docbook comes out ok. Sigh.
@table @asis
@@ -5307,7 +5330,7 @@ characters to be matched.
@cindex Extended Regular Expressions (EREs)
@cindex EREs (Extended Regular Expressions)
@cindex @command{egrep} utility
-This treatment of @samp{\} in bracket expressions
+The treatment of @samp{\} in bracket expressions
is compatible with other @command{awk}
implementations and is also mandated by POSIX.
The regular expressions in @command{awk} are a superset
@@ -5424,11 +5447,11 @@ Consider the following:
echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
@end example
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
+This example uses the @code{sub()} function to make a change to the input
+record. (@code{sub()} replaces the first instance of any text matched
+by the first argument with the string provided as the second argument;
+@pxref{String Functions}). Here, the regexp @code{/a+/} indicates ``one
+or more @samp{a} characters,'' and the replacement text is @samp{<A>}.
The input contains four @samp{a} characters.
@command{awk} (and POSIX) regular expressions always match
@@ -5539,7 +5562,7 @@ intend a regexp match.
@cindex regular expressions, dynamic, with embedded newlines
@cindex newlines, in dynamic regexps
-Some versions of @command{awk} do not allow the newline
+Some older versions of @command{awk} do not allow the newline
character to be used inside a bracket expression for a dynamic regexp:
@example
@@ -5548,7 +5571,7 @@ $ @kbd{awk '$0 ~ "[ \t\n]"'}
@error{} ]...
@error{} source line number 1
@error{} context is
-@error{} >>> <<<
+@error{} $0 ~ "[ >>> \t\n]" <<<
@end example
@cindex newlines, in regexp constants
@@ -5871,11 +5894,6 @@ Within bracket expressions, POSIX character classes let you specify
certain groups of characters in a locale-independent fashion.
@item
-@command{gawk}'s @code{IGNORECASE} variable lets you control the
-case sensitivity of regexp matching. In other @command{awk}
-versions, use @code{tolower()} or @code{toupper()}.
-
-@item
Regular expressions match the leftmost longest text in the string being
matched. This matters for cases where you need to know the extent of
the match, such as for text substitution and when the record separator
@@ -5885,6 +5903,11 @@ is a regexp.
Matching expressions may use dynamic regexps, that is, string values
treated as regular expressions.
+@item
+@command{gawk}'s @code{IGNORECASE} variable lets you control the
+case sensitivity of regexp matching. In other @command{awk}
+versions, use @code{tolower()} or @code{toupper()}.
+
@end itemize
@c ENDOFRANGE regexp
@@ -5952,7 +5975,7 @@ used with it do not have to be named on the @command{awk} command line
@command{awk} divides the input for your program into records and fields.
It keeps track of the number of records that have been read so far from
the current input file. This value is stored in a built-in variable
-called @code{FNR} which is reset to zero when a new file is started.
+called @code{FNR}, which is reset to zero every time a new file is started.
Another built-in variable, @code{NR}, records the total number of input
records read so far from all @value{DF}s. It starts at zero, but is
never automatically reset to zero.
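For example, running a trivial program on two @value{DF}s (the names
@file{file1} and @file{file2} are purely illustrative, and each file is
assumed to hold two records) shows @code{FNR} starting over for the
second file while @code{NR} keeps counting:

@example
$ @kbd{awk '@{ print FILENAME, FNR, NR @}' file1 file2}
@print{} file1 1 1
@print{} file1 2 2
@print{} file2 1 3
@print{} file2 2 4
@end example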
@@ -6082,7 +6105,8 @@ Using an unusual character such as @samp{/} is more likely to
produce correct behavior in the majority of cases, but there
are no guarantees. The moral is: Know Your Data.
-There is one unusual case, that occurs when @command{gawk} is
+When using regular characters as the record separator,
+there is one unusual case that occurs when @command{gawk} is
being fully POSIX-compliant (@pxref{Options}).
Then, the following (extreme) pipeline prints a surprising @samp{1}:
@@ -6171,7 +6195,7 @@ $ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
@noindent
The square brackets delineate the contents of @code{RT}, letting you
-see the leading and trailing whitespace. The final value of @code{RT}
+see the leading and trailing whitespace. The final value of
@code{RT} is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -6190,7 +6214,7 @@ metacharacters match the beginning and end of a @emph{string}, and not
the beginning and end of a @emph{line}. As a result, something like
@samp{RS = "^[[:upper:]]"} can only match at the beginning of a file.
This is because @command{gawk} views the input file as one long string
-that happens to contain newline characters in it.
+that happens to contain newline characters.
It is thus best to avoid anchor characters in the value of @code{RS}.
@end quotation
@@ -6200,7 +6224,7 @@ variable are @command{gawk} extensions; they are not available in
compatibility mode
(@pxref{Options}).
In compatibility mode, only the first character of the value of
-@code{RS} is used to determine the end of the record.
+@code{RS} determines the end of the record.
@sidebar @code{RS = "\0"} Is Not Portable
@cindex portability, data files as single record
@@ -6236,10 +6260,11 @@ about.} store strings internally as C-style strings. C strings use the
It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
@command{mawk} does not allow embedded @value{NUL} characters in strings.
+(This may change in a future version of @command{mawk}.)
@cindex records, treating files as
@cindex treating files, as single records
-@xref{Readfile Function}, for an interesting, portable way to read
+@xref{Readfile Function}, for an interesting way to read
whole files. If you are using @command{gawk}, see @ref{Extension Sample
Readfile}, for another option.
@end sidebar
@@ -6320,15 +6345,11 @@ $ @kbd{awk '$1 ~ /li/ @{ print $0 @}' mail-list}
@noindent
This example prints each record in the file @file{mail-list} whose first
-field contains the string @samp{li}. The operator @samp{~} is called a
-@dfn{matching operator}
-(@pxref{Regexp Usage});
-it tests whether a string (here, the field @code{$1}) matches a given regular
-expression.
+field contains the string @samp{li}.
-By contrast, the following example
-looks for @samp{li} in @emph{the entire record} and prints the first
-field and the last field for each matching input record:
+By contrast, the following example looks for @samp{li} in @emph{the
+entire record} and prints the first and last fields for each matching
+input record:
@example
$ @kbd{awk '/li/ @{ print $1, $NF @}' mail-list}
@@ -6451,8 +6472,8 @@ It is also possible to also assign contents to fields that are out
of range. For example:
@example
-$ awk '@{ $6 = ($5 + $4 + $3 + $2)
-> print $6 @}' inventory-shipped
+$ @kbd{awk '@{ $6 = ($5 + $4 + $3 + $2)}
+> @kbd{ print $6 @}' inventory-shipped}
@print{} 168
@print{} 297
@print{} 301
@@ -6541,7 +6562,7 @@ Here is an example:
@example
$ echo a b c d e f | awk '@{ print "NF =", NF;
-> NF = 3; print $0 @}'
+> NF = 3; print $0 @}'
@print{} NF = 6
@print{} a b c
@end example
@@ -6549,7 +6570,7 @@ $ echo a b c d e f | awk '@{ print "NF =", NF;
@cindex portability, @code{NF} variable@comma{} decrementing
@quotation CAUTION
Some versions of @command{awk} don't
-rebuild @code{$0} when @code{NF} is decremented. Caveat emptor.
+rebuild @code{$0} when @code{NF} is decremented.
@end quotation
Finally, there are times when it is convenient to force
@@ -6580,7 +6601,7 @@ record, exactly as it was read from the input. This includes
any leading or trailing whitespace, and the exact whitespace (or other
characters) that separate the fields.
-It is a not-uncommon error to try to change the field separators
+It is a common error to try to change the field separators
in a record simply by setting @code{FS} and @code{OFS}, and then
expecting a plain @samp{print} or @samp{print $0} to print the
modified record.
@@ -6783,9 +6804,10 @@ $ @kbd{echo ' a b c d' | awk '@{ print; $2 = $2; print @}'}
The first @code{print} statement prints the record as it was read,
with leading whitespace intact. The assignment to @code{$2} rebuilds
@code{$0} by concatenating @code{$1} through @code{$NF} together,
-separated by the value of @code{OFS}. Because the leading whitespace
-was ignored when finding @code{$1}, it is not part of the new @code{$0}.
-Finally, the last @code{print} statement prints the new @code{$0}.
+separated by the value of @code{OFS} (which is a space by default).
+Because the leading whitespace was ignored when finding @code{$1},
+it is not part of the new @code{$0}. Finally, the last @code{print}
+statement prints the new @code{$0}.
@cindex @code{FS}, containing @code{^}
@cindex @code{^} (caret), in @code{FS}
@@ -6807,7 +6829,7 @@ also works this way. For example:
@example
$ @kbd{echo 'xxAA xxBxx C' |}
> @kbd{gawk -F '(^x+)|( +)' '@{ for (i = 1; i <= NF; i++)}
-> @kbd{printf "-->%s<--\n", $i @}'}
+> @kbd{ printf "-->%s<--\n", $i @}'}
@print{} --><--
@print{} -->AA<--
@print{} -->xxBxx<--
@@ -6870,12 +6892,7 @@ awk -F, '@var{program}' @var{input-files}
@noindent
sets @code{FS} to the @samp{,} character. Notice that the option uses
an uppercase @samp{F} instead of a lowercase @samp{f}. The latter
-option (@option{-f}) specifies a file
-containing an @command{awk} program. Case is significant in command-line
-options:
-the @option{-F} and @option{-f} options have nothing to do with each other.
-You can use both options at the same time to set the @code{FS} variable
-@emph{and} get an @command{awk} program from a file.
+option (@option{-f}) specifies a file containing an @command{awk} program.
The value used for the argument to @option{-F} is processed in exactly the
same way as assignments to the built-in variable @code{FS}.
@@ -6989,7 +7006,7 @@ to @code{FS} (the backslash is stripped). This creates a regexp meaning
If instead you want fields to be separated by a literal period followed
by any single character, use @samp{FS = "\\.."}.
-The following table summarizes how fields are split, based on the value
+The following list summarizes how fields are split, based on the value
of @code{FS} (@samp{==} means ``is equal to''):
@table @code
@@ -7010,8 +7027,7 @@ Leading and trailing matches of @var{regexp} delimit empty fields.
@item FS == ""
Each individual character in the record becomes a separate field.
-(This is a @command{gawk} extension; it is not specified by the
-POSIX standard.)
+(This is a common extension; it is not specified by the POSIX standard.)
@end table
@sidebar Changing @code{FS} Does Not Affect the Fields
@@ -7463,7 +7479,7 @@ BEGIN @{ RS = "" ; FS = "\n" @}
Running the program produces the following output:
@example
-$ awk -f addrs.awk addresses
+$ @kbd{awk -f addrs.awk addresses}
@print{} Name is: Jane Doe
@print{} Address is: 123 Main Street
@print{} City and State are: Anywhere, SE 12345-6789
@@ -7475,12 +7491,9 @@ $ awk -f addrs.awk addresses
@dots{}
@end example
-@xref{Labels Program}, for a more realistic
-program that deals with address lists.
-The following
-table
-summarizes how records are split, based on the
-value of
+@xref{Labels Program}, for a more realistic program that deals with
+address lists. The following list summarizes how records are split,
+based on the value of
@ifinfo
@code{RS}.
(@samp{==} means ``is equal to.'')
@@ -7515,8 +7528,8 @@ POSIX standard.)
@cindex @command{gawk}, @code{RT} variable in
@cindex @code{RT} variable
-In all cases, @command{gawk} sets @code{RT} to the input text that matched the
-value specified by @code{RS}.
+If not in compatibility mode (@pxref{Options}), @command{gawk} sets
+@code{RT} to the input text that matched the value specified by @code{RS}.
But if the input file ended without any text that matches @code{RS},
then @command{gawk} sets @code{RT} to the null string.
@c ENDOFRANGE recm
@@ -7614,9 +7627,7 @@ processing on the next record @emph{right now}. For example:
while (j == 0) @{
# get more text
if (getline <= 0) @{
- m = "unexpected EOF or error"
- m = (m ": " ERRNO)
- print m > "/dev/stderr"
+ print("unexpected EOF or error:", ERRNO) > "/dev/stderr"
exit
@}
# build up the line using string concatenation
@@ -7885,7 +7896,7 @@ bletch
@end example
@noindent
-Notice that this program ran the command @command{who} and printed the previous result.
+Notice that this program ran the command @command{who} and printed the result.
(If you try this program yourself, you will of course get different results,
depending upon who is logged in on your system.)
@@ -7910,7 +7921,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it at as
@samp{@w{("echo "} "date") | getline}.
-(This how BWK @command{awk} behaves.)
+(This is also how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
@@ -7938,7 +7949,7 @@ BEGIN @{
@end example
In this version of @code{getline}, none of the built-in variables are
-changed and the record is not split into fields.
+changed and the record is not split into fields. However, @code{RT} is set.
@ifinfo
@c Thanks to Paul Eggert for initial wording here
@@ -8046,7 +8057,7 @@ causes @command{awk} to set the value of @code{FILENAME}. Normally,
@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you
have not yet started to process the command-line @value{DF}s.
@value{DARKCORNER}
-(@xref{BEGIN/END},
+(See @ref{BEGIN/END};
also @pxref{Auto-set}.)
@item
@@ -8093,7 +8104,7 @@ end of file is encountered, before the element in @code{a} is assigned?
@command{gawk} treats @code{getline} like a function call, and evaluates
the expression @samp{a[++c]} before attempting to read from @file{f}.
However, some versions of @command{awk} only evaluate the expression once they
-know that there is a string value to be assigned. Caveat Emptor.
+know that there is a string value to be assigned.
@end itemize
@node Getline Summary
@@ -8109,15 +8120,15 @@ Note: for each variant, @command{gawk} sets the @code{RT} built-in variable.
@float Table,table-getline-variants
@caption{@code{getline} Variants and What They Set}
@multitable @columnfractions .33 .38 .27
-@headitem Variant @tab Effect @tab Standard / Extension
-@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR}, @code{NR}, and @code{RT} @tab Standard
-@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR}, @code{NR}, and @code{RT} @tab Standard
-@item @code{getline <} @var{file} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Standard
-@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} and @code{RT} @tab Standard
-@item @var{command} @code{| getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Standard
-@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} and @code{RT} @tab Standard
-@item @var{command} @code{|& getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab Extension
-@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} and @code{RT} @tab Extension
+@headitem Variant @tab Effect @tab @command{awk} / @command{gawk}
+@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline <} @var{file} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{|& getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{gawk}
+@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{gawk}
@end multitable
@end float
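As a small illustration of the @code{getline @var{var} < @var{file}}
form, the following fragment reads a file one line at a time without
disturbing @code{$0}, @code{NF}, @code{NR}, or @code{FNR} (the
@value{FN} @file{conf.txt} is only an example):

@example
BEGIN @{
    # each line of conf.txt goes into `line'; only `line' and RT change
    while ((getline line < "conf.txt") > 0)
        print "config:", line
    close("conf.txt")
@}
@end example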
@c ENDOFRANGE getl
@@ -8134,7 +8145,7 @@ This @value{SECTION} describes a feature that is specific to @command{gawk}.
You may specify a timeout in milliseconds for reading input from the keyboard,
a pipe, or two-way communication, including TCP/IP sockets. This can be done
on a per input, command or connection basis, by setting a special element
-in the @code{PROCINFO} (@pxref{Auto-set}) array:
+in the @code{PROCINFO} array (@pxref{Auto-set}):
@example
PROCINFO["input_name", "READ_TIMEOUT"] = @var{timeout in milliseconds}
@@ -8166,7 +8177,7 @@ while ((getline < "/dev/stdin") > 0)
@command{gawk} terminates the read operation if input does not
arrive after waiting for the timeout period, returns failure
-and sets the @code{ERRNO} variable to an appropriate string value.
+and sets @code{ERRNO} to an appropriate string value.
A negative or zero value for the timeout is the same as specifying
no timeout at all.
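The same mechanism works for data coming from a command.  In the
following sketch, a hypothetical @command{slow-server} command is given
two seconds to produce each record:

@example
BEGIN @{
    cmd = "slow-server --status"             # illustrative command
    PROCINFO[cmd, "READ_TIMEOUT"] = 2000     # milliseconds
    while ((cmd | getline line) > 0)
        print line
    close(cmd)
@}
@end example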
@@ -8273,6 +8284,10 @@ The possibilities are as follows:
@end multitable
@item
+@code{FNR} indicates how many records have been read from the current input file;
+@code{NR} indicates how many records have been read in total.
+
+@item
@command{gawk} sets @code{RT} to the text matched by @code{RS}.
@item
@@ -8283,7 +8298,7 @@ fields there are. The default way to split fields is between whitespace
characters.
@item
-Fields may be referenced using a variable, as in @samp{$NF}. Fields
+Fields may be referenced using a variable, as in @code{$NF}. Fields
may also be assigned values, which causes the value of @code{$0} to be
recomputed when it is later referenced. Assigning to a field with a number
greater than @code{NF} creates the field and rebuilds the record, using
@@ -8293,16 +8308,17 @@ thing. Decrementing @code{NF} throws away fields and rebuilds the record.
@item
Field splitting is more complicated than record splitting.
-@multitable @columnfractions .40 .40 .20
+@multitable @columnfractions .40 .45 .15
@headitem Field separator value @tab Fields are split @dots{} @tab @command{awk} / @command{gawk}
@item @code{FS == " "} @tab On runs of whitespace @tab @command{awk}
@item @code{FS == @var{any single character}} @tab On that character @tab @command{awk}
@item @code{FS == @var{regexp}} @tab On text matching the regexp @tab @command{awk}
@item @code{FS == ""} @tab Each individual character is a separate field @tab @command{gawk}
@item @code{FIELDWIDTHS == @var{list of columns}} @tab Based on character position @tab @command{gawk}
-@item @code{FPAT == @var{regexp}} @tab On text around text matching the regexp @tab @command{gawk}
+@item @code{FPAT == @var{regexp}} @tab On the text surrounding text matching the regexp @tab @command{gawk}
@end multitable
+@item
Using @samp{FS = "\n"} causes the entire record to be a single field
(assuming that newlines separate records).
@@ -8311,11 +8327,11 @@ Using @samp{FS = "\n"} causes the entire record to be a single field
This can also be done using command-line variable assignment.
@item
-@code{PROCINFO["FS"]} can be used to see how fields are being split.
+Use @code{PROCINFO["FS"]} to see how fields are being split.
@item
Use @code{getline} in its various forms to read additional records,
-from the default input stream, from a file, or from a pipe or co-process.
+from the default input stream, from a file, or from a pipe or coprocess.
@item
Use @code{PROCINFO[@var{file}, "READ_TIMEOUT"]} to cause reads to timeout
@@ -8384,6 +8400,7 @@ and discusses the @code{close()} built-in function.
* Printf:: The @code{printf} statement.
* Redirection:: How to redirect output to multiple files and
pipes.
+* Special FD:: Special files for I/O.
* Special Files:: File name interpretation in @command{gawk}.
@command{gawk} allows access to inherited file
descriptors.
@@ -8395,7 +8412,7 @@ and discusses the @code{close()} built-in function.
@node Print
@section The @code{print} Statement
-The @code{print} statement is used for producing output with simple, standardized
+Use the @code{print} statement to produce output with simple, standardized
formatting. You specify only the strings or numbers to print, in a
list separated by commas. They are output, separated by single spaces,
followed by a newline. The statement looks like this:
@@ -8419,7 +8436,7 @@ expression. Numeric values are converted to strings and then printed.
@cindex text, printing
The simple statement @samp{print} with no items is equivalent to
@samp{print $0}: it prints the entire current record. To print a blank
-line, use @samp{print ""}, where @code{""} is the empty string.
+line, use @samp{print ""}.
To print a fixed piece of text, use a string constant, such as
@w{@code{"Don't Panic"}}, as one item. If you forget to use the
double-quote characters, your text is taken as an @command{awk}
@@ -8427,8 +8444,8 @@ expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
Note that the @code{print} statement is a statement and not an
-expression---you can't use it the pattern part of a pattern-action
-statement, for example.
+expression---you can't use it in the pattern part of a
+@var{pattern}-@var{action} statement, for example.
@node Print Examples
@section @code{print} Statement Examples
@@ -8439,9 +8456,22 @@ newline, the newline is output along with the rest of the string. A
single @code{print} statement can make any number of lines this way.
@cindex newlines, printing
-The following is an example of printing a string that contains embedded newlines
+The following is an example of printing a string that contains embedded
+@ifinfo
+newlines
(the @samp{\n} is an escape sequence, used to represent the newline
character; @pxref{Escape Sequences}):
+@end ifinfo
+@ifhtml
+newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; @pxref{Escape Sequences}):
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+newlines:
+@end ifnothtml
+@end ifnotinfo
@example
$ @kbd{awk 'BEGIN @{ print "line one\nline two\nline three" @}'}
@@ -8621,13 +8651,13 @@ more fully in
@cindexawkfunc{sprintf}
@cindex @code{OFMT} variable
@cindex output, format specifier@comma{} @code{OFMT}
-The built-in variable @code{OFMT} contains the default format specification
+The built-in variable @code{OFMT} contains the format specification
that @code{print} uses with @code{sprintf()} when it wants to convert a
number to a string for printing.
The default value of @code{OFMT} is @code{"%.6g"}.
The way @code{print} prints numbers can be changed
-by supplying different format specifications
-as the value of @code{OFMT}, as shown in the following example:
+by supplying a different format specification
+for the value of @code{OFMT}, as shown in the following example:
@example
$ @kbd{awk 'BEGIN @{}
@@ -8657,9 +8687,7 @@ With @code{printf} you can
specify the width to use for each item, as well as various
formatting choices for numbers (such as what output base to use, whether to
print an exponent, whether to print a sign, and how many digits to print
-after the decimal point). You do this by supplying a string, called
-the @dfn{format string}, that controls how and where to print the other
-arguments.
+after the decimal point).
@menu
* Basic Printf:: Syntax of the @code{printf} statement.
@@ -8679,10 +8707,10 @@ printf @var{format}, @var{item1}, @var{item2}, @dots{}
@end example
@noindent
-The entire list of arguments may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions use the @samp{>}
-relational operator; otherwise, it can be confused with an output redirection
-(@pxref{Redirection}).
+As with @code{print}, the entire list of arguments may optionally be
+enclosed in parentheses. Here too, the parentheses are necessary if any
+of the item expressions use the @samp{>} relational operator; otherwise,
+it can be confused with an output redirection (@pxref{Redirection}).
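For instance, with two ordinary variables @code{a} and @code{b}, the
parentheses decide how the @samp{>} is read:

@example
printf("%d\n", a > b)     # parenthesized list: `a > b' is a comparison
printf "%d\n", a > "b"    # no parentheses: output goes to the file `b'
@end example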
@cindex format specifiers
The difference between @code{printf} and @code{print} is the @var{format}
@@ -8705,10 +8733,10 @@ on @code{printf} statements. For example:
@example
$ @kbd{awk 'BEGIN @{}
> @kbd{ORS = "\nOUCH!\n"; OFS = "+"}
-> @kbd{msg = "Dont Panic!"}
+> @kbd{msg = "Don\47t Panic!"}
> @kbd{printf "%s\n", msg}
> @kbd{@}'}
-@print{} Dont Panic!
+@print{} Don't Panic!
@end example
@noindent
@@ -8730,7 +8758,7 @@ the field width. Here is a list of the format-control letters:
@c @asis for docbook to come out right
@table @asis
@item @code{%c}
-Print a number as an ASCII character; thus, @samp{printf "%c",
+Print a number as a character; thus, @samp{printf "%c",
65} outputs the letter @samp{A}. The output for a string value is
the first character of the string.
@@ -8756,7 +8784,7 @@ a single byte (0--255).
@item @code{%d}, @code{%i}
Print a decimal integer.
The two control letters are equivalent.
-(The @samp{%i} specification is for compatibility with ISO C.)
+(The @code{%i} specification is for compatibility with ISO C.)
@item @code{%e}, @code{%E}
Print a number in scientific (exponential) notation;
@@ -8771,7 +8799,7 @@ prints @samp{1.950e+03}, with a total of four significant figures, three of
which follow the decimal point.
(The @samp{4.3} represents two modifiers,
discussed in the next @value{SUBSECTION}.)
-@samp{%E} uses @samp{E} instead of @samp{e} in the output.
+@code{%E} uses @samp{E} instead of @samp{e} in the output.
@item @code{%f}
Print a number in floating-point notation.
@@ -8797,16 +8825,16 @@ The special ``not a number'' value formats as @samp{-nan} or @samp{nan}
(@pxref{Math Definitions}).
@item @code{%F}
-Like @samp{%f} but the infinity and ``not a number'' values are spelled
+Like @code{%f} but the infinity and ``not a number'' values are spelled
using uppercase letters.
-The @samp{%F} format is a POSIX extension to ISO C; not all systems
-support it. On those that don't, @command{gawk} uses @samp{%f} instead.
+The @code{%F} format is a POSIX extension to ISO C; not all systems
+support it. On those that don't, @command{gawk} uses @code{%f} instead.
@item @code{%g}, @code{%G}
Print a number in either scientific notation or in floating-point
notation, whichever uses fewer characters; if the result is printed in
-scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
+scientific notation, @code{%G} uses @samp{E} instead of @samp{e}.
@item @code{%o}
Print an unsigned octal integer
@@ -8822,7 +8850,7 @@ are floating-point; it is provided primarily for compatibility with C.)
@item @code{%x}, @code{%X}
Print an unsigned hexadecimal integer;
-@samp{%X} uses the letters @samp{A} through @samp{F}
+@code{%X} uses the letters @samp{A} through @samp{F}
instead of @samp{a} through @samp{f}
(@pxref{Nondecimal-numbers}).
@@ -8837,7 +8865,7 @@ argument and it ignores any modifiers.
@quotation NOTE
When using the integer format-control letters for values that are
outside the range of the widest C integer type, @command{gawk} switches to
-the @samp{%g} format specifier. If @option{--lint} is provided on the
+the @code{%g} format specifier. If @option{--lint} is provided on the
command line (@pxref{Options}), @command{gawk}
warns about this. Other versions of @command{awk} may print invalid
values or do something else entirely.
@@ -8853,7 +8881,7 @@ values or do something else entirely.
A format specification can also include @dfn{modifiers} that can control
how much of the item's value is printed, as well as how much space it gets.
The modifiers come between the @samp{%} and the format-control letter.
-We will use the bullet symbol ``@bullet{}'' in the following examples to
+We use the bullet symbol ``@bullet{}'' in the following examples to
represent
spaces in the output. Here are the possible modifiers, in the order in
which they may appear:
@@ -8884,7 +8912,7 @@ It is in fact a @command{gawk} extension, intended for use in translating
messages at runtime.
@xref{Printf Ordering},
which describes how and why to use positional specifiers.
-For now, we will not use them.
+For now, we ignore them.
@item -
The minus sign, used before the width modifier (see later on in
@@ -8912,15 +8940,15 @@ to format is positive. The @samp{+} overrides the space modifier.
@item #
Use an ``alternate form'' for certain control letters.
-For @samp{%o}, supply a leading zero.
-For @samp{%x} and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for
+For @code{%o}, supply a leading zero.
+For @code{%x} and @code{%X}, supply a leading @code{0x} or @code{0X} for
a nonzero result.
-For @samp{%e}, @samp{%E}, @samp{%f}, and @samp{%F}, the result always
+For @code{%e}, @code{%E}, @code{%f}, and @code{%F}, the result always
contains a decimal point.
-For @samp{%g} and @samp{%G}, trailing zeros are not removed from the result.
+For @code{%g} and @code{%G}, trailing zeros are not removed from the result.
@item 0
-A leading @samp{0} (zero) acts as a flag that indicates that output should be
+A leading @samp{0} (zero) acts as a flag indicating that output should be
padded with zeros instead of spaces.
This applies only to the numeric output formats.
This flag only has an effect when the field width is wider than the
@@ -9106,7 +9134,7 @@ the @command{awk} program:
@example
awk 'BEGIN @{ print "Name Number"
print "---- ------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
@end example
The above example mixes @code{print} and @code{printf} statements in
@@ -9116,7 +9144,7 @@ same results:
@example
awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
printf "%-10s %s\n", "----", "------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
@end example
@noindent
@@ -9131,7 +9159,7 @@ emphasized by storing it in a variable, like this:
awk 'BEGIN @{ format = "%-10s %s\n"
printf format, "Name", "Number"
printf format, "----", "------" @}
- @{ printf format, $1, $2 @}' mail-list
+ @{ printf format, $1, $2 @}' mail-list
@end example
@c ENDOFRANGE printfs
@@ -9152,7 +9180,7 @@ This is called @dfn{redirection}.
@quotation NOTE
When @option{--sandbox} is specified (@pxref{Options}),
-redirecting output to files and pipes is disabled.
+redirecting output to files, pipes, and coprocesses is disabled.
@end quotation
A redirection appears after the @code{print} or @code{printf} statement.
@@ -9249,17 +9277,11 @@ in an @command{awk} script run periodically for system maintenance:
@example
report = "mail bug-system"
-print "Awk script failed:", $0 | report
-m = ("at record number " FNR " of " FILENAME)
-print m | report
+print("Awk script failed:", $0) | report
+print("at record number", FNR, "of", FILENAME) | report
close(report)
@end example
-The message is built using string concatenation and saved in the variable
-@code{m}. It's then sent down the pipeline to the @command{mail} program.
-(The parentheses group the items to concatenate---see
-@ref{Concatenation}.)
-
The @code{close()} function is called here because it's a good idea to close
the pipe as soon as all the intended output has been sent to it.
@xref{Close Files And Pipes},
@@ -9364,23 +9386,8 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE outre
@c ENDOFRANGE reout
-@node Special Files
-@section Special @value{FFN}s in @command{gawk}
-@c STARTOFRANGE gfn
-@cindex @command{gawk}, file names in
-
-@command{gawk} provides a number of special @value{FN}s that it interprets
-internally. These @value{FN}s provide access to standard file descriptors
-and TCP/IP networking.
-
-@menu
-* Special FD:: Special files for I/O.
-* Special Network:: Special files for network communications.
-* Special Caveats:: Things to watch out for.
-@end menu
-
@node Special FD
-@subsection Special Files for Standard Descriptors
+@section Special Files for Standard Pre-Opened Data Streams
@cindex standard input
@cindex input, standard
@cindex standard output
@@ -9391,9 +9398,12 @@ and TCP/IP networking.
@cindex files, descriptors, See file descriptors
Running programs conventionally have three input and output streams
-already available to them for reading and writing. These are known as
-the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
-output}. These streams are, by default, connected to your keyboard and screen, but
+already available to them for reading and writing. These are known
+as the @dfn{standard input}, @dfn{standard output}, and @dfn{standard
+error output}. These open streams (and any other open file or pipe)
+are often referred to by the technical term @dfn{file descriptors}.
+
+These streams are, by default, connected to your keyboard and screen, but
they are often redirected with the shell, via the @samp{<}, @samp{<<},
@samp{>}, @samp{>>}, @samp{>&}, and @samp{|} operators. Standard error
is typically used for writing error messages; the reason there are two separate
@@ -9402,7 +9412,7 @@ redirected separately.
@cindex differences in @command{awk} and @command{gawk}, error messages
@cindex error handling
-In other implementations of @command{awk}, the only way to write an error
+In traditional implementations of @command{awk}, the only way to write an error
message to standard error in an @command{awk} program is as follows:
@example
@@ -9428,19 +9438,19 @@ that is connected to your keyboard and screen. It represents the
``terminal,''@footnote{The ``tty'' in @file{/dev/tty} stands for
``Teletype,'' a serial terminal.} which on modern systems is a keyboard
and screen, not a serial console.)
-This usually has the same effect but not always: although the
+This generally has the same effect but not always: although the
standard error stream is usually the screen, it can be redirected; when
that happens, writing to the screen is not correct. In fact, if
@command{awk} is run from a background job, it may not have a
terminal at all.
Then opening @file{/dev/tty} fails.
-@command{gawk} provides special @value{FN}s for accessing the three standard
-streams. @value{COMMONEXT} It also provides syntax for accessing
-any other inherited open files. If the @value{FN} matches
-one of these special names when @command{gawk} redirects input or output,
-then it directly uses the stream that the @value{FN} stands for.
-These special @value{FN}s work for all operating systems that @command{gawk}
+@command{gawk}, BWK @command{awk}, and @command{mawk} provide
+special @value{FN}s for accessing the three standard streams.
+If the @value{FN} matches one of these special names when @command{gawk}
+(or one of the others) redirects input or output, then it directly uses
+the descriptor that the @value{FN} stands for. These special
+@value{FN}s work for all operating systems that @command{gawk}
has been ported to, not just those that are POSIX-compliant:
@cindex common extensions, @code{/dev/stdin} special file
@@ -9462,19 +9472,10 @@ The standard output (file descriptor 1).
@item /dev/stderr
The standard error output (file descriptor 2).
-
-@item /dev/fd/@var{N}
-The file associated with file descriptor @var{N}. Such a file must
-be opened by the program initiating the @command{awk} execution (typically
-the shell). Unless special pains are taken in the shell from which
-@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
@end table
-The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
-are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
-respectively. However, they are more self-explanatory.
-The proper way to write an error message in a @command{gawk} program
-is to use @file{/dev/stderr}, like this:
+With these facilities,
+the proper way to write an error message then becomes:
@example
print "Serious error detected!" > "/dev/stderr"
@@ -9486,14 +9487,51 @@ Like any other redirection, the value must be a string.
It is a common error to omit the quotes, which leads
to confusing results.
-Finally, using the @code{close()} function on a @value{FN} of the
+@command{gawk} does not treat these @value{FN}s as special when
+in POSIX compatibility mode. However, since BWK @command{awk}
+supports them, @command{gawk} does support them even when
+invoked with the @option{--traditional} option (@pxref{Options}).
+
+@node Special Files
+@section Special @value{FFN}s in @command{gawk}
+@c STARTOFRANGE gfn
+@cindex @command{gawk}, file names in
+
+Besides access to standard input, standard output, and standard error,
+@command{gawk} provides access to any open file descriptor.
+Additionally, there are special @value{FN}s reserved for
+TCP/IP networking.
+
+@menu
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
+* Special Network:: Special files for network communications.
+* Special Caveats:: Things to watch out for.
+@end menu
+
+@node Other Inherited Files
+@subsection Accessing Other Open Files With @command{gawk}
+
+Besides the @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+special @value{FN}s mentioned earlier, @command{gawk} provides syntax
+for accessing any other inherited open file:
+
+@table @file
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must
+be opened by the program initiating the @command{awk} execution (typically
+the shell). Unless special pains are taken in the shell from which
+@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
+@end table
+
+The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are essentially aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and
+@file{/dev/fd/2}, respectively. However, those names are more self-explanatory.
+
+Note that using @code{close()} on a @value{FN} of the
form @code{"/dev/fd/@var{N}"}, for file descriptor numbers
above two, does actually close the given file descriptor.
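For example, a shell script could open descriptor 3 itself and let the
@command{awk} program write to it (the file and program names here are
only illustrative):

@example
# In the shell:   gawk -f prog.awk data 3> results.txt
# In prog.awk:
@{ print "summary:", $1 > "/dev/fd/3" @}    # ends up in results.txt
@end example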
-The @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
-special files are also recognized internally by several other
-versions of @command{awk}.
-
@node Special Network
@subsection Special Files for Network Communications
@cindex networks, support for
@@ -9522,15 +9560,20 @@ Full discussion is delayed until
@node Special Caveats
@subsection Special @value{FFN} Caveats
-Here is a list of things to bear in mind when using the
+Here are some things to bear in mind when using the
special @value{FN}s that @command{gawk} provides:
@itemize @value{BULLET}
@cindex compatibility mode (@command{gawk}), file names
@cindex file names, in compatibility mode
@item
-Recognition of these special @value{FN}s is disabled if @command{gawk} is in
-compatibility mode (@pxref{Options}).
+Recognition of the @value{FN}s for the three standard pre-opened
+files is disabled only in POSIX mode.
+
+@item
+Recognition of the other special @value{FN}s is disabled if @command{gawk} is in
+compatibility mode (either @option{--traditional} or @option{--posix};
+@pxref{Options}).
@item
@command{gawk} @emph{always}
@@ -9700,7 +9743,8 @@ to a string indicating the error.
Note also that @samp{close(FILENAME)} has no ``magic'' effects on the
implicit loop that reads through the files named on the command line.
It is, more likely, a close of a file that was never opened with a
-redirection, so @command{awk} silently does nothing.
+redirection, so @command{awk} silently does nothing, except return
+a negative value.
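For example, with current @command{gawk}:

@example
$ @kbd{awk 'BEGIN @{ print close("never-opened") @}'}
@print{} -1
@end example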
@cindex @code{|} (vertical bar), @code{|&} operator (I/O), pipes@comma{} closing
When using the @samp{|&} operator to communicate with a coprocess,
@@ -9712,10 +9756,10 @@ the first argument is the name of the command or special file used
to start the coprocess.
The second argument should be a string, with either of the values
@code{"to"} or @code{"from"}. Case does not matter.
-As this is an advanced feature, a more complete discussion is
+As this is an advanced feature, discussion is
delayed until
@ref{Two-way I/O},
-which discusses it in more detail and gives an example.
+which describes it in more detail and gives an example.
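In outline, the idiom looks like this, with @command{sort} standing in
for an arbitrary coprocess:

@example
print "some data" |& "sort"
close("sort", "to")          # sort now sees end-of-file on its input
"sort" |& getline result     # read back what sort produced
close("sort")
@end example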
@sidebar Using @code{close()}'s Return Value
@cindex dark corner, @code{close()} function
@@ -9787,15 +9831,15 @@ that modify the behavior of the format control letters.
@item
Output from both @code{print} and @code{printf} may be redirected to
-files, pipes, and co-processes.
+files, pipes, and coprocesses.
@item
@command{gawk} provides special file names for access to standard input,
output and error, and for network communications.
@item
-Use @code{close()} to close open file, pipe and co-process redirections.
-For co-processes, it is possible to close only one direction of the
+Use @code{close()} to close open file, pipe and coprocess redirections.
+For coprocesses, it is possible to close only one direction of the
communications.
@end itemize
@@ -10080,7 +10124,7 @@ if (/barfly/ || /camelot/)
@noindent
are exactly equivalent.
One rather bizarre consequence of this rule is that the following
-Boolean expression is valid, but does not do what the user probably
+Boolean expression is valid, but does not do what its author probably
intended:
@example
@@ -10126,10 +10170,9 @@ Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
older implementations do not.
@value{DARKCORNER}
-This can lead to confusion when attempting to use regexp constants
-as arguments to user-defined functions
-(@pxref{User-defined}).
-For example:
+Because some built-in functions accept regexp constants as arguments,
+it can be confusing when attempting to use regexp constants as arguments
+to user-defined functions (@pxref{User-defined}). For example:
@example
function mysub(pat, repl, str, global)
@@ -10197,8 +10240,8 @@ variable's current value. Variables are given new values with
@dfn{decrement operators}.
@xref{Assignment Ops}.
In addition, the @code{sub()} and @code{gsub()} functions can
-change a variable's value, and the @code{match()}, @code{patsplit()}
-and @code{split()} functions can change the contents of their
+change a variable's value, and the @code{match()}, @code{split()}
+and @code{patsplit()} functions can change the contents of their
array parameters. @xref{String Functions}.
@cindex variables, built-in
@@ -10214,7 +10257,7 @@ Variables in @command{awk} can be assigned either numeric or string values.
The kind of value a variable holds can change over the life of a program.
By default, variables are initialized to the empty string, which
is zero if converted to a number. There is no need to explicitly
-``initialize'' a variable in @command{awk},
+initialize a variable in @command{awk},
which is what you would do in C and in most other traditional languages.
@node Assignment Options
@@ -10422,7 +10465,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@noindent
The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
-treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
+treats @samp{4,321} as 4, while in the Danish locale, it's treated
as the full number, 4.321.
Some earlier versions of @command{gawk} fully complied with this aspect
@@ -10974,7 +11017,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@end sidebar
@c ENDOFRANGE exas
@c ENDOFRANGE opas
@@ -11227,7 +11270,7 @@ attribute.
@item
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
-@code{patsplit()}, @code{split()} and @code{match()} that are numeric
+@code{match()}, @code{split()} and @code{patsplit()} that are numeric
strings have the @var{strnum} attribute. Otherwise, they have
the @var{string} attribute. Uninitialized variables also have the
@var{strnum} attribute.
@@ -11382,22 +11425,23 @@ Thus, the six-character input string @w{@samp{ +3.14}} receives the
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
+@c 22.9.2014: Tested with mawk and BWK awk, got same results.
@example
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == " +3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == " +3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "+3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "+3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == 3.14) @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == " +3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == " +3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "+3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "+3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
@@ -11471,9 +11515,8 @@ part of the test always succeeds. Because the operators are
so similar, this kind of error is very difficult to spot when
scanning the source code.
-@cindex @command{gawk}, comparison operators and
-The following table of expressions illustrates the kind of comparison
-@command{gawk} performs, as well as what the result of the comparison is:
+The following list of expressions illustrates the kinds of comparisons
+@command{awk} performs, as well as what the result of each comparison is:
@table @code
@item 1.5 <= 2.0
@@ -11546,7 +11589,7 @@ dynamic regexp (@pxref{Regexp Usage}; also
@cindex @command{awk}, regexp constants and
@cindex regexp constants
-In modern implementations of @command{awk}, a constant regular
+A constant regular
expression in slashes by itself is also an expression. The regexp
@code{/@var{regexp}/} is an abbreviation for the following comparison expression:
@@ -11566,7 +11609,7 @@ where this is discussed in more detail.
The POSIX standard says that string comparison is performed based
on the locale's @dfn{collating order}. This is the order in which
characters sort, as defined by the locale (for more discussion,
-@pxref{Ranges and Locales}). This order is usually very different
+@pxref{Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -11646,7 +11689,7 @@ no substring @samp{foo} in the record.
True if at least one of @var{boolean1} or @var{boolean2} is true.
For example, the following statement prints all records in the input
that contain @emph{either} @samp{edu} or
-@samp{li} or both:
+@samp{li}:
@example
if ($0 ~ /edu/ || $0 ~ /li/) print
@@ -11655,6 +11698,9 @@ if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is false. This can make a difference when @var{boolean2} contains
expressions that have side effects.
+(Thus, this test never really distinguishes records that contain both
+@samp{edu} and @samp{li}---as soon as @samp{edu} is matched,
+the full test succeeds.)
@item ! @var{boolean}
True if @var{boolean} is false. For example,
@@ -11664,7 +11710,7 @@ variable is not defined:
@example
BEGIN @{ if (! ("HOME" in ENVIRON))
- print "no home!" @}
+ print "no home!" @}
@end example
(The @code{in} operator is described in
@@ -11963,7 +12009,7 @@ expression because the first @samp{$} has higher precedence than the
@samp{++}; to avoid the problem the expression can be rewritten as
@samp{$($0++)--}.
-This table presents @command{awk}'s operators, in order of highest
+This list presents @command{awk}'s operators, in order of highest
to lowest precedence:
@c @asis for docbook to come out right
@@ -12120,8 +12166,8 @@ system about the local character set and language. The ISO C standard
defines a default @code{"C"} locale, which is an environment that is
typical of what many C programmers are used to.
-Once upon a time, the locale setting used to affect regexp matching
-(@pxref{Ranges and Locales}), but this is no longer true.
+Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (@pxref{Ranges and Locales}).
Locales can affect record splitting. For the normal case of @samp{RS =
"\n"}, the locale is largely irrelevant. For other single-character
@@ -12133,7 +12179,7 @@ character}, to find the record terminator.
Locales can affect how dates and times are formatted (@pxref{Time
Functions}). For example, a common way to abbreviate the date September
4, 2015 in the United States is ``9/4/15.'' In many countries in
-Europe, however, it is abbreviated ``4.9.15.'' Thus, the @samp{%x}
+Europe, however, it is abbreviated ``4.9.15.'' Thus, the @code{%x}
specification in a @code{"US"} locale might produce @samp{9/4/15},
while in a @code{"EUROPE"} locale, it might produce @samp{4.9.15}.
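For instance, the following one-liner prints the current date in
whatever shape the active locale dictates (the output naturally varies
from system to system, so none is shown here):

@example
gawk 'BEGIN @{ print strftime("%x", systime()) @}'
@end example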
@@ -12175,7 +12221,8 @@ Locales can influence the conversions.
@item
@command{awk} provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus and minus.
-It also provides comparison operators, boolean operators, and regexp
+It also provides comparison operators, boolean operators, array membership
+testing, and regexp
matching operators. String concatenation is accomplished by placing
two expressions next to each other; there is no explicit operator.
The three-operand @samp{?:} operator provides an ``if-else'' test within
@@ -12190,7 +12237,7 @@ In @command{awk}, a value is considered to be true if it is non-zero
@emph{or} non-null. Otherwise, the value is false.
@item
-A value's type is set upon each assignment and may change over its
+A variable's type is set upon each assignment and may change over its
lifetime. The type determines how it behaves in comparisons (string
or numeric).
@@ -12270,7 +12317,7 @@ is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
@item @var{begpat}, @var{endpat}
-A pair of patterns separated by a comma, specifying a range of records.
+A pair of patterns separated by a comma, specifying a @dfn{range} of records.
The range includes both the initial record that matches @var{begpat} and
the final record that matches @var{endpat}.
(@xref{Ranges}.)
@@ -12360,8 +12407,8 @@ $ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@cindex regexp constants, as patterns
@cindex patterns, regexp constants as
A regexp constant as a pattern is also a special case of an expression
-pattern. The expression @code{/li/} has the value one if @samp{li}
-appears in the current input record. Thus, as a pattern, @code{/li/}
+pattern. The expression @samp{/li/} has the value one if @samp{li}
+appears in the current input record. Thus, as a pattern, @samp{/li/}
matches any record containing @samp{li}.
@cindex Boolean expressions, as patterns
@@ -12543,7 +12590,7 @@ input is read. For example:
@example
$ @kbd{awk '}
> @kbd{BEGIN @{ print "Analysis of \"li\"" @}}
-> @kbd{/li/ @{ ++n @}}
+> @kbd{/li/ @{ ++n @}}
> @kbd{END @{ print "\"li\" appears in", n, "records." @}' mail-list}
@print{} Analysis of "li"
@print{} "li" appears in 4 records.
@@ -12623,9 +12670,10 @@ The POSIX standard specifies that @code{NF} is available in an @code{END}
rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
-In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
-other implementations, do not.
+In fact, all of BWK @command{awk}, @command{mawk}, and @command{gawk}
+preserve the value of @code{$0} for use in @code{END} rules. Be aware,
+however, that some other implementations and many older versions
+of Unix @command{awk} do not.
The third point follows from the first two. The meaning of @samp{print}
inside a @code{BEGIN} or @code{END} rule is the same as always:
@@ -12720,8 +12768,8 @@ level of the @command{awk} program.
@cindex @code{next} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{next} statement (@pxref{Next Statement}) is not allowed inside
-either a @code{BEGINFILE} or and @code{ENDFILE} rule. The @code{nextfile}
-statement (@pxref{Nextfile Statement}) is allowed only inside a
+either a @code{BEGINFILE} or an @code{ENDFILE} rule. The @code{nextfile}
+statement is allowed only inside a
@code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
@@ -12785,7 +12833,7 @@ There are two ways to get the value of the shell variable
into the body of the @command{awk} program.
@cindex shells, quoting
-The most common method is to use shell quoting to substitute
+A common method is to use shell quoting to substitute
the variable's value into the program inside the script.
For example, consider the following program:
@@ -13042,20 +13090,21 @@ If the @var{condition} is true, it executes the statement @var{body}.
is not zero and not a null string.)
@end ifinfo
After @var{body} has been executed,
-@var{condition} is tested again, and if it is still true, @var{body} is
-executed again. This process repeats until the @var{condition} is no longer
-true. If the @var{condition} is initially false, the body of the loop is
-never executed and @command{awk} continues with the statement following
+@var{condition} is tested again, and if it is still true, @var{body}
+executes again. This process repeats until the @var{condition} is no longer
+true. If the @var{condition} is initially false, the body of the loop
+never executes and @command{awk} continues with the statement following
the loop.
This example prints the first three fields of each record, one per line:
@example
-awk '@{
- i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
+awk '
+@{
+ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
@}' inventory-shipped
@end example
@@ -13089,14 +13138,14 @@ do
while (@var{condition})
@end example
-Even if the @var{condition} is false at the start, the @var{body} is
-executed at least once (and only once, unless executing @var{body}
+Even if the @var{condition} is false at the start, the @var{body}
+executes at least once (and only once, unless executing @var{body}
makes @var{condition} true). Contrast this with the corresponding
@code{while} statement:
@example
while (@var{condition})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -13106,11 +13155,11 @@ The following is an example of a @code{do} statement:
@example
@{
- i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
+ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
@}
@end example
@@ -13147,9 +13196,10 @@ compares it against the desired number of iterations.
For example:
@example
-awk '@{
- for (i = 1; i <= 3; i++)
- print $i
+awk '
+@{
+ for (i = 1; i <= 3; i++)
+ print $i
@}' inventory-shipped
@end example
@@ -13177,7 +13227,7 @@ between 1 and 100:
@example
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
@end example
If there is nothing to be done, any of the three expressions in the
@@ -13497,7 +13547,7 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
According to the POSIX standard, the behavior is undefined if the
@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error. Although POSIX permits it,
+@command{gawk} treats it as a syntax error. Although POSIX does not disallow it,
most other @command{awk} implementations don't allow the @code{next}
statement inside function bodies (@pxref{User-defined}). Just as with any
other @code{next} statement, a @code{next} statement inside a function
@@ -13552,7 +13602,7 @@ opened with redirections. It is not related to the main processing that
@quotation NOTE
For many years, @code{nextfile} was a
-@command{gawk} extension. As of September, 2012, it was accepted for
+common extension. In September, 2012, it was accepted for
inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@end quotation
@@ -13561,8 +13611,8 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
-Versions}) also support @code{nextfile}. However, they don't allow the
+The current version of BWK @command{awk}, and @command{mawk}
+also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
next record and starts processing it with the first rule in the program,
@@ -13594,8 +13644,8 @@ the program to stop immediately.
An @code{exit} statement that is not part of a @code{BEGIN} or @code{END}
rule stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-@code{END} rule if there is one.
-Any @code{ENDFILE} rules are also skipped; they are not executed.
+@code{END} rule if there is one. @command{gawk} also skips
+any @code{ENDFILE} rules; they do not execute.
In such a case,
if you don't want the @code{END} rule to do its job, set a variable
@@ -13703,7 +13753,7 @@ respectively, should use binary I/O. A string value of @code{"rw"} or
@code{"wr"} indicates that all files should use binary I/O. Any other
string value is treated the same as @code{"rw"}, but causes @command{gawk}
to generate a warning message. @code{BINMODE} is described in more
-detail in @ref{PC Using}. @command{mawk} @pxref{Other Versions}),
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}),
also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@@ -13830,7 +13880,7 @@ printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
(@pxref{String Functions}).
Its default value is @code{"%.6g"}. Earlier versions of @command{awk}
-also used @code{OFMT} to specify the format for converting numbers to
+used @code{OFMT} to specify the format for converting numbers to
strings in general expressions; this is now done by @code{CONVFMT}.
@cindex @code{sprintf()} function, @code{OFMT} variable and
@@ -13982,8 +14032,8 @@ successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
-program, @command{gawk} automatically sets it to a new value when the
-next file is opened.
+program, @command{gawk} automatically sets it to a new value when it
+opens the next file.
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
@@ -14048,10 +14098,10 @@ can give @code{FILENAME} a value.
@cindex @code{FNR} variable
@item @code{FNR}
-The current record number in the current file. @code{FNR} is
-incremented each time a new record is read
-(@pxref{Records}). It is reinitialized
-to zero each time a new input file is started.
+The current record number in the current file. @command{awk} increments
+@code{FNR} each time it reads a new record (@pxref{Records}).
+@command{awk} resets @code{FNR} to zero each time it starts a new
+input file.
@cindex @code{NF} variable
@item @code{NF}
@@ -14083,7 +14133,7 @@ array causes a fatal error. Any attempt to assign to an element of
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
-@code{NR} is incremented each time a new record is read.
+@command{awk} increments @code{NR} each time it reads a new record.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@@ -14163,7 +14213,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (@var{index} in @var{array})} loops.
+@samp{for (@var{indx} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -14184,7 +14234,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
-if your version of @command{gawk} supports arbitrary precision numbers
+if your version of @command{gawk} supports arbitrary precision arithmetic
(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@@ -14233,14 +14283,14 @@ The @code{PROCINFO} array has the following additional uses:
@itemize @value{BULLET}
@item
-It may be used to cause coprocesses to communicate over pseudo-ttys
-instead of through two-way pipes; this is discussed further in
-@ref{Two-way I/O}.
-
-@item
It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
+
+@item
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@end itemize
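As a rough sketch of both uses just listed (the command shown is only a
placeholder):

@example
cmd = "ssh somehost some_command"
PROCINFO[cmd, "pty"] = 1                 # use a pseudo-tty, not a pipe
PROCINFO[cmd, "READ_TIMEOUT"] = 5000     # give up after five seconds
print "a request" |& cmd
cmd |& getline response
@end example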
@cindex @code{RLENGTH} variable
@@ -14482,6 +14532,12 @@ following @option{-v} are passed on to the @command{awk} program.
(@xref{Getopt Function}, for an @command{awk} library function that
parses command-line options.)
+When designing your program, you should choose options that don't
+conflict with @command{gawk}'s, since it will process any options
+that it accepts before passing the rest of the command line on to
+your program. Using @samp{#!} with the @option{-E} option may help
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
@node Pattern Action Summary
@section Summary
@@ -14516,7 +14572,7 @@ input and output statements, and deletion statements.
The control statements in @command{awk} are @code{if}-@code{else},
@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
adds the @code{switch} statement. There are two flavors of @code{for}
-statement: one for for performing general looping, and the other iterating
+statement: one for performing general looping, and the other for iterating
through an array.
@item
@@ -14533,12 +14589,17 @@ The @code{exit} statement terminates your program. When executed
from an action (or function body) it transfers control to the
@code{END} statements. From an @code{END} statement body, it exits
immediately. You may pass an optional numeric value to be used
-at @command{awk}'s exit status.
+as @command{awk}'s exit status.
@item
Some built-in variables provide control over @command{awk}, mainly for I/O.
Other variables convey information from @command{awk} to your program.
+@item
+@code{ARGC} and @code{ARGV} make the command-line arguments available
+to your program. Manipulating them from a @code{BEGIN} rule lets you
+control how @command{awk} will process the provided @value{DF}s.
+
@end itemize
@node Arrays
@@ -14559,24 +14620,13 @@ The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
ability to support true arrays of arrays.
-@cindex variables, names of
-@cindex functions, names of
-@cindex arrays, names of, and names of functions/variables
-@cindex names, arrays/variables
-@cindex namespace issues
-@command{awk} maintains a single set
-of names that may be used for naming variables, arrays, and functions
-(@pxref{User-defined}).
-Thus, you cannot have a variable and an array with the same name in the
-same @command{awk} program.
-
@menu
* Array Basics:: The basics of arrays.
-* Delete:: The @code{delete} statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
@@ -15004,14 +15054,14 @@ begin with a number:
@example
@c file eg/misc/arraymax.awk
@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
@}
END @{
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
@}
@c endfile
@end example
@@ -15051,9 +15101,9 @@ program's @code{END} rule, as follows:
@example
END @{
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
@}
@end example
@@ -15075,7 +15125,7 @@ an array:
@example
for (@var{var} in @var{array})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -15148,7 +15198,7 @@ BEGIN @{
@}
@end example
-Here is what happens when run with @command{gawk}:
+Here is what happens when run with @command{gawk} (and @command{mawk}):
@example
$ @kbd{gawk -f loopcheck.awk}
@@ -15266,7 +15316,8 @@ does not affect the loop.
For example:
@example
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
> @kbd{ for (i in a)}
@@ -15274,7 +15325,8 @@ $ @kbd{gawk 'BEGIN @{}
> @kbd{@}'}
@print{} 4 4
@print{} 3 3
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
@@ -15323,118 +15375,6 @@ the @code{delete} statement.
In addition, @command{gawk} provides built-in functions for
sorting arrays; see @ref{Array Sorting Functions}.
-@node Delete
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex deleting elements in arrays
-@cindex arrays, elements, deleting
-@cindex elements in arrays, deleting
-
-To remove an individual element of an array, use the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index-expression}]
-@end example
-
-Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never
-been referred to or been given a value.
-The following is an example of deleting elements in an array:
-
-@example
-for (i in frequencies)
- delete frequencies[i]
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-Once an element is deleted, a subsequent @code{for} statement to scan the array
-does not report that element and the @code{in} operator to check for
-the presence of that element returns zero (i.e., false):
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-@cindex null strings, and deleting array elements
-It is important to note that deleting an element is @emph{not} the
-same as assigning it a null value (the empty string, @code{""}).
-For example:
-
-@example
-foo[4] = ""
-if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-@end example
-
-@cindex lint checking, array elements
-It is not an error to delete an element that does not exist.
-However, if @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} issues a warning message when an element that
-is not in the array is deleted.
-
-@cindex common extensions, @code{delete} to delete entire arrays
-@cindex extensions, common@comma{} @code{delete} to delete entire arrays
-@cindex arrays, deleting entire contents
-@cindex deleting entire arrays
-@cindex @code{delete} @var{array}
-@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
-All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the @code{delete} statement,
-as follows:
-
-
-@example
-delete @var{array}
-@end example
-
-Using this version of the @code{delete} statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
-@cindex Brian Kernighan's @command{awk}
-@quotation NOTE
-For many years,
-using @code{delete} without a subscript was a @command{gawk} extension.
-As of September, 2012, it was accepted for
-inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
-the Austin Group website}. This form of the @code{delete} statement is also supported
-by BWK @command{awk} and @command{mawk}, as well as
-by a number of other implementations (@pxref{Other Versions}).
-@end quotation
-
-@cindex portability, deleting array elements
-@cindex Brennan, Michael
-The following statement provides a portable but nonobvious way to clear
-out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
-
-@example
-split("", array)
-@end example
-
-@cindex @code{split()} function, array elements@comma{} deleting
-The @code{split()} function
-(@pxref{String Functions})
-clears out the target array first. This call asks it to split
-apart the null string. Because there is no data to split out, the
-function simply clears the array and then returns.
-
-@quotation CAUTION
-Deleting an array does not change its type; you cannot
-delete an array and then use the array's name as a scalar
-(i.e., a regular variable). For example, the following does not work:
-
-@example
-a[1] = 3
-delete a
-a = 3
-@end example
-@end quotation
-
@node Numeric Array Subscripts
@section Using Numbers to Subscript Arrays
@@ -15475,7 +15415,7 @@ since @code{"12.15"} is different from @code{"12.153"}.
@cindex integer array indices
According to the rules for conversions
(@pxref{Conversion}), integer
-values are always converted to strings as integers, no matter what the
+values always convert to strings as integers, no matter what the
value of @code{CONVFMT} may happen to be. So the usual case of
the following works:
@@ -15498,7 +15438,7 @@ and
all refer to the same element!
As with many things in @command{awk}, the majority of the time
-things work as one would expect them to. But it is useful to have a precise
+things work as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.
@@ -15562,6 +15502,119 @@ Even though it is somewhat unusual, the null string
if @option{--lint} is provided
on the command line (@pxref{Options}).
+@node Delete
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements in arrays
+@cindex arrays, elements, deleting
+@cindex elements in arrays, deleting
+
+To remove an individual element of an array, use the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index-expression}]
+@end example
+
+Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never
+been referred to or been given a value.
+The following is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+Once an element is deleted, a subsequent @code{for} statement to scan the array
+does not report that element and the @code{in} operator to check for
+the presence of that element returns zero (i.e., false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+@cindex null strings, and deleting array elements
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+For example:
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+@cindex lint checking, array elements
+It is not an error to delete an element that does not exist.
+However, if @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} issues a warning message when an element that
+is not in the array is deleted.
+
+@cindex common extensions, @code{delete} to delete entire arrays
+@cindex extensions, common@comma{} @code{delete} to delete entire arrays
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex @code{delete} @var{array}
+@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
+All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the @code{delete} statement,
+as follows:
+
+
+@example
+delete @var{array}
+@end example
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+This form of the @code{delete} statement is also supported
+by BWK @command{awk} and @command{mawk}, as well as
+by a number of other implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@quotation NOTE
+For many years, using @code{delete} without a subscript was a common
+extension. In September, 2012, it was accepted for inclusion into the
+POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
+the Austin Group website}.
+@end quotation
+
+@cindex portability, deleting array elements
+@cindex Brennan, Michael
+The following statement provides a portable but nonobvious way to clear
+out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
+
+@example
+split("", array)
+@end example
+
+@cindex @code{split()} function, array elements@comma{} deleting
+The @code{split()} function
+(@pxref{String Functions})
+clears out the target array first. This call asks it to split
+apart the null string. Because there is no data to split out, the
+function simply clears the array and then returns.
+
+@quotation CAUTION
+Deleting all the elements from an array does not change its type; you cannot
+clear an array and then use the array's name as a scalar
+(i.e., a regular variable). For example, the following does not work:
+
+@example
+a[1] = 3
+delete a
+a = 3
+@end example
+@end quotation
+
@node Multidimensional
@section Multidimensional Arrays
@@ -15573,7 +15626,7 @@ on the command line (@pxref{Options}).
@cindex arrays, multidimensional
A multidimensional array is an array in which an element is identified
by a sequence of indices instead of a single index. For example, a
-two-dimensional array requires two indices. The usual way (in most
+two-dimensional array requires two indices. The usual way (in many
languages, including @command{awk}) to refer to an element of a
two-dimensional array named @code{grid} is with
@code{grid[@var{x},@var{y}]}.
@@ -15748,8 +15801,9 @@ a[1][3][1, "name"] = "barney"
Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
-non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @code{4} of the main array @code{a}:
+non-rectangular, or jagged in structure. You can assign a scalar value to
+the index @code{4} of the main array @code{a}, even though @code{a[1]}
+is itself an array and not a scalar:
@example
a[4] = "An element in a jagged array"
@@ -15831,6 +15885,8 @@ for (i in array) @{
print array[i][j]
@}
@}
+ else
+ print array[i]
@}
@end example
@@ -16115,8 +16171,9 @@ Often random integers are needed instead. Following is a user-defined function
that can be used to obtain a random non-negative integer less than @var{n}:
@example
-function randint(n) @{
- return int(n * rand())
+function randint(n)
+@{
+ return int(n * rand())
@}
@end example
@@ -16136,8 +16193,7 @@ function roll(n) @{ return 1 + int(rand() * n) @}
# Roll 3 six-sided dice and
# print total number of points.
@{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
@}
@end example
@@ -16226,7 +16282,7 @@ doing index calculations, particularly if you are used to C.
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
-towards the end since the list is presented in alphabetic order.
+towards the end since the list is presented alphabetically.
Those functions that are specific to @command{gawk} are marked with a
pound sign (@samp{#}). They are not available in compatibility mode
@@ -16270,6 +16326,7 @@ When comparing strings, @code{IGNORECASE} affects the sorting
(@pxref{Array Sorting Functions}). If the
@var{source} array contains subarrays as values (@pxref{Arrays of
Arrays}), they will come last, after all scalar values.
+Subarrays are @emph{not} recursively sorted.
For example, if the contents of @code{a} are as follows:
@@ -16406,7 +16463,10 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-It is a fatal error to use a regexp constant for @var{find}.
+With BWK @command{awk} and @command{gawk},
+it is a fatal error to use a regexp constant for @var{find}.
+Other implementations allow it, simply treating the regexp
+constant as an expression meaning @samp{$0 ~ /regexp/}.
@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@@ -16520,13 +16580,12 @@ For example:
@example
@c file eg/misc/findpat.awk
@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
@}
@}
@c endfile
@@ -16622,7 +16681,7 @@ Any leading separator will be in @code{@var{seps}[0]}.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
-(@pxref{Splitting By Content}.
+(@pxref{Splitting By Content}).
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
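For instance, something like the following (with made-up data) shows
the pieces and separators that result:

@example
$ @kbd{gawk 'BEGIN @{}
> @kbd{    n = patsplit("1-800-555-1212", parts, /[0-9]+/, seps)}
> @kbd{    print n, parts[1], parts[4], seps[1]}
> @kbd{@}'}
@print{} 4 1 1212 -
@end example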
@@ -16635,8 +16694,7 @@ and store the pieces in @var{array} and the separator strings in the
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
forth. The string value of the third argument, @var{fieldsep}, is
a regexp describing where to split @var{string} (much as @code{FS} can
-be a regexp describing where to split input records;
-@pxref{Regexp Field Splitting}).
+be a regexp describing where to split input records).
If @var{fieldsep} is omitted, the value of @code{FS} is used.
@code{split()} returns the number of elements created.
@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]}
@@ -16931,6 +16989,26 @@ Nonalphabetic characters are left unchanged. For example,
@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
@end table
+@sidebar Matching the Null String
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+@end sidebar
+
+
@node Gory Details
@subsubsection More About @samp{\} and @samp{&} with @code{sub()}, @code{gsub()}, and @code{gensub()}
@@ -16944,7 +17022,7 @@ Nonalphabetic characters are left unchanged. For example,
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@quotation CAUTION
-This section has been known to cause headaches.
+This subsubsection has been reported to cause headaches.
You might want to skip it upon first reading.
@end quotation
@@ -17235,25 +17313,6 @@ and the special cases for @code{sub()} and @code{gsub()},
we recommend the use of @command{gawk} and @code{gensub()} when you have
to do substitutions.
-@sidebar Matching the Null String
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-@end sidebar
-
@node I/O Functions
@subsection Input/Output Functions
@cindex input/output functions
@@ -17306,10 +17365,9 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to BWK @command{awk} in
-April of 1992. For two decades, it was not part of the POSIX standard.
-As of December, 2012, it was accepted for inclusion into the POSIX
-standard.
+Brian Kernighan added @code{fflush()} to his @command{awk} in April
+of 1992. For two decades, it was a common extension. In December,
+2012, it was accepted for inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=634, the Austin Group website}.
POSIX standardizes @code{fflush()} as follows: If there
@@ -17578,7 +17636,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item @code{strftime(} [@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -17684,7 +17742,7 @@ of its ISO week number is 2013, even though its year is 2012.
The full year of the ISO week number, as a decimal number.
@item %h
-Equivalent to @samp{%b}.
+Equivalent to @code{%b}.
@item %H
The hour (24-hour clock) as a decimal number (00--23).
@@ -17753,7 +17811,7 @@ The locale's ``appropriate'' date representation.
@item %X
The locale's ``appropriate'' time representation.
-(This is @samp{%T} in the @code{"C"} locale.)
+(This is @code{%T} in the @code{"C"} locale.)
@item %y
The year modulo 100 as a decimal number (00--99).
@@ -17774,7 +17832,7 @@ no time zone is determinable.
@item %Ec %EC %Ex %EX %Ey %EY %Od %Oe %OH
@itemx %OI %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
``Alternate representations'' for the specifications
-that use only the second letter (@samp{%c}, @samp{%C},
+that use only the second letter (@code{%c}, @code{%C},
and so on).@footnote{If you don't understand any of this, don't worry about
it; these facilities are meant to make it easier to ``internationalize''
programs.
@@ -17845,7 +17903,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Monday, May 05, 2014.
+@print{} Today is Monday, September 22, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -18037,19 +18095,18 @@ For example, if you have a bit string @samp{10111001} and you shift it
right by three bits, you end up with @samp{00010111}.@footnote{This example
shows that 0's come in on the left side. For @command{gawk}, this is
always true, but in some languages, it's possible to have the left side
-fill with 1's. Caveat emptor.}
+fill with 1's.}
@c Purposely decided to use 0's and 1's here. 2/2001.
-If you start over
-again with @samp{10111001} and shift it left by three bits, you end up
-with @samp{11001000}.
-@command{gawk} provides built-in functions that implement the
-bitwise operations just described. They are:
+If you start over again with @samp{10111001} and shift it left by three
+bits, you end up with @samp{11001000}. The following list describes
+@command{gawk}'s built-in functions that implement the bitwise operations.
+Optional parameters are enclosed in square brackets ([ ]):
@cindex @command{gawk}, bitwise operations in
@table @code
@cindexgawkfunc{and}
@cindex bitwise AND
-@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{and(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@@ -18064,7 +18121,7 @@ Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{or(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@@ -18074,7 +18131,7 @@ Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{xor(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
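To make the earlier shift example concrete (bearing in mind that
@command{gawk}'s integers are wider than eight bits, so the left-shifted
value is masked back down to eight bits here):

@example
$ @kbd{gawk 'BEGIN @{}
> @kbd{    print rshift(185, 3)              # 10111001 -> 00010111}
> @kbd{    print and(lshift(185, 3), 255)    # keep only the low 8 bits}
> @kbd{@}'}
@print{} 23
@print{} 200
@end example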
@@ -18197,7 +18254,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of an array of arrays.
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -18213,12 +18270,14 @@ an array or not. The second is inside the body of a user-defined function
(not discussed yet; @pxref{User-defined}), to test if a parameter is an
array or not.
-Note, however, that using @code{isarray()} at the global level to test
+@quotation NOTE
+Using @code{isarray()} at the global level to test
variables makes no sense. Since you are the one writing the program, you
are supposed to know if your variables are arrays or not. And in fact,
due to the way @command{gawk} works, if you pass the name of a variable
that has not been previously used to @code{isarray()}, @command{gawk}
-will end up turning it into a scalar.
+ends up turning it into a scalar.
+@end quotation
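A minimal sketch of this kind of use, walking an arbitrarily nested
array from inside a function:

@example
function walk(x,    i)
@{
    if (isarray(x)) @{
        for (i in x)
            walk(x[i])
    @} else
        print x
@}
@end example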
@node I18N Functions
@subsection String-Translation Functions
@@ -18479,7 +18538,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
@{
for (i in a)
- delete a[i]
+ delete a[i]
@}
@end example
@@ -18490,7 +18549,7 @@ Instead of having
to repeat this loop everywhere that you need to clear out
an array, your program can just call @code{delarray}.
(This guarantees portability. The use of @samp{delete @var{array}} to delete
-the contents of an entire array is a recent@footnote{Late in 2012.}
+the contents of an entire array is a relatively recent@footnote{Late in 2012.}
addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
@@ -18520,7 +18579,7 @@ $ @kbd{echo "Don't Panic!" |}
@print{} !cinaP t'noD
@end example
-The C @code{ctime()} function takes a timestamp and returns it in a string,
+The C @code{ctime()} function takes a timestamp and returns it as a string,
formatted in a well-known fashion.
The following example uses the built-in @code{strftime()} function
(@pxref{Time Functions})
@@ -18535,13 +18594,19 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@}
@c endfile
@end example
+
+You might think that @code{ctime()} could use @code{PROCINFO["strftime"]}
+for its format string. That would be a mistake, since @code{ctime()} is
+supposed to return the time formatted in a standard fashion, and user-level
+code could have changed @code{PROCINFO["strftime"]}.
@c ENDOFRANGE fdef
@node Function Caveats
@@ -19113,7 +19178,7 @@ saving it in @code{start}.
The last part of the code loops through each function name (from @code{$2} up to
the marker, @samp{data:}), calling the function named by the field. The indirect
function call itself occurs as a parameter in the call to @code{printf}.
-(The @code{printf} format string uses @samp{%s} as the format specifier so that we
+(The @code{printf} format string uses @code{%s} as the format specifier so that we
can use functions that return strings, as well as numbers. Note that the result
from the indirect call is concatenated with the empty string, in order to force
it to be a string value.)
@@ -19190,7 +19255,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
@{
temp = data[i]
data[i] = data[j]
@@ -19341,10 +19406,11 @@ functions.
@item
POSIX @command{awk} provides three kinds of built-in functions: numeric,
-string, and I/O. @command{gawk} provides functions that work with values
-representing time, do bit manipulation, sort arrays, and internationalize
-and localize programs. @command{gawk} also provides several extensions to
-some of standard functions, typically in the form of additional arguments.
+string, and I/O. @command{gawk} provides functions that sort arrays, work
+with values representing time, do bit manipulation, determine variable
+type (array vs.@: scalar), and internationalize and localize programs.
+@command{gawk} also provides several extensions to some of the standard
+functions, typically in the form of additional arguments.
@item
Functions accept zero or more arguments and return a value. The
@@ -19595,8 +19661,9 @@ are very difficult to track down:
function lib_func(x, y, l1, l2)
@{
@dots{}
- @var{use variable} some_var # some_var should be local
- @dots{} # but is not by oversight
+ # some_var should be local but by oversight is not
+ @var{use variable} some_var
+ @dots{}
@}
@end example
@@ -19707,7 +19774,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -19718,9 +19785,12 @@ function mystrtonum(str, ret, n, i, k, c)
The function first looks for C-style octal numbers (base 8).
If the input string matches a regular expression describing octal
numbers, then @code{mystrtonum()} loops through each character in the
-string. It sets @code{k} to the index in @code{"01234567"} of the current
-octal digit. Since the return value is one-based, the @samp{k--}
-adjusts @code{k} so it can be used in computing the return value.
+string. It sets @code{k} to the index in @code{"1234567"} of the current
+octal digit.
+The return value will either be the same number as the digit, or zero
+if the character is not there, which will be true for a @samp{0}.
+This is safe, since the regexp test in the @code{if} ensures that
+only octal values are converted.
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with @samp{0x} or @samp{0X}.
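Here is a small standalone sketch of the digit-by-digit octal
conversion just described (this is not the actual @code{mystrtonum()}
code):

@example
BEGIN @{
    str = "0657"             # an octal constant
    ret = 0
    for (i = 1; i <= length(str); i++) @{
        c = substr(str, i, 1)
        k = index("1234567", c)   # '0' yields 0; '1'..'7' yield 1..7
        ret = ret * 8 + k
    @}
    print ret                # 0657 octal is 431 decimal
@}
@end example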
@@ -19753,7 +19823,7 @@ that a condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to be
the case. Such a statement is known as an
@dfn{assertion}. The C language provides an @code{<assert.h>} header file
-and corresponding @code{assert()} macro that the programmer can use to make
+and corresponding @code{assert()} macro that a programmer can use to make
assertions. If an assertion fails, the @code{assert()} macro arranges to
print a diagnostic message describing the condition that should have
been true but was not, and then it kills the program. In C, using
@@ -20223,7 +20293,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -20338,6 +20408,9 @@ if (length(contents) == 0)
This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
+@xref{Extension Sample Readfile}, for an extension function that
+also reads an entire file into memory.
+
@node Data File Management
@section @value{DDF} Management
@@ -20395,15 +20468,14 @@ Besides solving the problem in only nine(!) lines of code, it does so
@c # Arnold Robbins, arnold@@skeeve.com, Public Domain
@c # January 1992
-FILENAME != _oldfilename \
-@{
+FILENAME != _oldfilename @{
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
@}
-END @{ endfile(FILENAME) @}
+END @{ endfile(FILENAME) @}
@end example
This file must be loaded before the user's ``main'' program, so that the
@@ -20456,7 +20528,7 @@ FNR == 1 @{
beginfile(FILENAME)
@}
-END @{ endfile(_filename_) @}
+END @{ endfile(_filename_) @}
@c endfile
@end example
@@ -20526,24 +20598,12 @@ function rewind( i)
@c endfile
@end example
-This code relies on the @code{ARGIND} variable
-(@pxref{Auto-set}),
-which is specific to @command{gawk}.
-If you are not using
-@command{gawk}, you can use ideas presented in
-@ifnotinfo
-the previous @value{SECTION}
-@end ifnotinfo
-@ifinfo
-@ref{Filetrans Function},
-@end ifinfo
-to either update @code{ARGIND} on your own
-or modify this code as appropriate.
-
-The @code{rewind()} function also relies on the @code{nextfile} keyword
-(@pxref{Nextfile Statement}). Because of this, you should not call it
-from an @code{ENDFILE} rule. (This isn't necessary anyway, since as soon
-as an @code{ENDFILE} rule finishes @command{gawk} goes to the next file!)
+The @code{rewind()} function relies on the @code{ARGIND} variable
+(@pxref{Auto-set}), which is specific to @command{gawk}. It also
+relies on the @code{nextfile} keyword (@pxref{Nextfile Statement}).
+Because of this, you should not call it from an @code{ENDFILE} rule.
+(This isn't necessary anyway, since as soon as an @code{ENDFILE} rule
+finishes @command{gawk} goes to the next file!)
@node File Checking
@subsection Checking for Readable @value{DDF}s
@@ -20576,7 +20636,7 @@ the following program to your @command{awk} program:
BEGIN @{
for (i = 1; i < ARGC; i++) @{
- if (ARGV[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/ \
+ if (ARGV[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
|| ARGV[i] == "-" || ARGV[i] == "/dev/stdin")
continue # assignment or standard input
else if ((getline junk < ARGV[i]) < 0) # unreadable
@@ -20594,6 +20654,11 @@ Removing the element from @code{ARGV} with @code{delete}
skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
+The regular expression check purposely does not use character classes
+such as @samp{[:alpha:]} and @samp{[:alnum:]}
+(@pxref{Bracket Expressions})
+since @command{awk} variable names only allow the English letters.
+
@node Empty Files
@subsection Checking for Zero-length Files
@@ -20690,7 +20755,7 @@ a library file does the trick:
function disable_assigns(argc, argv, i)
@{
for (i = 1; i < argc; i++)
- if (argv[i] ~ /^[[:alpha:]_][[:alnum:]_]*=.*/)
+ if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
argv[i] = ("./" argv[i])
@}
@@ -21062,12 +21127,18 @@ In both runs, the first @option{--} terminates the arguments to
etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the
-user level code to clear out all the elements of @code{ARGV} from 1
+After @code{getopt()} is through,
+user-level code must clear out all the elements of @code{ARGV} from 1
to @code{Optind}, so that @command{awk} does not try to process the
command-line options as @value{FN}s.
@end quotation
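For instance, a typical cleanup sketch after the @code{getopt()} loop
might look like this (empty @code{ARGV} elements are simply skipped):

@example
for (i = 1; i < Optind; i++)
    ARGV[i] = ""
@end example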
+Using @samp{#!} with the @option{-E} option may help avoid
+conflicts between your program's options and @command{gawk}'s options,
+since @option{-E} causes @command{gawk} to abandon processing of
+further options
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
Several of the sample programs presented in
@ref{Sample Programs},
use @code{getopt()} to process their arguments.
@@ -21312,13 +21383,14 @@ The @code{BEGIN} rule sets a private variable to the directory where
routine, we have chosen to put it in @file{/usr/local/libexec/awk};
however, you might want it to be in a different directory on your system.
-The function @code{_pw_init()} keeps three copies of the user information
-in three associative arrays. The arrays are indexed by username
+The function @code{_pw_init()} fills three copies of the user information
+into three associative arrays. The arrays are indexed by username
(@code{_pw_byname}), by user ID number (@code{_pw_byuid}), and by order of
occurrence (@code{_pw_bycount}).
The variable @code{_pw_inited} is used for efficiency, since @code{_pw_init()}
needs to be called only once.
+@cindex @code{PROCINFO} array, testing the field splitting
@cindex @code{getline} command, @code{_pw_init()} function
Because this function uses @code{getline} to read information from
@command{pwcat}, it first saves the values of @code{FS}, @code{RS}, and @code{$0}.
@@ -21326,13 +21398,8 @@ It notes in the variable @code{using_fw} whether field splitting
with @code{FIELDWIDTHS} is in effect or not.
Doing so is necessary, since these functions could be called
from anywhere within a user's program, and the user may have his
-or her
-own way of splitting records and fields.
-
-@cindex @code{PROCINFO} array, testing the field splitting
-The @code{using_fw} variable checks @code{PROCINFO["FS"]}, which
-is @code{"FIELDWIDTHS"} if field splitting is being done with
-@code{FIELDWIDTHS}. This makes it possible to restore the correct
+or her own way of splitting records and fields.
+This makes it possible to restore the correct
field-splitting mechanism later. The test can only be true for
@command{gawk}. It is false if using @code{FS} or @code{FPAT},
or on some other @command{awk} implementation.
@@ -21646,8 +21713,7 @@ function _gr_init( oldfs, oldrs, olddol0, grcat,
n = split($4, a, "[ \t]*,[ \t]*")
for (i = 1; i <= n; i++)
if (a[i] in _gr_groupsbyuser)
- _gr_groupsbyuser[a[i]] = \
- _gr_groupsbyuser[a[i]] " " $1
+                                _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
else
_gr_groupsbyuser[a[i]] = $1
@@ -21874,8 +21940,8 @@ $ @kbd{gawk -f walk_array.awk}
@itemize @value{BULLET}
@item
Reading programs is an excellent way to learn Good Programming.
-The functions provided in this @value{CHAPTER} and the next are intended
-to serve that purpose.
+The functions and programs provided in this @value{CHAPTER} and the next
+are intended to serve that purpose.
@item
When writing general-purpose library functions, put some thought into how
@@ -22162,22 +22228,16 @@ supplied:
# Requires getopt() and join() library functions
@group
-function usage( e1, e2)
+function usage()
@{
- e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
- e2 = "usage: cut [-c list] [files...]"
- print e1 > "/dev/stderr"
- print e2 > "/dev/stderr"
+ print("usage: cut [-f list] [-d c] [-s] [files...]") > "/dev/stderr"
+ print("usage: cut [-c list] [files...]") > "/dev/stderr"
exit 1
@}
@end group
@c endfile
@end example
-@noindent
-The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the @value{PAGE}.
-
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
Next comes a @code{BEGIN} rule that parses the command-line options.
@@ -22678,19 +22738,15 @@ and then exits:
@example
@c file eg/prog/egrep.awk
-function usage( e)
+function usage()
@{
- e = "Usage: egrep [-csvil] [-e pat] [files ...]"
- e = e "\n\tegrep [-csvil] pat [files ...]"
- print e > "/dev/stderr"
+ print("Usage: egrep [-csvil] [-e pat] [files ...]") > "/dev/stderr"
+ print("\n\tegrep [-csvil] pat [files ...]") > "/dev/stderr"
exit 1
@}
@c endfile
@end example
-The variable @code{e} is used so that the function fits nicely
-on the printed page.
-
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -22748,6 +22804,7 @@ numbers:
# May 1993
# Revised February 1996
# Revised May 2014
+# Revised September 2014
@c endfile
@end ignore
@@ -22766,26 +22823,22 @@ BEGIN @{
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -22794,8 +22847,7 @@ BEGIN @{
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "")
- pr_first_field(pw)
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
@@ -22805,8 +22857,10 @@ BEGIN @{
function pr_first_field(str, a)
@{
- split(str, a, ":")
- printf("(%s)", a[1])
+ if (str != "") @{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ @}
@}
@c endfile
@end example
@@ -22829,7 +22883,8 @@ tested, and the loop body never executes.
The @code{pr_first_field()} function simply isolates out some
code that is used repeatedly, making the whole program
-slightly shorter and cleaner.
+shorter and cleaner. In particular, moving the check for
+the empty string into this function saves several lines of code.
@c ENDOFRANGE id
@@ -22956,19 +23011,14 @@ The @code{usage()} function simply prints an error message and exits:
@example
@c file eg/prog/split.awk
-function usage( e)
+function usage()
@{
- e = "usage: split [-num] [file] [outname]"
- print e > "/dev/stderr"
+ print("usage: split [-num] [file] [outname]") > "/dev/stderr"
exit 1
@}
@c endfile
@end example
-@noindent
-The variable @code{e} is used so that the function
-fits nicely on the @value{PAGE}.
-
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
@@ -23127,10 +23177,10 @@ The options for @command{uniq} are:
@table @code
@item -d
-Print only repeated lines.
+Print only repeated (duplicated) lines.
@item -u
-Print only nonrepeated lines.
+Print only nonrepeated (unique) lines.
@item -c
Count lines. This option overrides @option{-d} and @option{-u}. Both repeated
@@ -23199,10 +23249,9 @@ standard output, @file{/dev/stdout}:
@end ignore
@c file eg/prog/uniq.awk
-function usage( e)
+function usage()
@{
- e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
- print e > "/dev/stderr"
+ print("Usage: uniq [-udc [-n]] [+n] [ in [ out ]]") > "/dev/stderr"
exit 1
@}
@@ -23256,22 +23305,20 @@ BEGIN @{
@end example
The following function, @code{are_equal()}, compares the current line,
-@code{$0}, to the
-previous line, @code{last}. It handles skipping fields and characters.
-If no field count and no character count are specified, @code{are_equal()}
-simply returns one or zero depending upon the result of a simple string
-comparison of @code{last} and @code{$0}. Otherwise, things get more
-complicated.
-If fields have to be skipped, each line is broken into an array using
-@code{split()}
-(@pxref{String Functions});
-the desired fields are then joined back into a line using @code{join()}.
-The joined lines are stored in @code{clast} and @code{cline}.
-If no fields are skipped, @code{clast} and @code{cline} are set to
-@code{last} and @code{$0}, respectively.
-Finally, if characters are skipped, @code{substr()} is used to strip off the
-leading @code{charcount} characters in @code{clast} and @code{cline}. The
-two strings are then compared and @code{are_equal()} returns the result:
+@code{$0}, to the previous line, @code{last}. It handles skipping fields
+and characters. If no field count and no character count are specified,
+@code{are_equal()} returns one or zero depending upon the result of a
+simple string comparison of @code{last} and @code{$0}.
+
+Otherwise, things get more complicated. If fields have to be skipped,
+each line is broken into an array using @code{split()} (@pxref{String
+Functions}); the desired fields are then joined back into a line
+using @code{join()}. The joined lines are stored in @code{clast} and
+@code{cline}. If no fields are skipped, @code{clast} and @code{cline}
+are set to @code{last} and @code{$0}, respectively. Finally, if
+characters are skipped, @code{substr()} is used to strip off the leading
+@code{charcount} characters in @code{clast} and @code{cline}. The two
+strings are then compared and @code{are_equal()} returns the result:
@example
@c file eg/prog/uniq.awk
@@ -23362,6 +23409,13 @@ END @{
@c endfile
@end example
+@c FIXME: Include this?
+@ignore
+This program does not follow our recommended convention of naming
+global variables with a leading capital letter. Doing that would
+make the program a little easier to follow.
+@end ignore
+
@ifset FOR_PRINT
The logic for choosing which lines to print represents a @dfn{state
machine}, which is ``a device that can be in one of a set number of stable
@@ -23407,7 +23461,7 @@ one or more input files. Its usage is as follows:
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
-the files. The options and their meanings are shown in the following list:
+the files. The options and their meanings are as follows:
@table @code
@item -l
@@ -24059,7 +24113,7 @@ of lines on the page
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
have to print horizontally; @code{line[1]} next to @code{line[6]},
-@code{line[2]} next to @code{line[7]}, and so on. Two loops are used to
+@code{line[2]} next to @code{line[7]}, and so on. Two loops
accomplish this. The outer loop, controlled by @code{i}, steps through
every 10 lines of data; this is each row of labels. The inner loop,
controlled by @code{j}, goes through the lines within the row.
@@ -24173,7 +24227,7 @@ in a useful format.
At first glance, a program like this would seem to do the job:
@example
-# Print list of word frequencies
+# wordfreq-first-try.awk --- print list of word frequencies
@{
for (i = 1; i <= NF; i++)
@@ -24390,16 +24444,16 @@ Texinfo input file into separate files.
This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo},
the GNU project's document formatting language.
A single Texinfo source file can be used to produce both
-printed and online documentation.
+printed documentation, with @TeX{}, and online documentation.
@ifnotinfo
-Texinfo is fully documented in the book
+(Texinfo is fully documented in the book
@cite{Texinfo---The GNU Documentation Format},
available from the Free Software Foundation,
-and also available @uref{http://www.gnu.org/software/texinfo/manual/texinfo/, online}.
+and also available @uref{http://www.gnu.org/software/texinfo/manual/texinfo/, online}.)
@end ifnotinfo
@ifinfo
-The Texinfo language is described fully, starting with
-@inforef{Top, , Texinfo, texinfo,Texinfo---The GNU Documentation Format}.
+(The Texinfo language is described fully, starting with
+@inforef{Top, , Texinfo, texinfo,Texinfo---The GNU Documentation Format}.)
@end ifinfo
For our purposes, it is enough to know three things about Texinfo input
@@ -24477,8 +24531,7 @@ exits with a zero exit status, signifying OK:
@cindex @code{extract.awk} program
@example
@c file eg/prog/extract.awk
-# extract.awk --- extract files and run programs
-# from texinfo files
+# extract.awk --- extract files and run programs from texinfo files
@c endfile
@ignore
@c file eg/prog/extract.awk
@@ -24492,8 +24545,7 @@ exits with a zero exit status, signifying OK:
BEGIN @{ IGNORECASE = 1 @}
-/^@@c(omment)?[ \t]+system/ \
-@{
+/^@@c(omment)?[ \t]+system/ @{
if (NF < 3) @{
e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
@@ -24550,8 +24602,7 @@ line. That line is then printed to the output file:
@example
@c file eg/prog/extract.awk
-/^@@c(omment)?[ \t]+file/ \
-@{
+/^@@c(omment)?[ \t]+file/ @{
if (NF != 3) @{
e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
@@ -24611,7 +24662,7 @@ The @code{END} rule handles the final cleanup, closing the open file:
function unexpected_eof()
@{
printf("extract: %s:%d: unexpected EOF or error\n",
- FILENAME, FNR) > "/dev/stderr"
+ FILENAME, FNR) > "/dev/stderr"
exit 1
@}
@end group
@@ -24871,6 +24922,7 @@ should be the @command{awk} program. If there are no command-line
arguments left, @command{igawk} prints an error message and exits.
Otherwise, the first argument is appended to @code{program}.
In any case, after the arguments have been processed,
+the shell variable
@code{program} contains the complete text of the original @command{awk}
program.
@@ -24993,8 +25045,8 @@ the path, and an attempt is made to open the generated @value{FN}.
The only way to test if a file can be read in @command{awk} is to go
ahead and try to read it with @code{getline}; this is what @code{pathto()}
does.@footnote{On some very old versions of @command{awk}, the test
-@samp{getline junk < t} can loop forever if the file exists but is empty.
-Caveat emptor.} If the file can be read, it is closed and the @value{FN}
+@samp{getline junk < t} can loop forever if the file exists but is empty.}
+If the file can be read, it is closed and the @value{FN}
is returned:
@ignore
@@ -25194,12 +25246,10 @@ in C or C++, and it is frequently easier to do certain kinds of string
and argument manipulation using the shell than it is in @command{awk}.
Finally, @command{igawk} shows that it is not always necessary to add new
-features to a program; they can often be layered on top.
-@ignore
-With @command{igawk},
-there is no real reason to build @code{@@include} processing into
-@command{gawk} itself.
-@end ignore
+features to a program; they can often be layered on top.@footnote{@command{gawk}
+does @code{@@include} processing itself in order to support the use
+of @command{awk} programs as Web CGI scripts.}
+
@c ENDOFRANGE libfex
@c ENDOFRANGE flibex
@c ENDOFRANGE awkpex
@@ -25217,12 +25267,11 @@ One word is an anagram of another if both words contain
the same letters
(for example, ``babbling'' and ``blabbing'').
-An elegant algorithm is presented in Column 2, Problem C of
-Jon Bentley's @cite{Programming Pearls}, second edition.
-The idea is to give words that are anagrams a common signature,
-sort all the words together by their signature, and then print them.
-Dr.@: Bentley observes that taking the letters in each word and
-sorting them produces that common signature.
+Column 2, Problem C of Jon Bentley's @cite{Programming Pearls}, second
+edition, presents an elegant algorithm. The idea is to give words that
+are anagrams a common signature, sort all the words together by their
+signature, and then print them. Dr.@: Bentley observes that taking the
+letters in each word and sorting them produces that common signature.
The following program uses arrays of arrays to bring together
words with the same signature and array sorting to print the words
@@ -25456,7 +25505,7 @@ BEGIN {
@itemize @value{BULLET}
@item
-The functions provided in this @value{CHAPTER} and the previous one
+The programs provided in this @value{CHAPTER}
continue on the theme that reading programs is an excellent way to learn
Good Programming.
@@ -25733,13 +25782,11 @@ discusses the ability to dynamically add new built-in functions to
@cindex constants, nondecimal
If you run @command{gawk} with the @option{--non-decimal-data} option,
-you can have nondecimal constants in your input data:
+you can have nondecimal values in your input data:
-@c line break here for small book format
@example
$ @kbd{echo 0123 123 0x123 |}
-> @kbd{gawk --non-decimal-data '@{ printf "%d, %d, %d\n",}
-> @kbd{$1, $2, $3 @}'}
+> @kbd{gawk --non-decimal-data '@{ printf "%d, %d, %d\n", $1, $2, $3 @}'}
@print{} 83, 123, 291
@end example
@@ -25780,6 +25827,8 @@ Instead, use the @code{strtonum()} function to convert your data
(@pxref{String Functions}).
This makes your programs easier to write and easier to read, and
leads to less surprising results.
+
+This option may disappear in a future version of @command{gawk}.
@end quotation
@node Array Sorting
@@ -25814,7 +25863,9 @@ pre-defined values to @code{PROCINFO["sorted_in"]} in order to
control the order in which @command{gawk} traverses an array
during a @code{for} loop.
-In addition, the value of @code{PROCINFO["sorted_in"]} can be a function name.
+In addition, the value of @code{PROCINFO["sorted_in"]} can be a
+function name.@footnote{This is why the predefined sorting orders
+start with an @samp{@@} character, which cannot be part of an identifier.}
This lets you traverse an array based on any custom criterion.
The array elements are ordered according to the return value of this
function. The comparison function should be defined with at least
@@ -25946,7 +25997,7 @@ according to login name. The following program sorts records
by a specific field position and can be used for this purpose:
@example
-# sort.awk --- simple program to sort by field position
+# passwd-sort.awk --- simple program to sort by field position
# field position is specified by the global variable POS
function cmp_field(i1, v1, i2, v2)
@@ -26005,7 +26056,7 @@ As mentioned above, the order of the indices is arbitrary if two
elements compare equal. This is usually not a problem, but letting
the tied elements come out in arbitrary order can be an issue, especially
when comparing item values. The partial ordering of the equal elements
-may change during the next loop traversal, if other elements are added or
+may change the next time the array is traversed, if other elements are added or
removed from the array. One way to resolve ties when comparing elements
with otherwise equal values is to include the indices in the comparison
rules. Note that doing this may make the loop traversal less efficient,
@@ -26174,7 +26225,6 @@ come into play; comparisons are based on character values only.@footnote{This
is true because locale-based comparison occurs only when in POSIX
compatibility mode, and since @code{asort()} and @code{asorti()} are
@command{gawk} extensions, they are not available in that case.}
-Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
@@ -26240,7 +26290,7 @@ for example, @file{/tmp} will not do, as another user might happen
to be using a temporary file with the same name.@footnote{Michael
Brennan suggests the use of @command{rand()} to generate unique
@value{FN}s. This is a valid point; nevertheless, temporary files
-remain more difficult than two-way pipes.} @c 8/2014
+remain more difficult to use than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -26383,7 +26433,7 @@ using regular pipes.
@ @ @ @ @i{A host is a host from coast to coast,@*
@ @ @ @ and no-one can talk to host that's close,@*
@ @ @ @ unless the host that isn't close@*
-@ @ @ @ is busy hung or dead.}
+@ @ @ @ is busy, hung, or dead.}
@end quotation
@end ifnotdocbook
@@ -26393,7 +26443,7 @@ using regular pipes.
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
-&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>is busy hung or dead.</emphasis></literallayout>
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>is busy, hung, or dead.</emphasis></literallayout>
</blockquote>
@end docbook
@@ -26424,7 +26474,7 @@ the system default, most likely IPv4.
@item protocol
The protocol to use over IP. This must be either @samp{tcp}, or
@samp{udp}, for a TCP or UDP IP connection,
-respectively. The use of TCP is recommended for most applications.
+respectively. TCP should be used for most applications.
@item local-port
@cindex @code{getaddrinfo()} function (C library)
@@ -26457,10 +26507,10 @@ Consider the following very simple example:
@example
BEGIN @{
- Service = "/inet/tcp/0/localhost/daytime"
- Service |& getline
- print $0
- close(Service)
+ Service = "/inet/tcp/0/localhost/daytime"
+ Service |& getline
+ print $0
+ close(Service)
@}
@end example
@@ -26825,9 +26875,9 @@ those functions sort arrays. Or you may provide one of the predefined control
strings that work for @code{PROCINFO["sorted_in"]}.
@item
-You can use the @samp{|&} operator to create a two-way pipe to a co-process.
-You read from the co-process with @code{getline} and write to it with @code{print}
-or @code{printf}. Use @code{close()} to close off the co-process completely, or
+You can use the @samp{|&} operator to create a two-way pipe to a coprocess.
+You read from the coprocess with @code{getline} and write to it with @code{print}
+or @code{printf}. Use @code{close()} to close off the coprocess completely, or
optionally, close off one side of the two-way communications.
@item
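
A minimal sketch of that read/write/close pattern, using sort as the coprocess purely for illustration:

    BEGIN {
        cmd = "sort"
        print "pear"  |& cmd
        print "apple" |& cmd
        close(cmd, "to")                  # close the write side so sort sees end-of-input
        while ((cmd |& getline line) > 0)
            print "got", line             # "got apple", then "got pear"
        close(cmd)
    }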
@@ -34267,7 +34317,7 @@ for case translation
(@pxref{String Functions}).
@item
-A cleaner specification for the @samp{%c} format-control letter in the
+A cleaner specification for the @code{%c} format-control letter in the
@code{printf} function
(@pxref{Control Letters}).
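
For reference, %c prints the character whose code is given by a numeric argument, and the first character of a string argument (output assumes an ASCII-compatible locale):

    awk 'BEGIN { printf "%c %c\n", 65, "hi" }'    # prints "A h"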
@@ -36670,7 +36720,7 @@ need to use the @code{BINMODE} variable.
This can cause problems with other Unix-like components that have
been ported to MS-Windows that expect @command{gawk} to do automatic
-translation of @code{"\r\n"}, since it won't. Caveat Emptor!
+translation of @code{"\r\n"}, since it won't.
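
On those ports, binary I/O is requested explicitly through BINMODE; in this sketch prog.awk and data.txt are placeholders:

    gawk -v BINMODE=3 -f prog.awk data.txt      # 3 (or "rw"): binary input and output
    gawk -v BINMODE="r" -f prog.awk data.txt    # binary input only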
@node VMS Installation
@appendixsubsec How to Compile and Install @command{gawk} on Vax/VMS and OpenVMS
@@ -37139,10 +37189,8 @@ Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT)
@docbook
<blockquote><attribution>Michael Brennan</attribution>
-<literallayout>
-<emphasis>It's kind of fun to put comments like this in your awk code.</emphasis>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<literal>// Do C++ comments work? answer: yes! of course</literal>
-</literallayout>
+<literallayout><emphasis>It's kind of fun to put comments like this in your awk code.</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<literal>// Do C++ comments work? answer: yes! of course</literal></literallayout>
</blockquote>
@end docbook
@@ -40678,6 +40726,7 @@ Consistency issues:
Use --foo, not -Wfoo when describing long options
Use "Bell Laboratories", but not "Bell Labs".
Use "behavior" instead of "behaviour".
+ Use "coprocess" instead of "co-process".
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
@@ -40782,4 +40831,3 @@ But to use it you have to say
which sorta sucks.
TODO:
------
diff --git a/io.c b/io.c
index 7930904d..7154a710 100644
--- a/io.c
+++ b/io.c
@@ -1550,6 +1550,17 @@ nextrres:
* change the string.
*/
+/*
+ * 9/2014: Flow here is a little messy.
+ *
+ * For do_posix, we don't allow any of the special filenames.
+ *
+ * For do_traditional, we allow /dev/{stdin,stdout,stderr} since BWK awk
+ * (and mawk) support them. But we don't allow /dev/fd/N or /inet.
+ *
+ * Note that for POSIX systems os_devopen() is a no-op.
+ */
+
int
devopen(const char *name, const char *mode)
{
@@ -1565,7 +1576,7 @@ devopen(const char *name, const char *mode)
flag = str2mode(mode);
openfd = INVALID_HANDLE;
- if (do_traditional)
+ if (do_posix)
goto strictopen;
if ((openfd = os_devopen(name, flag)) != INVALID_HANDLE) {
@@ -1582,6 +1593,8 @@ devopen(const char *name, const char *mode)
openfd = fileno(stdout);
else if (strcmp(cp, "stderr") == 0 && (flag & O_ACCMODE) == O_WRONLY)
openfd = fileno(stderr);
+ else if (do_traditional)
+ goto strictopen;
else if (strncmp(cp, "fd/", 3) == 0) {
struct stat sbuf;
@@ -1594,6 +1607,8 @@ devopen(const char *name, const char *mode)
/* do not set close-on-exec for inherited fd's */
if (openfd != INVALID_HANDLE)
return openfd;
+ } else if (do_traditional) {
+ goto strictopen;
} else if (inetfile(name, & isi)) {
#ifdef HAVE_SOCKETS
cp = (char *) name;
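
At the command-line level, the reworked dispatch in devopen() amounts to roughly the following (a sketch derived from the comment above; exact diagnostics and the availability of OS-level /dev files vary by system):

    gawk 'BEGIN { print "x" > "/dev/stderr" }'                  # special filename, handled by gawk
    gawk --traditional 'BEGIN { print "x" > "/dev/stderr" }'    # still handled, matching BWK awk and mawk
    gawk --traditional 'BEGIN { print "x" > "/dev/fd/3" }'      # not special; falls through to a plain open
    gawk --posix 'BEGIN { print "x" > "/inet/tcp/0/host/25" }'  # no special filenames at all in POSIX mode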
diff --git a/pc/ChangeLog b/pc/ChangeLog
index a66edae9..235f520c 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2014-09-23 Scott Deifik <scottd.mail@sbcglobal.net>
+
+ * Makefile.tst: Sync with mainline.
+
2014-04-17 Scott Deifik <scottd.mail@sbcglobal.net>
* Makefile.tst: Add readfile2 test.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 610704e4..48fc5189 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -180,19 +180,19 @@ UNIX_TESTS = \
GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
backw badargs beginfile1 beginfile2 binmode1 charasbytes \
- colonwarn clos1way delsub devfd devfd1 devfd2 dumpvars exit \
+ colonwarn clos1way dbugeval delsub devfd devfd1 devfd2 dumpvars exit \
fieldwdth fpat1 fpat2 fpat3 fpatnull fsfwfs funlen \
functab1 functab2 functab3 fwtest fwtest2 fwtest3 \
gensub gensub2 getlndir gnuops2 gnuops3 gnureops \
icasefs icasers id igncdym igncfs ignrcas2 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
- include include2 indirectcall \
+ include include2 indirectcall indirectcall2 \
lint lintold lintwarn \
manyfiles match1 match2 match3 mbstr1 \
nastyparm next nondec nondec2 \
- patsplit posix printfbad1 printfbad2 printfbad3 procinfs \
+ patsplit posix printfbad1 printfbad2 printfbad3 printhuge procinfs \
profile1 profile2 profile3 profile4 profile5 pty1 \
- rebuf regx8bit reginttrad reint reint2 rsstart1 \
+ rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \
rsstart2 rsstart3 rstest6 shadow sortfor sortu split_after_fpat \
splitarg4 strftime \
strtonum switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \
@@ -201,8 +201,9 @@ GAWK_EXT_TESTS = \
EXTRA_TESTS = inftest regtest
INET_TESTS = inetdayu inetdayt inetechu inetecht
MACHINE_TESTS = double1 double2 fmtspcl intformat
-MPFR_TESTS = mpfrnr mpfrnegzero mpfrrnd mpfrieee mpfrexprange \
- mpfrsort mpfrbigint
+MPFR_TESTS = mpfrnr mpfrnegzero mpfrrem mpfrrnd mpfrieee mpfrexprange \
+ mpfrsort mpfrsqrt mpfrbigint
+
LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
@@ -318,6 +319,10 @@ machine-msg-end:
charset-msg-start:
@echo "======== Starting tests that can vary based on character set or locale support ========"
+ @echo "************************************************"
+ @echo "** Some or all of these tests may fail if you **"
+ @echo "** have inadequate or missing locale support **"
+ @echo "************************************************"
charset-msg-end:
@echo "======== Done with tests that can vary based on character set or locale support ========"
@@ -352,7 +357,7 @@ poundbang::
@if ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@` ; \
then : ; \
else \
- sed "s;/tmp/gawk;../$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
+ sed "s;/tmp/gawk;$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
chmod +x ./_pbd.awk ; \
LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@`; \
fi
@@ -503,6 +508,16 @@ fmtspcl: fmtspcl.ok
$(CMP) "$(srcdir)"/$@-mpfr.ok _$@ && rm -f _$@ ; \
fi
+rebuf::
+ @echo $@
+ @AWKBUFSIZE=4096 AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+rsglstdin::
+ @echo $@
+ @cat "$(srcdir)"/rsgetline.in | AWKPATH="$(srcdir)" $(AWK) -f rsgetline.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
reint::
@echo $@
@$(AWK) --re-interval -f "$(srcdir)"/reint.awk "$(srcdir)"/reint.in >_$@
@@ -932,6 +947,16 @@ mpfrbigint:
@$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mpfrsqrt:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+mpfrrem:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
jarebug::
@echo $@
@echo Expect jarebug to fail with DJGPP and MinGW.
@@ -1135,6 +1160,22 @@ backsmalls2:
@[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+dbugeval::
+ @echo $@
+ @$(AWK) --debug -f /dev/null < "$(srcdir)"/$@.in > _$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+printhuge::
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+filefuncs:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk -v builddir="$(abs_top_builddir)" >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
Gt-dummy:
# file Maketests, generated from Makefile.am by the Gentests program
addcomma:
@@ -2227,6 +2268,11 @@ indirectcall:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+indirectcall2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
lint:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@@ -2299,9 +2345,14 @@ pty1:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
-rebuf:
+regnul1:
@echo $@
- @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+regnul2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
regx8bit:
@@ -2309,6 +2360,11 @@ regx8bit:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+rsgetline:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rstest6:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@@ -2445,11 +2501,6 @@ fnmatch:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
-filefuncs:
- @echo $@
- @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
- @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
-
fork:
@echo $@
@echo Expect $@ to fail with MinGW because fork.dll is not available
diff --git a/pc/config.h b/pc/config.h
index c5d78a96..a6b2d4c4 100644
--- a/pc/config.h
+++ b/pc/config.h
@@ -242,6 +242,9 @@
#define HAVE_STDLIB_H 1
#endif
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
/* Define to 1 if you have the `strchr' function. */
#define HAVE_STRCHR 1
@@ -407,6 +410,9 @@
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
+/* enable severe portability problems */
+#undef I_DONT_KNOW_WHAT_IM_DOING
+
/* libc is broken for regex handling */
#undef LIBC_IS_BORKED
diff --git a/profile.c b/profile.c
index 3c4c9181..ddb9c357 100644
--- a/profile.c
+++ b/profile.c
@@ -202,7 +202,7 @@ pprint(INSTRUCTION *startp, INSTRUCTION *endp, bool in_for_header)
if (do_profile) {
if (! rule_count[rule]++)
- fprintf(prof_fp, _("\t# %s block(s)\n\n"), ruletab[rule]);
+ fprintf(prof_fp, _("\t# %s rule(s)\n\n"), ruletab[rule]);
indent(0);
}
fprintf(prof_fp, "%s {\n", ruletab[rule]);
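
One quick way to see the reworded header (the one-line program is arbitrary and awkprof.out is the default profile file name):

    gawk --profile 'BEGIN { x = 1 }' /dev/null
    head -n 2 awkprof.out    # now shows "# BEGIN rule(s)" rather than "# BEGIN block(s)"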
diff --git a/test/ChangeLog b/test/ChangeLog
index c40b803a..5a641b84 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2014-09-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * profile2.ok, profile3.ok, profile4.ok, profile5.ok:
+ Adjusted after minor code change.
+
2014-09-18 Arnold D. Robbins <arnold@skeeve.com>
* filefuncs.awk: Change to build directory instead of "..".
diff --git a/test/profile2.ok b/test/profile2.ok
index 50c7e190..938d6858 100644
--- a/test/profile2.ok
+++ b/test/profile2.ok
@@ -1,4 +1,4 @@
- # BEGIN block(s)
+ # BEGIN rule(s)
BEGIN {
1 if (sortcmd == "") {
diff --git a/test/profile3.ok b/test/profile3.ok
index 50172c48..bbf06541 100644
--- a/test/profile3.ok
+++ b/test/profile3.ok
@@ -1,4 +1,4 @@
- # BEGIN block(s)
+ # BEGIN rule(s)
BEGIN {
1 the_func = "p"