diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2014-05-15 22:28:21 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2014-05-15 22:28:21 +0300 |
commit | 8b086817a7907d54dbe813f0dd05626b86e56cd1 (patch) | |
tree | 7e34a110a6e70581ffbebb3cbad8a1ff55a8f025 | |
parent | b34ea22faeecc99f81f4d897d5c4cc815eab2ddb (diff) | |
download | egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.tar.gz egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.tar.bz2 egawk-8b086817a7907d54dbe813f0dd05626b86e56cd1.zip |
Edits through Chapter 11, fix displays for docbook.
-rw-r--r-- | awklib/eg/lib/getopt.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/gettime.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/grcat.c | 2 | ||||
-rw-r--r-- | awklib/eg/lib/pwcat.c | 2 | ||||
-rw-r--r-- | awklib/eg/prog/cut.awk | 2 | ||||
-rw-r--r-- | awklib/eg/prog/egrep.awk | 4 | ||||
-rw-r--r-- | awklib/eg/prog/id.awk | 37 | ||||
-rw-r--r-- | awklib/eg/prog/split.awk | 5 | ||||
-rw-r--r-- | doc/ChangeLog | 4 | ||||
-rw-r--r-- | doc/gawk.info | 1210 | ||||
-rw-r--r-- | doc/gawk.texi | 310 | ||||
-rw-r--r-- | doc/gawktexi.in | 310 |
12 files changed, 964 insertions, 926 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk index 4283a7e1..db957ceb 100644 --- a/awklib/eg/lib/getopt.awk +++ b/awklib/eg/lib/getopt.awk @@ -70,7 +70,7 @@ BEGIN { # test program if (_getopt_test) { while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk index 4cb56330..3da9c8ab 100644 --- a/awklib/eg/lib/gettime.awk +++ b/awklib/eg/lib/gettime.awk @@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c index ff2913a1..7d6b6a74 100644 --- a/awklib/eg/lib/grcat.c +++ b/awklib/eg/lib/grcat.c @@ -1,7 +1,7 @@ /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ /* * Arnold Robbins, arnold@skeeve.com, May 1993 diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c index 910e0329..934ef34e 100644 --- a/awklib/eg/lib/pwcat.c +++ b/awklib/eg/lib/pwcat.c @@ -1,7 +1,7 @@ /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ /* * Arnold Robbins, arnold@skeeve.com, May 1993 diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk index 1399411e..09ba1f7c 100644 --- a/awklib/eg/prog/cut.awk +++ b/awklib/eg/prog/cut.awk @@ -43,7 +43,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" } else if (c == "s") - suppress++ + suppress = 1 else usage() } diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk index 56d199c8..86b3cfda 100644 --- a/awklib/eg/prog/egrep.awk +++ b/awklib/eg/prog/egrep.awk @@ -90,9 +90,7 @@ function endfile(file) } END \ { - if (total == 0) - exit 1 - exit 0 + exit (total == 0) } function usage( e) { diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk index 8b60a245..cf744447 100644 --- a/awklib/eg/prog/id.awk +++ b/awklib/eg/prog/id.awk @@ -5,6 +5,7 @@ # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 # output is: # uid=12(foo) euid=34(bar) gid=3(baz) \ @@ -19,34 +20,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (euid != uid) { printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (egid != gid) { printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } for (i = 1; ("group" i) in PROCINFO; i++) { @@ -55,13 +48,17 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") } print "" } + +function pr_first_field(str, a) +{ + split(str, a, ":") + printf("(%s)", a[1]) +} diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk index c907530b..bcc73ae6 100644 --- a/awklib/eg/prog/split.awk +++ b/awklib/eg/prog/split.awk @@ -4,8 +4,9 @@ # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN { outfile = "x" # default @@ -14,7 +15,7 @@ BEGIN { usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) { + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) { count = -ARGV[i] ARGV[i] = "" i++ diff --git a/doc/ChangeLog b/doc/ChangeLog index ba4aa094..c0c382d3 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2014-05-15 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Fix displays for docbook, edits through Chapter 11. + 2014-05-14 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Fix real preface for docbook. diff --git a/doc/gawk.info b/doc/gawk.info index d725b73e..a9fcc117 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -2276,8 +2276,8 @@ There are two ways to run `awk'--with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [...] in these templates are optional: - awk [OPTIONS] -f progfile [`--'] FILE ... - awk [OPTIONS] [`--'] 'PROGRAM' FILE ... + `awk' [OPTIONS] `-f' PROGFILE [`--'] FILE ... + `awk' [OPTIONS] [`--'] `'PROGRAM'' FILE ... Besides traditional one-letter POSIX-style options, `gawk' also supports GNU long options. @@ -8933,10 +8933,10 @@ which (but not both) may be omitted. The purpose of the "action" is to tell `awk' what to do once a match for the pattern is found. Thus, in outline, an `awk' program generally looks like this: - [PATTERN] { ACTION } - PATTERN [{ ACTION }] + [PATTERN] `{ ACTION }' + PATTERN [`{ ACTION }'] ... - function NAME(ARGS) { ... } + `function NAME(ARGS) { ... }' ... An action consists of one or more `awk' "statements", enclosed in @@ -9024,7 +9024,7 @@ File: gawk.info, Node: If Statement, Next: While Statement, Up: Statements The `if'-`else' statement is `awk''s decision-making statement. It looks like this: - if (CONDITION) THEN-BODY [else ELSE-BODY] + `if (CONDITION) THEN-BODY' [`else ELSE-BODY'] The CONDITION is an expression that controls what the rest of the statement does. If the CONDITION is true, THEN-BODY is executed; @@ -9507,7 +9507,7 @@ The `exit' statement causes `awk' to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The `exit' statement is written as follows: - exit [RETURN CODE] + `exit' [RETURN CODE] When an `exit' statement is executed from a `BEGIN' rule, the program stops processing everything immediately. No input records are @@ -12786,10 +12786,10 @@ starting to execute any of it. The definition of a function named NAME looks like this: - function NAME([PARAMETER-LIST]) - { + `function' NAME`('[PARAMETER-LIST]`)' + `{' BODY-OF-FUNCTION - } + `}' Here, NAME is the name of the function to define. A valid function name is like a valid variable name: a sequence of letters, digits, and @@ -13232,7 +13232,7 @@ control to the calling part of the `awk' program. It can also be used to return a value for use in the rest of the `awk' program. It looks like this: - return [EXPRESSION] + `return' [EXPRESSION] The EXPRESSION part is optional. Due most likely to an oversight, POSIX does not define what the return value is if you omit the @@ -14250,7 +14250,7 @@ current time formatted in the same way as the `date' utility: now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -14510,8 +14510,8 @@ File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Functio Normally, if you give `awk' a data file that isn't readable, it stops with a fatal error. There are times when you might want to just ignore -such files and keep going. You can do this by prepending the following -program to your `awk' program: +such files and keep going.(1) You can do this by prepending the +following program to your `awk' program: # readable.awk --- library file to skip over unreadable files @@ -14531,10 +14531,16 @@ program to your `awk' program: element from `ARGV' with `delete' skips the file (since it's no longer in the list). See also *note ARGC and ARGV::. + ---------- Footnotes ---------- + + (1) The `BEGINFILE' special pattern (*note BEGINFILE/ENDFILE::) +provides an alternative mechanism for dealing with files that can't be +opened. However, the code here provides a portable solution. + File: gawk.info, Node: Empty Files, Next: Ignoring Assigns, Prev: File Checking, Up: Data File Management -10.3.4 Checking For Zero-length Files +10.3.4 Checking for Zero-length Files ------------------------------------- All known `awk' implementations silently skip over zero-length files. @@ -14879,7 +14885,7 @@ is in `ARGV[0]': # test program if (_getopt_test) { while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -14892,17 +14898,17 @@ is in `ARGV[0]': result of two sample runs of the test program: $ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x - -| c = <a>, optarg = <> - -| c = <c>, optarg = <> - -| c = <b>, optarg = <ARG> + -| c = <a>, Optarg = <> + -| c = <c>, Optarg = <> + -| c = <b>, Optarg = <ARG> -| non-option arguments: -| ARGV[3] = <bax> -| ARGV[4] = <-x> $ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc - -| c = <a>, optarg = <> + -| c = <a>, Optarg = <> error--> x -- invalid option - -| c = <?>, optarg = <> + -| c = <?>, Optarg = <> -| non-option arguments: -| ARGV[4] = <xyz> -| ARGV[5] = <abc> @@ -14961,7 +14967,7 @@ that "cats" the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ #include <stdio.h> #include <pwd.h> @@ -15186,7 +15192,7 @@ group database, is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ #include <stdio.h> #include <grp.h> @@ -15222,9 +15228,10 @@ Group Password used; it is usually empty or set to `*'. Group ID Number - The group's numeric group ID number; this number must be unique - within the file. (On some systems it's a C `long', and not an - `int'. Thus we cast it to `long' for all cases.) + The group's numeric group ID number; the association of name to + number must be unique within the file. (On some systems it's a C + `long', and not an `int'. Thus we cast it to `long' for all + cases.) Group Member List A comma-separated list of user names. These users are members of @@ -15333,10 +15340,7 @@ following: For this reason, `_gr_init()' looks to see if a group name or group ID number is already seen. If it is, then the user names are simply -concatenated onto the previous list of users. (There is actually a -subtle problem with the code just presented. Suppose that the first -time there were no names. This code adds the names with a leading -comma. It also doesn't check that there is a `$4'.) +concatenated onto the previous list of users.(1) Finally, `_gr_init()' closes the pipeline to `grcat', restores `FS' (and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes @@ -15401,6 +15405,12 @@ very simple, relying on `awk''s associative arrays to do work. The `id' program in *note Id Program::, uses these functions. + ---------- Footnotes ---------- + + (1) There is actually a subtle problem with the code just presented. +Suppose that the first time there were no names. This code adds the +names with a leading comma. It also doesn't check that there is a `$4'. + File: gawk.info, Node: Walking Arrays, Prev: Group Functions, Up: Library Functions @@ -15637,7 +15647,7 @@ by characters, the output field separator is set to the null string: if (FS == " ") # defeat awk semantics FS = "[ ]" } else if (c == "s") - suppress++ + suppress = 1 else usage() } @@ -15806,7 +15816,7 @@ The `egrep' utility searches files for patterns. It uses regular expressions that are almost identical to those available in `awk' (*note Regexp::). You invoke it as follows: - egrep [ OPTIONS ] 'PATTERN' FILES ... + `egrep' [OPTIONS] `'PATTERN'' FILES ... The PATTERN is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -15950,6 +15960,11 @@ know the total number of lines that matched the pattern: total += fcount } + The `BEGINFILE' and `ENDFILE' special patterns (*note +BEGINFILE/ENDFILE::) could be used, but then the program would be +`gawk'-specific. Additionally, this example was written before `gawk' +acquired `BEGINFILE' and `ENDFILE'. + The following rule does most of the work of matching lines. The variable `matches' is true if the line matched the pattern. If the user wants lines that did not match, the sense of `matches' is inverted @@ -15997,9 +16012,7 @@ there are no matches, the exit status is one; otherwise it is zero: END \ { - if (total == 0) - exit 1 - exit 0 + exit (total == 0) } The `usage()' function prints a usage message in case of invalid @@ -16041,7 +16054,7 @@ different from the real ones. If possible, `id' also supplies the corresponding user and group names. The output might look like this: $ id - -| uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) + -| uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) This information is part of what is provided by `gawk''s `PROCINFO' array (*note Built-in Variables::). However, the `id' utility provides @@ -16074,34 +16087,26 @@ and the group numbers: printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (euid != uid) { printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (egid != gid) { printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } for (i = 1; ("group" i) in PROCINFO; i++) { @@ -16110,10 +16115,8 @@ and the group numbers: group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") } @@ -16121,6 +16124,12 @@ and the group numbers: print "" } + function pr_first_field(str, a) + { + split(str, a, ":") + printf("(%s)", a[1]) + } + The test in the `for' loop is worth noting. Any supplementary groups in the `PROCINFO' array have the indices `"group1"' through `"groupN"' for some N, i.e., the total number of supplementary groups. @@ -16135,6 +16144,10 @@ the last group in the array and the loop exits. then the condition is false the first time it's tested, and the loop body never executes. + The `pr_first_field()' function simply isolates out some code that +is used repeatedly, making the whole program slightly shorter and +cleaner. + File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, Up: Clones @@ -16144,7 +16157,7 @@ File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, U The `split' program splits large text files into smaller pieces. Usage is as follows:(1) - split [-COUNT] file [ PREFIX ] + `split' [`-COUNT'] [FILE] [PREFIX] By default, the output files are named `xaa', `xab', and so on. Each file has 1000 lines in it, with the likely exception of the last file. @@ -16168,7 +16181,7 @@ output file names: # split.awk --- do split in awk # # Requires ord() and chr() library functions - # usage: split [-num] [file] [outname] + # usage: split [-count] [file] [outname] BEGIN { outfile = "x" # default @@ -16177,7 +16190,7 @@ output file names: usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) { + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) { count = -ARGV[i] ARGV[i] = "" i++ @@ -16253,7 +16266,7 @@ The `tee' program is known as a "pipe fitting." `tee' copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: - tee [-a] file ... + `tee' [`-a'] FILE ... The `-a' option tells `tee' to append to the named files, instead of truncating them and starting over. @@ -16342,7 +16355,7 @@ and by default removes duplicate lines. In other words, it only prints unique lines--hence the name. `uniq' has a number of options. The usage is as follows: - uniq [-udc [-N]] [+N] [ INPUT FILE [ OUTPUT FILE ]] + `uniq' [`-udc' [`-N']] [`+N'] [INPUTFILE [OUTPUTFILE]] The options for `uniq' are: @@ -16365,11 +16378,11 @@ usage is as follows: Skip N characters before comparing lines. Any fields specified with `-N' are skipped first. -`INPUT FILE' +`INPUTFILE' Data is read from the input file named on the command line, instead of from the standard input. -`OUTPUT FILE' +`OUTPUTFILE' The generated output is sent to the named output file, instead of to the standard output. @@ -16559,7 +16572,7 @@ File: gawk.info, Node: Wc Program, Prev: Uniq Program, Up: Clones The `wc' (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: - wc [-lwc] [ FILES ... ] + `wc' [`-lwc'] [FILES ...] If no files are specified on the command line, `wc' reads its standard input. If there are multiple files, it also prints total @@ -16925,11 +16938,11 @@ there are more characters in the "from" list than in the "to" list, the last character of the "to" list is used for the remaining characters in the "from" list. - Some time ago, a user proposed that a transliteration function should -be added to `gawk'. The following program was written to prove that -character transliteration could be done with a user-level function. -This program is not as complete as the system `tr' utility but it does -most of the job. + Once upon a time, a user proposed that a transliteration function +should be added to `gawk'. The following program was written to prove +that character transliteration could be done with a user-level +function. This program is not as complete as the system `tr' utility +but it does most of the job. The `translate' program demonstrates one of the few weaknesses of standard `awk': dealing with individual characters is very painful, @@ -17010,8 +17023,8 @@ record: While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the `gawk' authors) started to consider adding a built-in function. However, shortly after -writing this program, we learned that the System V Release 4 `awk' had -added the `toupper()' and `tolower()' functions (*note String +writing this program, we learned that Brian Kernighan had added the +`toupper()' and `tolower()' functions to his `awk' (*note String Functions::). These functions handle the vast majority of the cases where character transliteration is necessary, and so we chose to simply add those functions to `gawk' as well and then leave well enough alone. @@ -17023,10 +17036,10 @@ program. ---------- Footnotes ---------- - (1) On some older systems, including Solaris, `tr' may require that -the lists be written as range expressions enclosed in square brackets -(`[a-z]') and quoted, to prevent the shell from attempting a file name -expansion. This is not a feature. + (1) On some older systems, including Solaris, the system version of +`tr' may require that the lists be written as range expressions +enclosed in square brackets (`[a-z]') and quoted, to prevent the shell +from attempting a file name expansion. This is not a feature. (2) This program was written before `gawk' acquired the ability to split each character in a string into separate array elements. @@ -17146,7 +17159,7 @@ File: gawk.info, Node: Word Sorting, Next: History Sorting, Prev: Labels Prog When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This node develops a program for counting words and presenting the frequency information in a useful format. @@ -17209,6 +17222,10 @@ script. Here is the new version of the program: printf "%s\t%d\n", word, freq[word] } + The regexp `/[^[:alnum:]_[:blank:]]/' might have been written +`/[[:punct:]]/', but then underscores would also be removed, and we +want to keep them. + Assuming we have saved this program in a file named `wordfreq.awk', and that the data is in `file1', the following pipeline: @@ -17286,8 +17303,7 @@ information. For example, using the following `print' statement in the print data[lines[i]], lines[i] - This works because `data[$0]' is incremented each time a line is -seen. +This works because `data[$0]' is incremented each time a line is seen. File: gawk.info, Node: Extract Program, Next: Simple Sed, Prev: History Sorting, Up: Miscellaneous Programs @@ -17418,8 +17434,9 @@ elements (`@@' in the original file), we have to add a single `@' symbol back in.(1) When the processing of the array is finished, `join()' is called -with the value of `SUBSEP', to rejoin the pieces back into a single -line. That line is then printed to the output file: +with the value of `SUBSEP' (*note Multidimensional::), to rejoin the +pieces back into a single line. That line is then printed to the +output file: /^@c(omment)?[ \t]+file/ \ { @@ -17488,7 +17505,7 @@ closing the open file: ---------- Footnotes ---------- (1) This program was written before `gawk' had the `gensub()' -function. Consider how you might use it to simplify the code. +function. Consider how you might use it to simplify the code. File: gawk.info, Node: Simple Sed, Next: Igawk Program, Prev: Extract Program, Up: Miscellaneous Programs @@ -17827,12 +17844,12 @@ which represents the current directory: pathlist[i] = "." } - The stack is initialized with `ARGV[1]', which will be `/dev/stdin'. -The main loop comes next. Input lines are read in succession. Lines -that do not start with `@include' are printed verbatim. If the line -does start with `@include', the file name is in `$2'. `pathto()' is -called to generate the full path. If it cannot, then the program -prints an error message and continues. + The stack is initialized with `ARGV[1]', which will be +`"/dev/stdin"'. The main loop comes next. Input lines are read in +succession. Lines that do not start with `@include' are printed +verbatim. If the line does start with `@include', the file name is in +`$2'. `pathto()' is called to generate the full path. If it cannot, +then the program prints an error message and continues. The next thing to check is if the file is included already. The `processed' array is indexed by the full file name of each included @@ -17909,7 +17926,7 @@ supplied. The `eval' command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. - This version of `igawk' represents my fifth version of this program. + This version of `igawk' represents the fifth version of this program. There are four key simplifications that make the program work better: * Using `@include' even for the files named with `-f' makes building @@ -18083,7 +18100,9 @@ supplies the following copyright terms: X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O}' - We leave it to you to determine what the program does. + We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this Info file.) File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top @@ -20413,7 +20432,7 @@ categories, as follows: Program::) demonstrates: gawk> dump - -| # BEGIN + -| # BEGIN -| -| [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] -| [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] @@ -30122,7 +30141,7 @@ Index * Menu: * ! (exclamation point), ! operator: Boolean Ops. (line 67) -* ! (exclamation point), ! operator <1>: Egrep Program. (line 170) +* ! (exclamation point), ! operator <1>: Egrep Program. (line 175) * ! (exclamation point), ! operator <2>: Ranges. (line 48) * ! (exclamation point), ! operator: Precedence. (line 52) * ! (exclamation point), != operator <1>: Precedence. (line 65) @@ -30358,7 +30377,7 @@ Index (line 38) * \ (backslash), as field separator: Command Line Field Separator. (line 27) -* \ (backslash), continuing lines and <1>: Egrep Program. (line 220) +* \ (backslash), continuing lines and <1>: Egrep Program. (line 223) * \ (backslash), continuing lines and: Statements/Lines. (line 19) * \ (backslash), continuing lines and, comments and: Statements/Lines. (line 76) @@ -30386,7 +30405,7 @@ Index * _ (underscore), in names of private variables: Library Names. (line 29) * _ (underscore), translatable string: Programmer i18n. (line 69) -* _gr_init() user-defined function: Group Functions. (line 82) +* _gr_init() user-defined function: Group Functions. (line 83) * _ord_init() user-defined function: Ordinal Functions. (line 16) * _pw_init() user-defined function: Passwd Functions. (line 105) * accessing fields: Fields. (line 6) @@ -30632,7 +30651,7 @@ Index (line 38) * backslash (\), as field separator: Command Line Field Separator. (line 27) -* backslash (\), continuing lines and <1>: Egrep Program. (line 220) +* backslash (\), continuing lines and <1>: Egrep Program. (line 223) * backslash (\), continuing lines and: Statements/Lines. (line 19) * backslash (\), continuing lines and, comments and: Statements/Lines. (line 76) @@ -31288,7 +31307,7 @@ Index * END pattern, and profiling: Profiling. (line 62) * END pattern, assert() user-defined function and: Assert Function. (line 75) -* END pattern, backslash continuation and: Egrep Program. (line 220) +* END pattern, backslash continuation and: Egrep Program. (line 223) * END pattern, Boolean patterns and: Expression Patterns. (line 70) * END pattern, exit statement and: Exit Statement. (line 12) * END pattern, next/nextfile statements and <1>: Next Statement. @@ -31300,8 +31319,8 @@ Index * ENDFILE pattern: BEGINFILE/ENDFILE. (line 6) * ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70) * endfile() user-defined function: Filetrans Function. (line 62) -* endgrent() function (C library): Group Functions. (line 215) -* endgrent() user-defined function: Group Functions. (line 218) +* endgrent() function (C library): Group Functions. (line 213) +* endgrent() user-defined function: Group Functions. (line 216) * endpwent() function (C library): Passwd Functions. (line 210) * endpwent() user-defined function: Passwd Functions. (line 213) * ENVIRON array: Auto-set. (line 60) @@ -31334,7 +31353,7 @@ Index * evaluation order, concatenation: Concatenation. (line 41) * evaluation order, functions: Calling Built-in. (line 30) * examining fields: Fields. (line 6) -* exclamation point (!), ! operator <1>: Egrep Program. (line 170) +* exclamation point (!), ! operator <1>: Egrep Program. (line 175) * exclamation point (!), ! operator <2>: Precedence. (line 52) * exclamation point (!), ! operator: Boolean Ops. (line 67) * exclamation point (!), != operator <1>: Precedence. (line 65) @@ -31730,15 +31749,15 @@ Index * getaddrinfo() function (C library): TCP/IP Networking. (line 38) * getgrent() function (C library): Group Functions. (line 6) * getgrent() user-defined function: Group Functions. (line 6) -* getgrgid() function (C library): Group Functions. (line 186) -* getgrgid() user-defined function: Group Functions. (line 189) -* getgrnam() function (C library): Group Functions. (line 175) -* getgrnam() user-defined function: Group Functions. (line 180) -* getgruser() function (C library): Group Functions. (line 195) -* getgruser() function, user-defined: Group Functions. (line 198) +* getgrgid() function (C library): Group Functions. (line 184) +* getgrgid() user-defined function: Group Functions. (line 187) +* getgrnam() function (C library): Group Functions. (line 173) +* getgrnam() user-defined function: Group Functions. (line 178) +* getgruser() function (C library): Group Functions. (line 193) +* getgruser() function, user-defined: Group Functions. (line 196) * getline command: Reading Files. (line 20) * getline command, _gr_init() user-defined function: Group Functions. - (line 82) + (line 83) * getline command, _pw_init() function: Passwd Functions. (line 154) * getline command, coprocesses, using from <1>: Close Files And Pipes. (line 6) @@ -31939,6 +31958,7 @@ Index * Java programming language: Glossary. (line 380) * jawk: Other Versions. (line 112) * Jedi knights: Undocumented. (line 6) +* Johansen, Chris: Signature Program. (line 25) * join() user-defined function: Join Function. (line 18) * Kahrs, Ju"rgen <1>: Contributors. (line 70) * Kahrs, Ju"rgen: Acknowledgments. (line 60) @@ -33237,494 +33257,496 @@ Node: Other Features105547 Node: When106475 Node: Invoking Gawk108623 Node: Command Line110086 -Node: Options110869 -Ref: Options-Footnote-1126681 -Node: Other Arguments126706 -Node: Naming Standard Input129368 -Node: Environment Variables130462 -Node: AWKPATH Variable131020 -Ref: AWKPATH Variable-Footnote-1133798 -Ref: AWKPATH Variable-Footnote-2133843 -Node: AWKLIBPATH Variable134103 -Node: Other Environment Variables134862 -Node: Exit Status138517 -Node: Include Files139192 -Node: Loading Shared Libraries142770 -Node: Obsolete144153 -Node: Undocumented144850 -Node: Regexp145092 -Node: Regexp Usage146481 -Node: Escape Sequences148514 -Node: Regexp Operators154181 -Ref: Regexp Operators-Footnote-1161661 -Ref: Regexp Operators-Footnote-2161808 -Node: Bracket Expressions161906 -Ref: table-char-classes163796 -Node: GNU Regexp Operators166319 -Node: Case-sensitivity170042 -Ref: Case-sensitivity-Footnote-1172934 -Ref: Case-sensitivity-Footnote-2173169 -Node: Leftmost Longest173277 -Node: Computed Regexps174478 -Node: Reading Files177827 -Node: Records179829 -Node: awk split records180564 -Node: gawk split records185422 -Ref: gawk split records-Footnote-1189943 -Node: Fields189980 -Ref: Fields-Footnote-1192944 -Node: Nonconstant Fields193030 -Ref: Nonconstant Fields-Footnote-1195260 -Node: Changing Fields195462 -Node: Field Separators201416 -Node: Default Field Splitting204118 -Node: Regexp Field Splitting205235 -Node: Single Character Fields208576 -Node: Command Line Field Separator209635 -Node: Full Line Fields212977 -Ref: Full Line Fields-Footnote-1213485 -Node: Field Splitting Summary213531 -Ref: Field Splitting Summary-Footnote-1216630 -Node: Constant Size216731 -Node: Splitting By Content221338 -Ref: Splitting By Content-Footnote-1225088 -Node: Multiple Line225128 -Ref: Multiple Line-Footnote-1230984 -Node: Getline231163 -Node: Plain Getline233379 -Node: Getline/Variable235474 -Node: Getline/File236621 -Node: Getline/Variable/File238005 -Ref: Getline/Variable/File-Footnote-1239604 -Node: Getline/Pipe239691 -Node: Getline/Variable/Pipe242390 -Node: Getline/Coprocess243497 -Node: Getline/Variable/Coprocess244749 -Node: Getline Notes245486 -Node: Getline Summary248290 -Ref: table-getline-variants248698 -Node: Read Timeout249610 -Ref: Read Timeout-Footnote-1253437 -Node: Command line directories253495 -Node: Printing254377 -Node: Print256008 -Node: Print Examples257349 -Node: Output Separators260128 -Node: OFMT262144 -Node: Printf263502 -Node: Basic Printf264408 -Node: Control Letters265947 -Node: Format Modifiers269801 -Node: Printf Examples275828 -Node: Redirection278535 -Node: Special Files285507 -Node: Special FD286040 -Ref: Special FD-Footnote-1289664 -Node: Special Network289738 -Node: Special Caveats290588 -Node: Close Files And Pipes291384 -Ref: Close Files And Pipes-Footnote-1298522 -Ref: Close Files And Pipes-Footnote-2298670 -Node: Expressions298820 -Node: Values299952 -Node: Constants300628 -Node: Scalar Constants301308 -Ref: Scalar Constants-Footnote-1302167 -Node: Nondecimal-numbers302417 -Node: Regexp Constants305417 -Node: Using Constant Regexps305892 -Node: Variables308962 -Node: Using Variables309617 -Node: Assignment Options311341 -Node: Conversion313216 -Ref: table-locale-affects318652 -Ref: Conversion-Footnote-1319276 -Node: All Operators319385 -Node: Arithmetic Ops320015 -Node: Concatenation322520 -Ref: Concatenation-Footnote-1325316 -Node: Assignment Ops325436 -Ref: table-assign-ops330419 -Node: Increment Ops331736 -Node: Truth Values and Conditions335174 -Node: Truth Values336257 -Node: Typing and Comparison337306 -Node: Variable Typing338099 -Ref: Variable Typing-Footnote-1341999 -Node: Comparison Operators342121 -Ref: table-relational-ops342531 -Node: POSIX String Comparison346079 -Ref: POSIX String Comparison-Footnote-1347163 -Node: Boolean Ops347301 -Ref: Boolean Ops-Footnote-1351371 -Node: Conditional Exp351462 -Node: Function Calls353189 -Node: Precedence356947 -Node: Locales360616 -Node: Patterns and Actions362219 -Node: Pattern Overview363273 -Node: Regexp Patterns364950 -Node: Expression Patterns365493 -Node: Ranges369274 -Node: BEGIN/END372380 -Node: Using BEGIN/END373142 -Ref: Using BEGIN/END-Footnote-1375878 -Node: I/O And BEGIN/END375984 -Node: BEGINFILE/ENDFILE378269 -Node: Empty381205 -Node: Using Shell Variables381522 -Node: Action Overview383805 -Node: Statements386150 -Node: If Statement388004 -Node: While Statement389503 -Node: Do Statement391547 -Node: For Statement392703 -Node: Switch Statement395855 -Node: Break Statement397958 -Node: Continue Statement400013 -Node: Next Statement401806 -Node: Nextfile Statement404196 -Node: Exit Statement406851 -Node: Built-in Variables409253 -Node: User-modified410349 -Ref: User-modified-Footnote-1418034 -Node: Auto-set418096 -Ref: Auto-set-Footnote-1430661 -Ref: Auto-set-Footnote-2430866 -Node: ARGC and ARGV430922 -Node: Arrays434776 -Node: Array Basics436274 -Node: Array Intro437100 -Ref: figure-array-elements439073 -Node: Reference to Elements441480 -Node: Assigning Elements443753 -Node: Array Example444244 -Node: Scanning an Array445976 -Node: Controlling Scanning448991 -Ref: Controlling Scanning-Footnote-1454164 -Node: Delete454480 -Ref: Delete-Footnote-1457245 -Node: Numeric Array Subscripts457302 -Node: Uninitialized Subscripts459485 -Node: Multidimensional461110 -Node: Multiscanning464203 -Node: Arrays of Arrays465792 -Node: Functions470432 -Node: Built-in471251 -Node: Calling Built-in472329 -Node: Numeric Functions474317 -Ref: Numeric Functions-Footnote-1478151 -Ref: Numeric Functions-Footnote-2478508 -Ref: Numeric Functions-Footnote-3478556 -Node: String Functions478825 -Ref: String Functions-Footnote-1501836 -Ref: String Functions-Footnote-2501965 -Ref: String Functions-Footnote-3502213 -Node: Gory Details502300 -Ref: table-sub-escapes503969 -Ref: table-sub-posix-92505323 -Ref: table-sub-proposed506674 -Ref: table-posix-sub508028 -Ref: table-gensub-escapes509573 -Ref: Gory Details-Footnote-1510749 -Ref: Gory Details-Footnote-2510800 -Node: I/O Functions510951 -Ref: I/O Functions-Footnote-1518074 -Node: Time Functions518221 -Ref: Time Functions-Footnote-1528685 -Ref: Time Functions-Footnote-2528753 -Ref: Time Functions-Footnote-3528911 -Ref: Time Functions-Footnote-4529022 -Ref: Time Functions-Footnote-5529134 -Ref: Time Functions-Footnote-6529361 -Node: Bitwise Functions529627 -Ref: table-bitwise-ops530189 -Ref: Bitwise Functions-Footnote-1534434 -Node: Type Functions534618 -Node: I18N Functions535760 -Node: User-defined537405 -Node: Definition Syntax538209 -Ref: Definition Syntax-Footnote-1543124 -Node: Function Example543193 -Ref: Function Example-Footnote-1545837 -Node: Function Caveats545859 -Node: Calling A Function546377 -Node: Variable Scope547332 -Node: Pass By Value/Reference550320 -Node: Return Statement553828 -Node: Dynamic Typing556810 -Node: Indirect Calls557739 -Node: Library Functions567426 -Ref: Library Functions-Footnote-1570939 -Ref: Library Functions-Footnote-2571082 -Node: Library Names571253 -Ref: Library Names-Footnote-1574726 -Ref: Library Names-Footnote-2574946 -Node: General Functions575032 -Node: Strtonum Function576060 -Node: Assert Function578990 -Node: Round Function582316 -Node: Cliff Random Function583857 -Node: Ordinal Functions584873 -Ref: Ordinal Functions-Footnote-1587950 -Ref: Ordinal Functions-Footnote-2588202 -Node: Join Function588413 -Ref: Join Function-Footnote-1590184 -Node: Getlocaltime Function590384 -Node: Readfile Function594125 -Node: Data File Management595964 -Node: Filetrans Function596596 -Node: Rewind Function600665 -Node: File Checking602052 -Node: Empty Files603146 -Node: Ignoring Assigns605376 -Node: Getopt Function606930 -Ref: Getopt Function-Footnote-1618233 -Node: Passwd Functions618436 -Ref: Passwd Functions-Footnote-1627414 -Node: Group Functions627502 -Node: Walking Arrays635586 -Node: Sample Programs637722 -Node: Running Examples638396 -Node: Clones639124 -Node: Cut Program640348 -Node: Egrep Program650199 -Ref: Egrep Program-Footnote-1657972 -Node: Id Program658082 -Node: Split Program661731 -Ref: Split Program-Footnote-1665250 -Node: Tee Program665378 -Node: Uniq Program668181 -Node: Wc Program675610 -Ref: Wc Program-Footnote-1679876 -Ref: Wc Program-Footnote-2680076 -Node: Miscellaneous Programs680168 -Node: Dupword Program681356 -Node: Alarm Program683387 -Node: Translate Program688194 -Ref: Translate Program-Footnote-1692581 -Ref: Translate Program-Footnote-2692829 -Node: Labels Program692963 -Ref: Labels Program-Footnote-1696334 -Node: Word Sorting696418 -Node: History Sorting700302 -Node: Extract Program702141 -Ref: Extract Program-Footnote-1709644 -Node: Simple Sed709772 -Node: Igawk Program712834 -Ref: Igawk Program-Footnote-1728005 -Ref: Igawk Program-Footnote-2728206 -Node: Anagram Program728344 -Node: Signature Program731412 -Node: Advanced Features732512 -Node: Nondecimal Data734398 -Node: Array Sorting735981 -Node: Controlling Array Traversal736678 -Node: Array Sorting Functions744962 -Ref: Array Sorting Functions-Footnote-1748831 -Node: Two-way I/O749025 -Ref: Two-way I/O-Footnote-1754457 -Node: TCP/IP Networking754539 -Node: Profiling757383 -Node: Internationalization764886 -Node: I18N and L10N766311 -Node: Explaining gettext766997 -Ref: Explaining gettext-Footnote-1772065 -Ref: Explaining gettext-Footnote-2772249 -Node: Programmer i18n772414 -Node: Translator i18n776641 -Node: String Extraction777435 -Ref: String Extraction-Footnote-1778396 -Node: Printf Ordering778482 -Ref: Printf Ordering-Footnote-1781264 -Node: I18N Portability781328 -Ref: I18N Portability-Footnote-1783777 -Node: I18N Example783840 -Ref: I18N Example-Footnote-1786478 -Node: Gawk I18N786550 -Node: Debugger787171 -Node: Debugging788142 -Node: Debugging Concepts788575 -Node: Debugging Terms790431 -Node: Awk Debugging793028 -Node: Sample Debugging Session793920 -Node: Debugger Invocation794440 -Node: Finding The Bug795773 -Node: List of Debugger Commands802260 -Node: Breakpoint Control803594 -Node: Debugger Execution Control807258 -Node: Viewing And Changing Data810618 -Node: Execution Stack813974 -Node: Debugger Info815441 -Node: Miscellaneous Debugger Commands819435 -Node: Readline Support824613 -Node: Limitations825444 -Node: Arbitrary Precision Arithmetic827696 -Ref: Arbitrary Precision Arithmetic-Footnote-1829345 -Node: General Arithmetic829493 -Node: Floating Point Issues831213 -Node: String Conversion Precision832094 -Ref: String Conversion Precision-Footnote-1833799 -Node: Unexpected Results833908 -Node: POSIX Floating Point Problems836061 -Ref: POSIX Floating Point Problems-Footnote-1839886 -Node: Integer Programming839924 -Node: Floating-point Programming841663 -Ref: Floating-point Programming-Footnote-1847994 -Ref: Floating-point Programming-Footnote-2848264 -Node: Floating-point Representation848528 -Node: Floating-point Context849693 -Ref: table-ieee-formats850532 -Node: Rounding Mode851916 -Ref: table-rounding-modes852395 -Ref: Rounding Mode-Footnote-1855410 -Node: Gawk and MPFR855589 -Node: Arbitrary Precision Floats856998 -Ref: Arbitrary Precision Floats-Footnote-1859441 -Node: Setting Precision859757 -Ref: table-predefined-precision-strings860443 -Node: Setting Rounding Mode862588 -Ref: table-gawk-rounding-modes862992 -Node: Floating-point Constants864179 -Node: Changing Precision865608 -Ref: Changing Precision-Footnote-1867005 -Node: Exact Arithmetic867179 -Node: Arbitrary Precision Integers870317 -Ref: Arbitrary Precision Integers-Footnote-1873332 -Node: Dynamic Extensions873479 -Node: Extension Intro874937 -Node: Plugin License876202 -Node: Extension Mechanism Outline876887 -Ref: load-extension877304 -Ref: load-new-function878782 -Ref: call-new-function879777 -Node: Extension API Description881792 -Node: Extension API Functions Introduction883079 -Node: General Data Types888006 -Ref: General Data Types-Footnote-1893701 -Node: Requesting Values894000 -Ref: table-value-types-returned894737 -Node: Memory Allocation Functions895691 -Ref: Memory Allocation Functions-Footnote-1898437 -Node: Constructor Functions898533 -Node: Registration Functions900291 -Node: Extension Functions900976 -Node: Exit Callback Functions903278 -Node: Extension Version String904527 -Node: Input Parsers905177 -Node: Output Wrappers914934 -Node: Two-way processors919444 -Node: Printing Messages921652 -Ref: Printing Messages-Footnote-1922729 -Node: Updating `ERRNO'922881 -Node: Accessing Parameters923620 -Node: Symbol Table Access924850 -Node: Symbol table by name925364 -Node: Symbol table by cookie927340 -Ref: Symbol table by cookie-Footnote-1931472 -Node: Cached values931535 -Ref: Cached values-Footnote-1935025 -Node: Array Manipulation935116 -Ref: Array Manipulation-Footnote-1936214 -Node: Array Data Types936253 -Ref: Array Data Types-Footnote-1938956 -Node: Array Functions939048 -Node: Flattening Arrays942884 -Node: Creating Arrays949736 -Node: Extension API Variables954461 -Node: Extension Versioning955097 -Node: Extension API Informational Variables956998 -Node: Extension API Boilerplate958084 -Node: Finding Extensions961888 -Node: Extension Example962448 -Node: Internal File Description963178 -Node: Internal File Ops967269 -Ref: Internal File Ops-Footnote-1978778 -Node: Using Internal File Ops978918 -Ref: Using Internal File Ops-Footnote-1981265 -Node: Extension Samples981531 -Node: Extension Sample File Functions983055 -Node: Extension Sample Fnmatch991542 -Node: Extension Sample Fork993311 -Node: Extension Sample Inplace994524 -Node: Extension Sample Ord996302 -Node: Extension Sample Readdir997138 -Node: Extension Sample Revout998670 -Node: Extension Sample Rev2way999263 -Node: Extension Sample Read write array999953 -Node: Extension Sample Readfile1001836 -Node: Extension Sample API Tests1002936 -Node: Extension Sample Time1003461 -Node: gawkextlib1004825 -Node: Language History1007606 -Node: V7/SVR3.11009199 -Node: SVR41011519 -Node: POSIX1012961 -Node: BTL1014347 -Node: POSIX/GNU1015081 -Node: Feature History1020680 -Node: Common Extensions1033656 -Node: Ranges and Locales1034968 -Ref: Ranges and Locales-Footnote-11039585 -Ref: Ranges and Locales-Footnote-21039612 -Ref: Ranges and Locales-Footnote-31039846 -Node: Contributors1040067 -Node: Installation1045448 -Node: Gawk Distribution1046342 -Node: Getting1046826 -Node: Extracting1047652 -Node: Distribution contents1049344 -Node: Unix Installation1055065 -Node: Quick Installation1055682 -Node: Additional Configuration Options1058128 -Node: Configuration Philosophy1059864 -Node: Non-Unix Installation1062218 -Node: PC Installation1062676 -Node: PC Binary Installation1063987 -Node: PC Compiling1065835 -Node: PC Testing1068795 -Node: PC Using1069971 -Node: Cygwin1074139 -Node: MSYS1074948 -Node: VMS Installation1075462 -Node: VMS Compilation1076258 -Ref: VMS Compilation-Footnote-11077510 -Node: VMS Dynamic Extensions1077568 -Node: VMS Installation Details1078941 -Node: VMS Running1081192 -Node: VMS GNV1084026 -Node: VMS Old Gawk1084749 -Node: Bugs1085219 -Node: Other Versions1089137 -Node: Notes1095221 -Node: Compatibility Mode1096021 -Node: Additions1096804 -Node: Accessing The Source1097731 -Node: Adding Code1099171 -Node: New Ports1105216 -Node: Derived Files1109351 -Ref: Derived Files-Footnote-11114672 -Ref: Derived Files-Footnote-21114706 -Ref: Derived Files-Footnote-31115306 -Node: Future Extensions1115404 -Node: Implementation Limitations1115987 -Node: Extension Design1117235 -Node: Old Extension Problems1118389 -Ref: Old Extension Problems-Footnote-11119897 -Node: Extension New Mechanism Goals1119954 -Ref: Extension New Mechanism Goals-Footnote-11123319 -Node: Extension Other Design Decisions1123505 -Node: Extension Future Growth1125611 -Node: Old Extension Mechanism1126447 -Node: Basic Concepts1128187 -Node: Basic High Level1128868 -Ref: figure-general-flow1129140 -Ref: figure-process-flow1129739 -Ref: Basic High Level-Footnote-11132968 -Node: Basic Data Typing1133153 -Node: Glossary1136508 -Node: Copying1161739 -Node: GNU Free Documentation License1199295 -Node: Index1224431 +Node: Options110877 +Ref: Options-Footnote-1126689 +Node: Other Arguments126714 +Node: Naming Standard Input129376 +Node: Environment Variables130470 +Node: AWKPATH Variable131028 +Ref: AWKPATH Variable-Footnote-1133806 +Ref: AWKPATH Variable-Footnote-2133851 +Node: AWKLIBPATH Variable134111 +Node: Other Environment Variables134870 +Node: Exit Status138525 +Node: Include Files139200 +Node: Loading Shared Libraries142778 +Node: Obsolete144161 +Node: Undocumented144858 +Node: Regexp145100 +Node: Regexp Usage146489 +Node: Escape Sequences148522 +Node: Regexp Operators154189 +Ref: Regexp Operators-Footnote-1161669 +Ref: Regexp Operators-Footnote-2161816 +Node: Bracket Expressions161914 +Ref: table-char-classes163804 +Node: GNU Regexp Operators166327 +Node: Case-sensitivity170050 +Ref: Case-sensitivity-Footnote-1172942 +Ref: Case-sensitivity-Footnote-2173177 +Node: Leftmost Longest173285 +Node: Computed Regexps174486 +Node: Reading Files177835 +Node: Records179837 +Node: awk split records180572 +Node: gawk split records185430 +Ref: gawk split records-Footnote-1189951 +Node: Fields189988 +Ref: Fields-Footnote-1192952 +Node: Nonconstant Fields193038 +Ref: Nonconstant Fields-Footnote-1195268 +Node: Changing Fields195470 +Node: Field Separators201424 +Node: Default Field Splitting204126 +Node: Regexp Field Splitting205243 +Node: Single Character Fields208584 +Node: Command Line Field Separator209643 +Node: Full Line Fields212985 +Ref: Full Line Fields-Footnote-1213493 +Node: Field Splitting Summary213539 +Ref: Field Splitting Summary-Footnote-1216638 +Node: Constant Size216739 +Node: Splitting By Content221346 +Ref: Splitting By Content-Footnote-1225096 +Node: Multiple Line225136 +Ref: Multiple Line-Footnote-1230992 +Node: Getline231171 +Node: Plain Getline233387 +Node: Getline/Variable235482 +Node: Getline/File236629 +Node: Getline/Variable/File238013 +Ref: Getline/Variable/File-Footnote-1239612 +Node: Getline/Pipe239699 +Node: Getline/Variable/Pipe242398 +Node: Getline/Coprocess243505 +Node: Getline/Variable/Coprocess244757 +Node: Getline Notes245494 +Node: Getline Summary248298 +Ref: table-getline-variants248706 +Node: Read Timeout249618 +Ref: Read Timeout-Footnote-1253445 +Node: Command line directories253503 +Node: Printing254385 +Node: Print256016 +Node: Print Examples257357 +Node: Output Separators260136 +Node: OFMT262152 +Node: Printf263510 +Node: Basic Printf264416 +Node: Control Letters265955 +Node: Format Modifiers269809 +Node: Printf Examples275836 +Node: Redirection278543 +Node: Special Files285515 +Node: Special FD286048 +Ref: Special FD-Footnote-1289672 +Node: Special Network289746 +Node: Special Caveats290596 +Node: Close Files And Pipes291392 +Ref: Close Files And Pipes-Footnote-1298530 +Ref: Close Files And Pipes-Footnote-2298678 +Node: Expressions298828 +Node: Values299960 +Node: Constants300636 +Node: Scalar Constants301316 +Ref: Scalar Constants-Footnote-1302175 +Node: Nondecimal-numbers302425 +Node: Regexp Constants305425 +Node: Using Constant Regexps305900 +Node: Variables308970 +Node: Using Variables309625 +Node: Assignment Options311349 +Node: Conversion313224 +Ref: table-locale-affects318660 +Ref: Conversion-Footnote-1319284 +Node: All Operators319393 +Node: Arithmetic Ops320023 +Node: Concatenation322528 +Ref: Concatenation-Footnote-1325324 +Node: Assignment Ops325444 +Ref: table-assign-ops330427 +Node: Increment Ops331744 +Node: Truth Values and Conditions335182 +Node: Truth Values336265 +Node: Typing and Comparison337314 +Node: Variable Typing338107 +Ref: Variable Typing-Footnote-1342007 +Node: Comparison Operators342129 +Ref: table-relational-ops342539 +Node: POSIX String Comparison346087 +Ref: POSIX String Comparison-Footnote-1347171 +Node: Boolean Ops347309 +Ref: Boolean Ops-Footnote-1351379 +Node: Conditional Exp351470 +Node: Function Calls353197 +Node: Precedence356955 +Node: Locales360624 +Node: Patterns and Actions362227 +Node: Pattern Overview363281 +Node: Regexp Patterns364958 +Node: Expression Patterns365501 +Node: Ranges369282 +Node: BEGIN/END372388 +Node: Using BEGIN/END373150 +Ref: Using BEGIN/END-Footnote-1375886 +Node: I/O And BEGIN/END375992 +Node: BEGINFILE/ENDFILE378277 +Node: Empty381213 +Node: Using Shell Variables381530 +Node: Action Overview383813 +Node: Statements386164 +Node: If Statement388018 +Node: While Statement389521 +Node: Do Statement391565 +Node: For Statement392721 +Node: Switch Statement395873 +Node: Break Statement397976 +Node: Continue Statement400031 +Node: Next Statement401824 +Node: Nextfile Statement404214 +Node: Exit Statement406869 +Node: Built-in Variables409273 +Node: User-modified410369 +Ref: User-modified-Footnote-1418054 +Node: Auto-set418116 +Ref: Auto-set-Footnote-1430681 +Ref: Auto-set-Footnote-2430886 +Node: ARGC and ARGV430942 +Node: Arrays434796 +Node: Array Basics436294 +Node: Array Intro437120 +Ref: figure-array-elements439093 +Node: Reference to Elements441500 +Node: Assigning Elements443773 +Node: Array Example444264 +Node: Scanning an Array445996 +Node: Controlling Scanning449011 +Ref: Controlling Scanning-Footnote-1454184 +Node: Delete454500 +Ref: Delete-Footnote-1457265 +Node: Numeric Array Subscripts457322 +Node: Uninitialized Subscripts459505 +Node: Multidimensional461130 +Node: Multiscanning464223 +Node: Arrays of Arrays465812 +Node: Functions470452 +Node: Built-in471271 +Node: Calling Built-in472349 +Node: Numeric Functions474337 +Ref: Numeric Functions-Footnote-1478171 +Ref: Numeric Functions-Footnote-2478528 +Ref: Numeric Functions-Footnote-3478576 +Node: String Functions478845 +Ref: String Functions-Footnote-1501856 +Ref: String Functions-Footnote-2501985 +Ref: String Functions-Footnote-3502233 +Node: Gory Details502320 +Ref: table-sub-escapes503989 +Ref: table-sub-posix-92505343 +Ref: table-sub-proposed506694 +Ref: table-posix-sub508048 +Ref: table-gensub-escapes509593 +Ref: Gory Details-Footnote-1510769 +Ref: Gory Details-Footnote-2510820 +Node: I/O Functions510971 +Ref: I/O Functions-Footnote-1518094 +Node: Time Functions518241 +Ref: Time Functions-Footnote-1528705 +Ref: Time Functions-Footnote-2528773 +Ref: Time Functions-Footnote-3528931 +Ref: Time Functions-Footnote-4529042 +Ref: Time Functions-Footnote-5529154 +Ref: Time Functions-Footnote-6529381 +Node: Bitwise Functions529647 +Ref: table-bitwise-ops530209 +Ref: Bitwise Functions-Footnote-1534454 +Node: Type Functions534638 +Node: I18N Functions535780 +Node: User-defined537425 +Node: Definition Syntax538229 +Ref: Definition Syntax-Footnote-1543154 +Node: Function Example543223 +Ref: Function Example-Footnote-1545867 +Node: Function Caveats545889 +Node: Calling A Function546407 +Node: Variable Scope547362 +Node: Pass By Value/Reference550350 +Node: Return Statement553858 +Node: Dynamic Typing556842 +Node: Indirect Calls557771 +Node: Library Functions567458 +Ref: Library Functions-Footnote-1570971 +Ref: Library Functions-Footnote-2571114 +Node: Library Names571285 +Ref: Library Names-Footnote-1574758 +Ref: Library Names-Footnote-2574978 +Node: General Functions575064 +Node: Strtonum Function576092 +Node: Assert Function579022 +Node: Round Function582348 +Node: Cliff Random Function583889 +Node: Ordinal Functions584905 +Ref: Ordinal Functions-Footnote-1587982 +Ref: Ordinal Functions-Footnote-2588234 +Node: Join Function588445 +Ref: Join Function-Footnote-1590216 +Node: Getlocaltime Function590416 +Node: Readfile Function594152 +Node: Data File Management595991 +Node: Filetrans Function596623 +Node: Rewind Function600692 +Node: File Checking602079 +Ref: File Checking-Footnote-1603211 +Node: Empty Files603412 +Node: Ignoring Assigns605642 +Node: Getopt Function607196 +Ref: Getopt Function-Footnote-1618499 +Node: Passwd Functions618702 +Ref: Passwd Functions-Footnote-1627681 +Node: Group Functions627769 +Ref: Group Functions-Footnote-1635711 +Node: Walking Arrays635924 +Node: Sample Programs638060 +Node: Running Examples638734 +Node: Clones639462 +Node: Cut Program640686 +Node: Egrep Program650539 +Ref: Egrep Program-Footnote-1658510 +Node: Id Program658620 +Node: Split Program662284 +Ref: Split Program-Footnote-1665822 +Node: Tee Program665950 +Node: Uniq Program668757 +Node: Wc Program676187 +Ref: Wc Program-Footnote-1680455 +Ref: Wc Program-Footnote-2680655 +Node: Miscellaneous Programs680747 +Node: Dupword Program681935 +Node: Alarm Program683966 +Node: Translate Program688773 +Ref: Translate Program-Footnote-1693164 +Ref: Translate Program-Footnote-2693434 +Node: Labels Program693568 +Ref: Labels Program-Footnote-1696939 +Node: Word Sorting697023 +Node: History Sorting701066 +Node: Extract Program702902 +Ref: Extract Program-Footnote-1710432 +Node: Simple Sed710561 +Node: Igawk Program713623 +Ref: Igawk Program-Footnote-1728798 +Ref: Igawk Program-Footnote-2728999 +Node: Anagram Program729137 +Node: Signature Program732205 +Node: Advanced Features733452 +Node: Nondecimal Data735338 +Node: Array Sorting736921 +Node: Controlling Array Traversal737618 +Node: Array Sorting Functions745902 +Ref: Array Sorting Functions-Footnote-1749771 +Node: Two-way I/O749965 +Ref: Two-way I/O-Footnote-1755397 +Node: TCP/IP Networking755479 +Node: Profiling758323 +Node: Internationalization765826 +Node: I18N and L10N767251 +Node: Explaining gettext767937 +Ref: Explaining gettext-Footnote-1773005 +Ref: Explaining gettext-Footnote-2773189 +Node: Programmer i18n773354 +Node: Translator i18n777581 +Node: String Extraction778375 +Ref: String Extraction-Footnote-1779336 +Node: Printf Ordering779422 +Ref: Printf Ordering-Footnote-1782204 +Node: I18N Portability782268 +Ref: I18N Portability-Footnote-1784717 +Node: I18N Example784780 +Ref: I18N Example-Footnote-1787418 +Node: Gawk I18N787490 +Node: Debugger788111 +Node: Debugging789082 +Node: Debugging Concepts789515 +Node: Debugging Terms791371 +Node: Awk Debugging793968 +Node: Sample Debugging Session794860 +Node: Debugger Invocation795380 +Node: Finding The Bug796713 +Node: List of Debugger Commands803200 +Node: Breakpoint Control804534 +Node: Debugger Execution Control808198 +Node: Viewing And Changing Data811558 +Node: Execution Stack814914 +Node: Debugger Info816381 +Node: Miscellaneous Debugger Commands820375 +Node: Readline Support825559 +Node: Limitations826390 +Node: Arbitrary Precision Arithmetic828642 +Ref: Arbitrary Precision Arithmetic-Footnote-1830291 +Node: General Arithmetic830439 +Node: Floating Point Issues832159 +Node: String Conversion Precision833040 +Ref: String Conversion Precision-Footnote-1834745 +Node: Unexpected Results834854 +Node: POSIX Floating Point Problems837007 +Ref: POSIX Floating Point Problems-Footnote-1840832 +Node: Integer Programming840870 +Node: Floating-point Programming842609 +Ref: Floating-point Programming-Footnote-1848940 +Ref: Floating-point Programming-Footnote-2849210 +Node: Floating-point Representation849474 +Node: Floating-point Context850639 +Ref: table-ieee-formats851478 +Node: Rounding Mode852862 +Ref: table-rounding-modes853341 +Ref: Rounding Mode-Footnote-1856356 +Node: Gawk and MPFR856535 +Node: Arbitrary Precision Floats857944 +Ref: Arbitrary Precision Floats-Footnote-1860387 +Node: Setting Precision860703 +Ref: table-predefined-precision-strings861389 +Node: Setting Rounding Mode863534 +Ref: table-gawk-rounding-modes863938 +Node: Floating-point Constants865125 +Node: Changing Precision866554 +Ref: Changing Precision-Footnote-1867951 +Node: Exact Arithmetic868125 +Node: Arbitrary Precision Integers871263 +Ref: Arbitrary Precision Integers-Footnote-1874278 +Node: Dynamic Extensions874425 +Node: Extension Intro875883 +Node: Plugin License877148 +Node: Extension Mechanism Outline877833 +Ref: load-extension878250 +Ref: load-new-function879728 +Ref: call-new-function880723 +Node: Extension API Description882738 +Node: Extension API Functions Introduction884025 +Node: General Data Types888952 +Ref: General Data Types-Footnote-1894647 +Node: Requesting Values894946 +Ref: table-value-types-returned895683 +Node: Memory Allocation Functions896637 +Ref: Memory Allocation Functions-Footnote-1899383 +Node: Constructor Functions899479 +Node: Registration Functions901237 +Node: Extension Functions901922 +Node: Exit Callback Functions904224 +Node: Extension Version String905473 +Node: Input Parsers906123 +Node: Output Wrappers915880 +Node: Two-way processors920390 +Node: Printing Messages922598 +Ref: Printing Messages-Footnote-1923675 +Node: Updating `ERRNO'923827 +Node: Accessing Parameters924566 +Node: Symbol Table Access925796 +Node: Symbol table by name926310 +Node: Symbol table by cookie928286 +Ref: Symbol table by cookie-Footnote-1932418 +Node: Cached values932481 +Ref: Cached values-Footnote-1935971 +Node: Array Manipulation936062 +Ref: Array Manipulation-Footnote-1937160 +Node: Array Data Types937199 +Ref: Array Data Types-Footnote-1939902 +Node: Array Functions939994 +Node: Flattening Arrays943830 +Node: Creating Arrays950682 +Node: Extension API Variables955407 +Node: Extension Versioning956043 +Node: Extension API Informational Variables957944 +Node: Extension API Boilerplate959030 +Node: Finding Extensions962834 +Node: Extension Example963394 +Node: Internal File Description964124 +Node: Internal File Ops968215 +Ref: Internal File Ops-Footnote-1979724 +Node: Using Internal File Ops979864 +Ref: Using Internal File Ops-Footnote-1982211 +Node: Extension Samples982477 +Node: Extension Sample File Functions984001 +Node: Extension Sample Fnmatch992488 +Node: Extension Sample Fork994257 +Node: Extension Sample Inplace995470 +Node: Extension Sample Ord997248 +Node: Extension Sample Readdir998084 +Node: Extension Sample Revout999616 +Node: Extension Sample Rev2way1000209 +Node: Extension Sample Read write array1000899 +Node: Extension Sample Readfile1002782 +Node: Extension Sample API Tests1003882 +Node: Extension Sample Time1004407 +Node: gawkextlib1005771 +Node: Language History1008552 +Node: V7/SVR3.11010145 +Node: SVR41012465 +Node: POSIX1013907 +Node: BTL1015293 +Node: POSIX/GNU1016027 +Node: Feature History1021626 +Node: Common Extensions1034602 +Node: Ranges and Locales1035914 +Ref: Ranges and Locales-Footnote-11040531 +Ref: Ranges and Locales-Footnote-21040558 +Ref: Ranges and Locales-Footnote-31040792 +Node: Contributors1041013 +Node: Installation1046394 +Node: Gawk Distribution1047288 +Node: Getting1047772 +Node: Extracting1048598 +Node: Distribution contents1050290 +Node: Unix Installation1056011 +Node: Quick Installation1056628 +Node: Additional Configuration Options1059074 +Node: Configuration Philosophy1060810 +Node: Non-Unix Installation1063164 +Node: PC Installation1063622 +Node: PC Binary Installation1064933 +Node: PC Compiling1066781 +Node: PC Testing1069741 +Node: PC Using1070917 +Node: Cygwin1075085 +Node: MSYS1075894 +Node: VMS Installation1076408 +Node: VMS Compilation1077204 +Ref: VMS Compilation-Footnote-11078456 +Node: VMS Dynamic Extensions1078514 +Node: VMS Installation Details1079887 +Node: VMS Running1082138 +Node: VMS GNV1084972 +Node: VMS Old Gawk1085695 +Node: Bugs1086165 +Node: Other Versions1090083 +Node: Notes1096167 +Node: Compatibility Mode1096967 +Node: Additions1097750 +Node: Accessing The Source1098677 +Node: Adding Code1100117 +Node: New Ports1106162 +Node: Derived Files1110297 +Ref: Derived Files-Footnote-11115618 +Ref: Derived Files-Footnote-21115652 +Ref: Derived Files-Footnote-31116252 +Node: Future Extensions1116350 +Node: Implementation Limitations1116933 +Node: Extension Design1118181 +Node: Old Extension Problems1119335 +Ref: Old Extension Problems-Footnote-11120843 +Node: Extension New Mechanism Goals1120900 +Ref: Extension New Mechanism Goals-Footnote-11124265 +Node: Extension Other Design Decisions1124451 +Node: Extension Future Growth1126557 +Node: Old Extension Mechanism1127393 +Node: Basic Concepts1129133 +Node: Basic High Level1129814 +Ref: figure-general-flow1130086 +Ref: figure-process-flow1130685 +Ref: Basic High Level-Footnote-11133914 +Node: Basic Data Typing1134099 +Node: Glossary1137454 +Node: Copying1162685 +Node: GNU Free Documentation License1200241 +Node: Index1225377 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 470b2822..88098df3 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -58,6 +58,7 @@ @set SUBSECTION subsection @set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} @set COMMONEXT (c.e.) +@set PAGE page @end iftex @ifinfo @set DOCUMENT Info file @@ -67,6 +68,7 @@ @set SUBSECTION node @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifinfo @ifhtml @set DOCUMENT Web page @@ -76,6 +78,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifhtml @ifdocbook @set DOCUMENT book @@ -85,6 +88,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifdocbook @ifxml @set DOCUMENT book @@ -94,6 +98,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifxml @ifplaintext @set DOCUMENT book @@ -103,6 +108,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifplaintext @ifdocbook @@ -3388,19 +3394,10 @@ There are two ways to run @command{awk}---with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [@dots{}] in these templates are optional: -@ifnotdocbook -@example -awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{} -awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{} -@end example -@end ifnotdocbook - -@c FIXME - find a better way to mark this up in docbook -@docbook -<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> … -awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> … -</screen> -@end docbook +@display +@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{} +@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{} +@end display @cindex GNU long options @cindex long options @@ -12948,13 +12945,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell @command{awk} what to do once a match for the pattern is found. Thus, in outline, an @command{awk} program generally looks like this: -@example -@r{[}@var{pattern}@r{]} @{ @var{action} @} - @var{pattern} @r{[}@{ @var{action} @}@r{]} +@display +[@var{pattern}] @code{@{ @var{action} @}} + @var{pattern} [@code{@{ @var{action} @}}] @dots{} -function @var{name}(@var{args}) @{ @dots{} @} +@code{function @var{name}(@var{args}) @{ @dots{} @}} @dots{} -@end example +@end display @cindex @code{@{@}} (braces), actions and @cindex braces (@code{@{@}}), actions and @@ -13069,9 +13066,9 @@ newlines or semicolons. The @code{if}-@code{else} statement is @command{awk}'s decision-making statement. It looks like this: -@example -if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} -@end example +@display +@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}] +@end display @noindent The @var{condition} is an expression that controls what the rest of the @@ -13669,9 +13666,9 @@ The @code{exit} statement causes @command{awk} to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The @code{exit} statement is written as follows: -@example -exit @r{[}@var{return code}@r{]} -@end example +@display +@code{exit} [@var{return code}] +@end display @cindex @code{BEGIN} pattern, @code{exit} statement and @cindex @code{END} pattern, @code{exit} statement and @@ -18510,12 +18507,12 @@ entire program before starting to execute any of it. The definition of a function named @var{name} looks like this: -@example -function @var{name}(@r{[}@var{parameter-list}@r{]}) -@{ +@display +@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)} +@code{@{} @var{body-of-function} -@} -@end example +@code{@}} +@end display @cindex names, functions @cindex functions, names of @@ -19037,9 +19034,9 @@ This statement returns control to the calling part of the @command{awk} program. can also be used to return a value for use in the rest of the @command{awk} program. It looks like this: -@example -return @r{[}@var{expression}@r{]} -@end example +@display +@code{return} [@var{expression}] +@end display The @var{expression} part is optional. Due most likely to an oversight, POSIX does not define what the return @@ -20348,7 +20345,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -20704,10 +20701,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword @cindex readable data files@comma{} checking @cindex files, skipping Normally, if you give @command{awk} a data file that isn't readable, -it stops with a fatal error. There are times when you -might want to just ignore such files and keep going. You can -do this by prepending the following program to your @command{awk} -program: +it stops with a fatal error. There are times when you might want to +just ignore such files and keep going.@footnote{The @code{BEGINFILE} +special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative +mechanism for dealing with files that can't be opened. However, the +code here provides a portable solution.} You can do this by prepending +the following program to your @command{awk} program: @cindex @code{readable.awk} program @example @@ -20745,7 +20744,7 @@ skips the file (since it's no longer in the list). See also @ref{ARGC and ARGV}. @node Empty Files -@subsection Checking For Zero-length Files +@subsection Checking for Zero-length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit @@ -21218,7 +21217,7 @@ BEGIN @{ # test program if (_getopt_test) @{ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -21234,32 +21233,31 @@ result of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} -@print{} c = <a>, optarg = <> -@print{} c = <c>, optarg = <> -@print{} c = <b>, optarg = <ARG> +@print{} c = <a>, Optarg = <> +@print{} c = <c>, Optarg = <> +@print{} c = <b>, Optarg = <ARG> @print{} non-option arguments: @print{} ARGV[3] = <bax> @print{} ARGV[4] = <-x> $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc} -@print{} c = <a>, optarg = <> +@print{} c = <a>, Optarg = <> @error{} x -- invalid option -@print{} c = <?>, optarg = <> +@print{} c = <?>, Optarg = <> @print{} non-option arguments: @print{} ARGV[4] = <xyz> @print{} ARGV[5] = <abc> @end example -In both runs, -the first @option{--} terminates the arguments to @command{awk}, so that it does -not try to interpret the @option{-a}, etc., as its own options. +In both runs, the first @option{--} terminates the arguments to +@command{awk}, so that it does not try to interpret the @option{-a}, +etc., as its own options. @quotation NOTE -After @code{getopt()} is through, it is the responsibility of the user level -code to -clear out all the elements of @code{ARGV} from 1 to @code{Optind}, -so that @command{awk} does not try to process the command-line options -as file names. +After @code{getopt()} is through, it is the responsibility of the +user level code to clear out all the elements of @code{ARGV} from 1 +to @code{Optind}, so that @command{awk} does not try to process the +command-line options as file names. @end quotation Several of the sample programs presented in @@ -21328,7 +21326,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ @c endfile @ignore @@ -21674,7 +21672,7 @@ is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ @c endfile @ignore @@ -21761,7 +21759,7 @@ it is usually empty or set to @samp{*}. @item Group ID Number The group's numeric group ID number; -this number must be unique within the file. +the association of name to number must be unique within the file. (On some systems it's a C @code{long}, and not an @code{int}. Thus we cast it to @code{long} for all cases.) @@ -21897,10 +21895,10 @@ tvpeople:*:101:david,conan,tom,joan For this reason, @code{_gr_init()} looks to see if a group name or group ID number is already seen. If it is, then the user names are -simply concatenated onto the previous list of users. (There is actually a +simply concatenated onto the previous list of users.@footnote{There is actually a subtle problem with the code just presented. Suppose that the first time there were no names. This code adds the names with -a leading comma. It also doesn't check that there is a @code{$4}.) +a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, @@ -22270,13 +22268,7 @@ function usage( e1, e2) @noindent The variables @code{e1} and @code{e2} are used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. @cindex @code{BEGIN} pattern, running @command{awk} programs and @cindex @code{FS} variable, running @command{awk} programs and @@ -22315,7 +22307,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" @} else if (c == "s") - suppress++ + suppress = 1 else usage() @} @@ -22528,9 +22520,9 @@ expressions that are almost identical to those available in @command{awk} (@pxref{Regexp}). You invoke it as follows: -@example -egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} -@end example +@display +@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{} +@end display The @var{pattern} is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -22712,6 +22704,11 @@ function endfile(file) @c endfile @end example +The @code{BEGINFILE} and @code{ENDFILE} special patterns +(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be +@command{gawk}-specific. Additionally, this example was written before +@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}. + The following rule does most of the work of matching lines. The variable @code{matches} is true if the line matched the pattern. If the user wants lines that did not match, the sense of @code{matches} is inverted @@ -22768,9 +22765,7 @@ there are no matches, the exit status is one; otherwise it is zero: @c file eg/prog/egrep.awk END \ @{ - if (total == 0) - exit 1 - exit 0 + exit (total == 0) @} @c endfile @end example @@ -22824,7 +22819,7 @@ corresponding user and group names. The output might look like this: @example $ @kbd{id} -@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) +@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) @end example @cindex @code{PROCINFO} array, and user and group ID numbers @@ -22860,6 +22855,7 @@ numbers: # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 @c endfile @end ignore @@ -22879,34 +22875,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (euid != uid) @{ printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (egid != gid) @{ printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} for (i = 1; ("group" i) in PROCINFO; i++) @{ @@ -22915,16 +22903,20 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") @} print "" @} + +function pr_first_field(str, a) +@{ + split(str, a, ":") + printf("(%s)", a[1]) +@} @c endfile @end example @@ -22944,9 +22936,13 @@ The loop is also correct if there are @emph{no} supplementary groups; then the condition is false the first time it's tested, and the loop body never executes. +The @code{pr_first_field()} function simply isolates out some +code that is used repeatedly, making the whole program +slightly shorter and cleaner. + @c exercise!!! @ignore -The POSIX version of @command{id} takes arguments that control which +The POSIX version of @command{id} takes options that control which information is printed. Modify this version to accept the same arguments and perform in the same way. @end ignore @@ -22966,9 +22962,9 @@ Usage is as follows:@footnote{This is the traditional usage. The POSIX usage is different, but not relevant for what the program aims to demonstrate.} -@example -split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]} -@end example +@display +@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}] +@end display By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has @@ -23002,11 +22998,12 @@ is used as the prefix for the output file names: # # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 @c endfile @end ignore @c file eg/prog/split.awk -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN @{ outfile = "x" # default @@ -23015,7 +23012,7 @@ BEGIN @{ usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) @{ + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{ count = -ARGV[i] ARGV[i] = "" i++ @@ -23087,13 +23084,7 @@ function usage( e) @noindent The variable @code{e} is used so that the function -fits nicely on the -@ifinfo -screen. -@end ifinfo -@ifnotinfo -page. -@end ifnotinfo +fits nicely on the @value{PAGE}. This program is a bit sloppy; it relies on @command{awk} to automatically close the last file instead of doing it in an @code{END} rule. @@ -23116,9 +23107,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: -@example -tee @r{[}-a@r{]} file @dots{} -@end example +@display +@command{tee} [@option{-a}] @var{file} @dots{} +@end display The @option{-a} option tells @code{tee} to append to the named files, instead of truncating them and starting over. @@ -23243,9 +23234,9 @@ input, and by default removes duplicate lines. In other words, it only prints unique lines---hence the name. @command{uniq} has a number of options. The usage is as follows: -@example -uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]} -@end example +@display +@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]] +@end display The options for @command{uniq} are: @@ -23269,11 +23260,11 @@ by runs of spaces and/or TABs. Skip @var{n} characters before comparing lines. Any fields specified with @samp{-@var{n}} are skipped first. -@item @var{input file} +@item @var{inputfile} Data is read from the input file named on the command line, instead of from the standard input. -@item @var{output file} +@item @var{outputfile} The generated output is sent to the named output file, instead of to the standard output. @end table @@ -23510,9 +23501,9 @@ END @{ The @command{wc} (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: -@example -wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} -@end example +@display +@command{wc} [@option{-lwc}] [@var{files} @dots{}] +@end display If no files are specified on the command line, @command{wc} reads its standard input. If there are multiple files, it also prints total counts for all @@ -23993,19 +23984,18 @@ often used to map uppercase letters into lowercase for further processing: @end example @command{tr} requires two lists of characters.@footnote{On some older -systems, -including Solaris, -@command{tr} may require that the lists be written as -range expressions enclosed in square brackets (@samp{[a-z]}) and quoted, -to prevent the shell from attempting a file name expansion. This is -not a feature.} When processing the input, the first character in the -first list is replaced with the first character in the second list, -the second character in the first list is replaced with the second -character in the second list, and so on. If there are more characters -in the ``from'' list than in the ``to'' list, the last character of the -``to'' list is used for the remaining characters in the ``from'' list. - -Some time ago, +systems, including Solaris, the system version of @command{tr} may require +that the lists be written as range expressions enclosed in square brackets +(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file +name expansion. This is not a feature.} When processing the input, the +first character in the first list is replaced with the first character +in the second list, the second character in the first list is replaced +with the second character in the second list, and so on. If there are +more characters in the ``from'' list than in the ``to'' list, the last +character of the ``to'' list is used for the remaining characters in the +``from'' list. + +Once upon a time, @c early or mid-1989! a user proposed that a transliteration function should be added to @command{gawk}. @@ -24119,13 +24109,12 @@ BEGIN @{ While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the @command{gawk} authors) started to consider adding a built-in function. However, -shortly after writing this program, we learned that the System V Release 4 -@command{awk} had added the @code{toupper()} and @code{tolower()} functions -(@pxref{String Functions}). -These functions handle the vast majority of the -cases where character transliteration is necessary, and so we chose to -simply add those functions to @command{gawk} as well and then leave well -enough alone. +shortly after writing this program, we learned that Brian Kernighan +had added the @code{toupper()} and @code{tolower()} functions to his +@command{awk} (@pxref{String Functions}). These functions handle the +vast majority of the cases where character transliteration is necessary, +and so we chose to simply add those functions to @command{gawk} as well +and then leave well enough alone. An obvious improvement to this program would be to set up the @code{t_ar} array only once, in a @code{BEGIN} rule. However, this @@ -24158,7 +24147,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that @command{awk} splits records at blank lines (@pxref{Records}). It sets @code{MAXLINES} to 100, since 100 is the maximum number -of lines on the page (20 * 5 = 100). +of lines on the page +@iftex +(@math{20 @cdot 5 = 100}). +@end iftex +@ifnottex +@ifnotdocbook +(20 * 5 = 100). +@end ifnotdocbook +@end ifnottex +@docbook +(20 ⋅ 5 = 100). @c +@end docbook Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they @@ -24270,7 +24270,7 @@ END \ When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to substitute +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This @value{SUBSECTION} develops a program for counting words and presenting the frequency information in a useful format. @@ -24348,6 +24348,10 @@ END @{ @} @end example +The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written +@samp{/[[:punct:]]/}, but then underscores would also be removed, +and we want to keep them. + Assuming we have saved this program in a file named @file{wordfreq.awk}, and that the data is in @file{file1}, the following pipeline: @@ -24459,6 +24463,7 @@ information. For example, using the following @code{print} statement in the print data[lines[i]], lines[i] @end example +@noindent This works because @code{data[$0]} is incremented each time a line is seen. @c ENDOFRANGE lidu @@ -24614,13 +24619,7 @@ BEGIN @{ IGNORECASE = 1 @} @noindent The variable @code{e} is used so that the rule -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. The second rule handles moving data into files. It verifies that a file name is given in the directive. If the file named is not the @@ -24649,10 +24648,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@} symbols in the original line. For each two empty elements (@samp{@@@@} in the original file), we have to add a single @samp{@@} symbol back in.@footnote{This program was written before @command{gawk} had the -@code{gensub()} function. Consider how you might use it to simplify the code.} +@code{gensub()} function. +@c exercise!! +Consider how you might use it to simplify the code.} When the processing of the array is finished, @code{join()} is called with the -value of @code{SUBSEP}, to rejoin the pieces back into a single +value of @code{SUBSEP} (@pxref{Multidimensional}), +to rejoin the pieces back into a single line. That line is then printed to the output file: @example @@ -25177,7 +25179,7 @@ BEGIN @{ @c endfile @end example -The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}. +The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}. The main loop comes next. Input lines are read in succession. Lines that do not start with @code{@@include} are printed verbatim. If the line does start with @code{@@include}, the file name is in @code{$2}. @@ -25287,7 +25289,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"' The @command{eval} command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. -This version of @command{igawk} represents my fifth version of this program. +This version of @command{igawk} represents the fifth version of this program. There are four key simplifications that make the program work better: @itemize @bullet @@ -25497,6 +25499,9 @@ babels beslab babery yabber @dots{} @end example + +@c Exercise: Avoid the use of external sort command + @c ENDOFRANGE anagram @node Signature Program @@ -25528,7 +25533,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}' @end example -We leave it to you to determine what the program does. +@cindex Johansen, Chris +We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this @value{DOCUMENT}.) @ignore To: "Arnold Robbins" <arnold@skeeve.com> @@ -28656,7 +28664,7 @@ partial dump of Davide Brini's obfuscated code @smallexample gawk> @kbd{dump} -@print{} # BEGIN +@print{} # BEGIN @print{} @print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] @print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] diff --git a/doc/gawktexi.in b/doc/gawktexi.in index af323c1b..599bd098 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -53,6 +53,7 @@ @set SUBSECTION subsection @set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} @set COMMONEXT (c.e.) +@set PAGE page @end iftex @ifinfo @set DOCUMENT Info file @@ -62,6 +63,7 @@ @set SUBSECTION node @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifinfo @ifhtml @set DOCUMENT Web page @@ -71,6 +73,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifhtml @ifdocbook @set DOCUMENT book @@ -80,6 +83,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifdocbook @ifxml @set DOCUMENT book @@ -89,6 +93,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifxml @ifplaintext @set DOCUMENT book @@ -98,6 +103,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifplaintext @ifdocbook @@ -3316,19 +3322,10 @@ There are two ways to run @command{awk}---with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [@dots{}] in these templates are optional: -@ifnotdocbook -@example -awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{} -awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{} -@end example -@end ifnotdocbook - -@c FIXME - find a better way to mark this up in docbook -@docbook -<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> … -awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> … -</screen> -@end docbook +@display +@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{} +@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{} +@end display @cindex GNU long options @cindex long options @@ -12328,13 +12325,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell @command{awk} what to do once a match for the pattern is found. Thus, in outline, an @command{awk} program generally looks like this: -@example -@r{[}@var{pattern}@r{]} @{ @var{action} @} - @var{pattern} @r{[}@{ @var{action} @}@r{]} +@display +[@var{pattern}] @code{@{ @var{action} @}} + @var{pattern} [@code{@{ @var{action} @}}] @dots{} -function @var{name}(@var{args}) @{ @dots{} @} +@code{function @var{name}(@var{args}) @{ @dots{} @}} @dots{} -@end example +@end display @cindex @code{@{@}} (braces), actions and @cindex braces (@code{@{@}}), actions and @@ -12449,9 +12446,9 @@ newlines or semicolons. The @code{if}-@code{else} statement is @command{awk}'s decision-making statement. It looks like this: -@example -if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} -@end example +@display +@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}] +@end display @noindent The @var{condition} is an expression that controls what the rest of the @@ -13049,9 +13046,9 @@ The @code{exit} statement causes @command{awk} to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The @code{exit} statement is written as follows: -@example -exit @r{[}@var{return code}@r{]} -@end example +@display +@code{exit} [@var{return code}] +@end display @cindex @code{BEGIN} pattern, @code{exit} statement and @cindex @code{END} pattern, @code{exit} statement and @@ -17683,12 +17680,12 @@ entire program before starting to execute any of it. The definition of a function named @var{name} looks like this: -@example -function @var{name}(@r{[}@var{parameter-list}@r{]}) -@{ +@display +@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)} +@code{@{} @var{body-of-function} -@} -@end example +@code{@}} +@end display @cindex names, functions @cindex functions, names of @@ -18210,9 +18207,9 @@ This statement returns control to the calling part of the @command{awk} program. can also be used to return a value for use in the rest of the @command{awk} program. It looks like this: -@example -return @r{[}@var{expression}@r{]} -@end example +@display +@code{return} [@var{expression}] +@end display The @var{expression} part is optional. Due most likely to an oversight, POSIX does not define what the return @@ -19521,7 +19518,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -19848,10 +19845,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword @cindex readable data files@comma{} checking @cindex files, skipping Normally, if you give @command{awk} a data file that isn't readable, -it stops with a fatal error. There are times when you -might want to just ignore such files and keep going. You can -do this by prepending the following program to your @command{awk} -program: +it stops with a fatal error. There are times when you might want to +just ignore such files and keep going.@footnote{The @code{BEGINFILE} +special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative +mechanism for dealing with files that can't be opened. However, the +code here provides a portable solution.} You can do this by prepending +the following program to your @command{awk} program: @cindex @code{readable.awk} program @example @@ -19889,7 +19888,7 @@ skips the file (since it's no longer in the list). See also @ref{ARGC and ARGV}. @node Empty Files -@subsection Checking For Zero-length Files +@subsection Checking for Zero-length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit @@ -20362,7 +20361,7 @@ BEGIN @{ # test program if (_getopt_test) @{ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -20378,32 +20377,31 @@ result of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} -@print{} c = <a>, optarg = <> -@print{} c = <c>, optarg = <> -@print{} c = <b>, optarg = <ARG> +@print{} c = <a>, Optarg = <> +@print{} c = <c>, Optarg = <> +@print{} c = <b>, Optarg = <ARG> @print{} non-option arguments: @print{} ARGV[3] = <bax> @print{} ARGV[4] = <-x> $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc} -@print{} c = <a>, optarg = <> +@print{} c = <a>, Optarg = <> @error{} x -- invalid option -@print{} c = <?>, optarg = <> +@print{} c = <?>, Optarg = <> @print{} non-option arguments: @print{} ARGV[4] = <xyz> @print{} ARGV[5] = <abc> @end example -In both runs, -the first @option{--} terminates the arguments to @command{awk}, so that it does -not try to interpret the @option{-a}, etc., as its own options. +In both runs, the first @option{--} terminates the arguments to +@command{awk}, so that it does not try to interpret the @option{-a}, +etc., as its own options. @quotation NOTE -After @code{getopt()} is through, it is the responsibility of the user level -code to -clear out all the elements of @code{ARGV} from 1 to @code{Optind}, -so that @command{awk} does not try to process the command-line options -as file names. +After @code{getopt()} is through, it is the responsibility of the +user level code to clear out all the elements of @code{ARGV} from 1 +to @code{Optind}, so that @command{awk} does not try to process the +command-line options as file names. @end quotation Several of the sample programs presented in @@ -20472,7 +20470,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ @c endfile @ignore @@ -20818,7 +20816,7 @@ is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ @c endfile @ignore @@ -20905,7 +20903,7 @@ it is usually empty or set to @samp{*}. @item Group ID Number The group's numeric group ID number; -this number must be unique within the file. +the association of name to number must be unique within the file. (On some systems it's a C @code{long}, and not an @code{int}. Thus we cast it to @code{long} for all cases.) @@ -21041,10 +21039,10 @@ tvpeople:*:101:david,conan,tom,joan For this reason, @code{_gr_init()} looks to see if a group name or group ID number is already seen. If it is, then the user names are -simply concatenated onto the previous list of users. (There is actually a +simply concatenated onto the previous list of users.@footnote{There is actually a subtle problem with the code just presented. Suppose that the first time there were no names. This code adds the names with -a leading comma. It also doesn't check that there is a @code{$4}.) +a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, @@ -21414,13 +21412,7 @@ function usage( e1, e2) @noindent The variables @code{e1} and @code{e2} are used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. @cindex @code{BEGIN} pattern, running @command{awk} programs and @cindex @code{FS} variable, running @command{awk} programs and @@ -21459,7 +21451,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" @} else if (c == "s") - suppress++ + suppress = 1 else usage() @} @@ -21672,9 +21664,9 @@ expressions that are almost identical to those available in @command{awk} (@pxref{Regexp}). You invoke it as follows: -@example -egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} -@end example +@display +@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{} +@end display The @var{pattern} is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -21856,6 +21848,11 @@ function endfile(file) @c endfile @end example +The @code{BEGINFILE} and @code{ENDFILE} special patterns +(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be +@command{gawk}-specific. Additionally, this example was written before +@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}. + The following rule does most of the work of matching lines. The variable @code{matches} is true if the line matched the pattern. If the user wants lines that did not match, the sense of @code{matches} is inverted @@ -21912,9 +21909,7 @@ there are no matches, the exit status is one; otherwise it is zero: @c file eg/prog/egrep.awk END \ @{ - if (total == 0) - exit 1 - exit 0 + exit (total == 0) @} @c endfile @end example @@ -21968,7 +21963,7 @@ corresponding user and group names. The output might look like this: @example $ @kbd{id} -@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) +@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) @end example @cindex @code{PROCINFO} array, and user and group ID numbers @@ -22004,6 +21999,7 @@ numbers: # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 @c endfile @end ignore @@ -22023,34 +22019,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (euid != uid) @{ printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (egid != gid) @{ printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} for (i = 1; ("group" i) in PROCINFO; i++) @{ @@ -22059,16 +22047,20 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") @} print "" @} + +function pr_first_field(str, a) +@{ + split(str, a, ":") + printf("(%s)", a[1]) +@} @c endfile @end example @@ -22088,9 +22080,13 @@ The loop is also correct if there are @emph{no} supplementary groups; then the condition is false the first time it's tested, and the loop body never executes. +The @code{pr_first_field()} function simply isolates out some +code that is used repeatedly, making the whole program +slightly shorter and cleaner. + @c exercise!!! @ignore -The POSIX version of @command{id} takes arguments that control which +The POSIX version of @command{id} takes options that control which information is printed. Modify this version to accept the same arguments and perform in the same way. @end ignore @@ -22110,9 +22106,9 @@ Usage is as follows:@footnote{This is the traditional usage. The POSIX usage is different, but not relevant for what the program aims to demonstrate.} -@example -split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]} -@end example +@display +@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}] +@end display By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has @@ -22146,11 +22142,12 @@ is used as the prefix for the output file names: # # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 @c endfile @end ignore @c file eg/prog/split.awk -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN @{ outfile = "x" # default @@ -22159,7 +22156,7 @@ BEGIN @{ usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) @{ + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{ count = -ARGV[i] ARGV[i] = "" i++ @@ -22231,13 +22228,7 @@ function usage( e) @noindent The variable @code{e} is used so that the function -fits nicely on the -@ifinfo -screen. -@end ifinfo -@ifnotinfo -page. -@end ifnotinfo +fits nicely on the @value{PAGE}. This program is a bit sloppy; it relies on @command{awk} to automatically close the last file instead of doing it in an @code{END} rule. @@ -22260,9 +22251,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: -@example -tee @r{[}-a@r{]} file @dots{} -@end example +@display +@command{tee} [@option{-a}] @var{file} @dots{} +@end display The @option{-a} option tells @code{tee} to append to the named files, instead of truncating them and starting over. @@ -22387,9 +22378,9 @@ input, and by default removes duplicate lines. In other words, it only prints unique lines---hence the name. @command{uniq} has a number of options. The usage is as follows: -@example -uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]} -@end example +@display +@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]] +@end display The options for @command{uniq} are: @@ -22413,11 +22404,11 @@ by runs of spaces and/or TABs. Skip @var{n} characters before comparing lines. Any fields specified with @samp{-@var{n}} are skipped first. -@item @var{input file} +@item @var{inputfile} Data is read from the input file named on the command line, instead of from the standard input. -@item @var{output file} +@item @var{outputfile} The generated output is sent to the named output file, instead of to the standard output. @end table @@ -22654,9 +22645,9 @@ END @{ The @command{wc} (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: -@example -wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} -@end example +@display +@command{wc} [@option{-lwc}] [@var{files} @dots{}] +@end display If no files are specified on the command line, @command{wc} reads its standard input. If there are multiple files, it also prints total counts for all @@ -23137,19 +23128,18 @@ often used to map uppercase letters into lowercase for further processing: @end example @command{tr} requires two lists of characters.@footnote{On some older -systems, -including Solaris, -@command{tr} may require that the lists be written as -range expressions enclosed in square brackets (@samp{[a-z]}) and quoted, -to prevent the shell from attempting a file name expansion. This is -not a feature.} When processing the input, the first character in the -first list is replaced with the first character in the second list, -the second character in the first list is replaced with the second -character in the second list, and so on. If there are more characters -in the ``from'' list than in the ``to'' list, the last character of the -``to'' list is used for the remaining characters in the ``from'' list. - -Some time ago, +systems, including Solaris, the system version of @command{tr} may require +that the lists be written as range expressions enclosed in square brackets +(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file +name expansion. This is not a feature.} When processing the input, the +first character in the first list is replaced with the first character +in the second list, the second character in the first list is replaced +with the second character in the second list, and so on. If there are +more characters in the ``from'' list than in the ``to'' list, the last +character of the ``to'' list is used for the remaining characters in the +``from'' list. + +Once upon a time, @c early or mid-1989! a user proposed that a transliteration function should be added to @command{gawk}. @@ -23263,13 +23253,12 @@ BEGIN @{ While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the @command{gawk} authors) started to consider adding a built-in function. However, -shortly after writing this program, we learned that the System V Release 4 -@command{awk} had added the @code{toupper()} and @code{tolower()} functions -(@pxref{String Functions}). -These functions handle the vast majority of the -cases where character transliteration is necessary, and so we chose to -simply add those functions to @command{gawk} as well and then leave well -enough alone. +shortly after writing this program, we learned that Brian Kernighan +had added the @code{toupper()} and @code{tolower()} functions to his +@command{awk} (@pxref{String Functions}). These functions handle the +vast majority of the cases where character transliteration is necessary, +and so we chose to simply add those functions to @command{gawk} as well +and then leave well enough alone. An obvious improvement to this program would be to set up the @code{t_ar} array only once, in a @code{BEGIN} rule. However, this @@ -23302,7 +23291,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that @command{awk} splits records at blank lines (@pxref{Records}). It sets @code{MAXLINES} to 100, since 100 is the maximum number -of lines on the page (20 * 5 = 100). +of lines on the page +@iftex +(@math{20 @cdot 5 = 100}). +@end iftex +@ifnottex +@ifnotdocbook +(20 * 5 = 100). +@end ifnotdocbook +@end ifnottex +@docbook +(20 ⋅ 5 = 100). @c +@end docbook Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they @@ -23414,7 +23414,7 @@ END \ When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to substitute +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This @value{SUBSECTION} develops a program for counting words and presenting the frequency information in a useful format. @@ -23492,6 +23492,10 @@ END @{ @} @end example +The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written +@samp{/[[:punct:]]/}, but then underscores would also be removed, +and we want to keep them. + Assuming we have saved this program in a file named @file{wordfreq.awk}, and that the data is in @file{file1}, the following pipeline: @@ -23603,6 +23607,7 @@ information. For example, using the following @code{print} statement in the print data[lines[i]], lines[i] @end example +@noindent This works because @code{data[$0]} is incremented each time a line is seen. @c ENDOFRANGE lidu @@ -23758,13 +23763,7 @@ BEGIN @{ IGNORECASE = 1 @} @noindent The variable @code{e} is used so that the rule -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. The second rule handles moving data into files. It verifies that a file name is given in the directive. If the file named is not the @@ -23793,10 +23792,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@} symbols in the original line. For each two empty elements (@samp{@@@@} in the original file), we have to add a single @samp{@@} symbol back in.@footnote{This program was written before @command{gawk} had the -@code{gensub()} function. Consider how you might use it to simplify the code.} +@code{gensub()} function. +@c exercise!! +Consider how you might use it to simplify the code.} When the processing of the array is finished, @code{join()} is called with the -value of @code{SUBSEP}, to rejoin the pieces back into a single +value of @code{SUBSEP} (@pxref{Multidimensional}), +to rejoin the pieces back into a single line. That line is then printed to the output file: @example @@ -24321,7 +24323,7 @@ BEGIN @{ @c endfile @end example -The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}. +The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}. The main loop comes next. Input lines are read in succession. Lines that do not start with @code{@@include} are printed verbatim. If the line does start with @code{@@include}, the file name is in @code{$2}. @@ -24431,7 +24433,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"' The @command{eval} command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. -This version of @command{igawk} represents my fifth version of this program. +This version of @command{igawk} represents the fifth version of this program. There are four key simplifications that make the program work better: @itemize @bullet @@ -24641,6 +24643,9 @@ babels beslab babery yabber @dots{} @end example + +@c Exercise: Avoid the use of external sort command + @c ENDOFRANGE anagram @node Signature Program @@ -24672,7 +24677,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}' @end example -We leave it to you to determine what the program does. +@cindex Johansen, Chris +We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this @value{DOCUMENT}.) @ignore To: "Arnold Robbins" <arnold@skeeve.com> @@ -27800,7 +27808,7 @@ partial dump of Davide Brini's obfuscated code @smallexample gawk> @kbd{dump} -@print{} # BEGIN +@print{} # BEGIN @print{} @print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] @print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] |