diff options
-rw-r--r-- | awklib/eg/lib/getopt.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/gettime.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/grcat.c | 2 | ||||
-rw-r--r-- | awklib/eg/lib/pwcat.c | 2 | ||||
-rw-r--r-- | awklib/eg/prog/cut.awk | 2 | ||||
-rw-r--r-- | awklib/eg/prog/egrep.awk | 4 | ||||
-rw-r--r-- | awklib/eg/prog/id.awk | 37 | ||||
-rw-r--r-- | awklib/eg/prog/split.awk | 5 | ||||
-rw-r--r-- | doc/ChangeLog | 4 | ||||
-rw-r--r-- | doc/gawk.info | 1210 | ||||
-rw-r--r-- | doc/gawk.texi | 310 | ||||
-rw-r--r-- | doc/gawktexi.in | 310 |
12 files changed, 964 insertions, 926 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk index 4283a7e1..db957ceb 100644 --- a/awklib/eg/lib/getopt.awk +++ b/awklib/eg/lib/getopt.awk @@ -70,7 +70,7 @@ BEGIN { # test program if (_getopt_test) { while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk index 4cb56330..3da9c8ab 100644 --- a/awklib/eg/lib/gettime.awk +++ b/awklib/eg/lib/gettime.awk @@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c index ff2913a1..7d6b6a74 100644 --- a/awklib/eg/lib/grcat.c +++ b/awklib/eg/lib/grcat.c @@ -1,7 +1,7 @@ /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ /* * Arnold Robbins, arnold@skeeve.com, May 1993 diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c index 910e0329..934ef34e 100644 --- a/awklib/eg/lib/pwcat.c +++ b/awklib/eg/lib/pwcat.c @@ -1,7 +1,7 @@ /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ /* * Arnold Robbins, arnold@skeeve.com, May 1993 diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk index 1399411e..09ba1f7c 100644 --- a/awklib/eg/prog/cut.awk +++ b/awklib/eg/prog/cut.awk @@ -43,7 +43,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" } else if (c == "s") - suppress++ + suppress = 1 else usage() } diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk index 56d199c8..86b3cfda 100644 --- a/awklib/eg/prog/egrep.awk +++ b/awklib/eg/prog/egrep.awk @@ -90,9 +90,7 @@ function endfile(file) } END \ { - if (total == 0) - exit 1 - exit 0 + exit (total == 0) } function usage( e) { diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk index 8b60a245..cf744447 100644 --- a/awklib/eg/prog/id.awk +++ b/awklib/eg/prog/id.awk @@ -5,6 +5,7 @@ # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 # output is: # uid=12(foo) euid=34(bar) gid=3(baz) \ @@ -19,34 +20,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (euid != uid) { printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (egid != gid) { printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } for (i = 1; ("group" i) in PROCINFO; i++) { @@ -55,13 +48,17 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") } print "" } + +function pr_first_field(str, a) +{ + split(str, a, ":") + printf("(%s)", a[1]) +} diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk index c907530b..bcc73ae6 100644 --- a/awklib/eg/prog/split.awk +++ b/awklib/eg/prog/split.awk @@ -4,8 +4,9 @@ # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN { outfile = "x" # default @@ -14,7 +15,7 @@ BEGIN { usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) { + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) { count = -ARGV[i] ARGV[i] = "" i++ diff --git a/doc/ChangeLog b/doc/ChangeLog index 167bb9fe..800bb4d8 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2014-05-15 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Fix displays for docbook, edits through Chapter 11. + 2014-05-14 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Fix real preface for docbook. diff --git a/doc/gawk.info b/doc/gawk.info index 4c824f32..a21116f2 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -2276,8 +2276,8 @@ There are two ways to run `awk'--with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [...] in these templates are optional: - awk [OPTIONS] -f progfile [`--'] FILE ... - awk [OPTIONS] [`--'] 'PROGRAM' FILE ... + `awk' [OPTIONS] `-f' PROGFILE [`--'] FILE ... + `awk' [OPTIONS] [`--'] `'PROGRAM'' FILE ... Besides traditional one-letter POSIX-style options, `gawk' also supports GNU long options. @@ -8933,10 +8933,10 @@ which (but not both) may be omitted. The purpose of the "action" is to tell `awk' what to do once a match for the pattern is found. Thus, in outline, an `awk' program generally looks like this: - [PATTERN] { ACTION } - PATTERN [{ ACTION }] + [PATTERN] `{ ACTION }' + PATTERN [`{ ACTION }'] ... - function NAME(ARGS) { ... } + `function NAME(ARGS) { ... }' ... An action consists of one or more `awk' "statements", enclosed in @@ -9024,7 +9024,7 @@ File: gawk.info, Node: If Statement, Next: While Statement, Up: Statements The `if'-`else' statement is `awk''s decision-making statement. It looks like this: - if (CONDITION) THEN-BODY [else ELSE-BODY] + `if (CONDITION) THEN-BODY' [`else ELSE-BODY'] The CONDITION is an expression that controls what the rest of the statement does. If the CONDITION is true, THEN-BODY is executed; @@ -9507,7 +9507,7 @@ The `exit' statement causes `awk' to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The `exit' statement is written as follows: - exit [RETURN CODE] + `exit' [RETURN CODE] When an `exit' statement is executed from a `BEGIN' rule, the program stops processing everything immediately. No input records are @@ -12794,10 +12794,10 @@ starting to execute any of it. The definition of a function named NAME looks like this: - function NAME([PARAMETER-LIST]) - { + `function' NAME`('[PARAMETER-LIST]`)' + `{' BODY-OF-FUNCTION - } + `}' Here, NAME is the name of the function to define. A valid function name is like a valid variable name: a sequence of letters, digits, and @@ -13240,7 +13240,7 @@ control to the calling part of the `awk' program. It can also be used to return a value for use in the rest of the `awk' program. It looks like this: - return [EXPRESSION] + `return' [EXPRESSION] The EXPRESSION part is optional. Due most likely to an oversight, POSIX does not define what the return value is if you omit the @@ -14258,7 +14258,7 @@ current time formatted in the same way as the `date' utility: now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -14518,8 +14518,8 @@ File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Functio Normally, if you give `awk' a data file that isn't readable, it stops with a fatal error. There are times when you might want to just ignore -such files and keep going. You can do this by prepending the following -program to your `awk' program: +such files and keep going.(1) You can do this by prepending the +following program to your `awk' program: # readable.awk --- library file to skip over unreadable files @@ -14539,10 +14539,16 @@ program to your `awk' program: element from `ARGV' with `delete' skips the file (since it's no longer in the list). See also *note ARGC and ARGV::. + ---------- Footnotes ---------- + + (1) The `BEGINFILE' special pattern (*note BEGINFILE/ENDFILE::) +provides an alternative mechanism for dealing with files that can't be +opened. However, the code here provides a portable solution. + File: gawk.info, Node: Empty Files, Next: Ignoring Assigns, Prev: File Checking, Up: Data File Management -10.3.4 Checking For Zero-length Files +10.3.4 Checking for Zero-length Files ------------------------------------- All known `awk' implementations silently skip over zero-length files. @@ -14887,7 +14893,7 @@ is in `ARGV[0]': # test program if (_getopt_test) { while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -14900,17 +14906,17 @@ is in `ARGV[0]': result of two sample runs of the test program: $ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x - -| c = <a>, optarg = <> - -| c = <c>, optarg = <> - -| c = <b>, optarg = <ARG> + -| c = <a>, Optarg = <> + -| c = <c>, Optarg = <> + -| c = <b>, Optarg = <ARG> -| non-option arguments: -| ARGV[3] = <bax> -| ARGV[4] = <-x> $ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc - -| c = <a>, optarg = <> + -| c = <a>, Optarg = <> error--> x -- invalid option - -| c = <?>, optarg = <> + -| c = <?>, Optarg = <> -| non-option arguments: -| ARGV[4] = <xyz> -| ARGV[5] = <abc> @@ -14969,7 +14975,7 @@ that "cats" the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ #include <stdio.h> #include <pwd.h> @@ -15194,7 +15200,7 @@ group database, is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ #include <stdio.h> #include <grp.h> @@ -15230,9 +15236,10 @@ Group Password used; it is usually empty or set to `*'. Group ID Number - The group's numeric group ID number; this number must be unique - within the file. (On some systems it's a C `long', and not an - `int'. Thus we cast it to `long' for all cases.) + The group's numeric group ID number; the association of name to + number must be unique within the file. (On some systems it's a C + `long', and not an `int'. Thus we cast it to `long' for all + cases.) Group Member List A comma-separated list of user names. These users are members of @@ -15341,10 +15348,7 @@ following: For this reason, `_gr_init()' looks to see if a group name or group ID number is already seen. If it is, then the user names are simply -concatenated onto the previous list of users. (There is actually a -subtle problem with the code just presented. Suppose that the first -time there were no names. This code adds the names with a leading -comma. It also doesn't check that there is a `$4'.) +concatenated onto the previous list of users.(1) Finally, `_gr_init()' closes the pipeline to `grcat', restores `FS' (and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes @@ -15409,6 +15413,12 @@ very simple, relying on `awk''s associative arrays to do work. The `id' program in *note Id Program::, uses these functions. + ---------- Footnotes ---------- + + (1) There is actually a subtle problem with the code just presented. +Suppose that the first time there were no names. This code adds the +names with a leading comma. It also doesn't check that there is a `$4'. + File: gawk.info, Node: Walking Arrays, Prev: Group Functions, Up: Library Functions @@ -15645,7 +15655,7 @@ by characters, the output field separator is set to the null string: if (FS == " ") # defeat awk semantics FS = "[ ]" } else if (c == "s") - suppress++ + suppress = 1 else usage() } @@ -15814,7 +15824,7 @@ The `egrep' utility searches files for patterns. It uses regular expressions that are almost identical to those available in `awk' (*note Regexp::). You invoke it as follows: - egrep [ OPTIONS ] 'PATTERN' FILES ... + `egrep' [OPTIONS] `'PATTERN'' FILES ... The PATTERN is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -15958,6 +15968,11 @@ know the total number of lines that matched the pattern: total += fcount } + The `BEGINFILE' and `ENDFILE' special patterns (*note +BEGINFILE/ENDFILE::) could be used, but then the program would be +`gawk'-specific. Additionally, this example was written before `gawk' +acquired `BEGINFILE' and `ENDFILE'. + The following rule does most of the work of matching lines. The variable `matches' is true if the line matched the pattern. If the user wants lines that did not match, the sense of `matches' is inverted @@ -16005,9 +16020,7 @@ there are no matches, the exit status is one; otherwise it is zero: END \ { - if (total == 0) - exit 1 - exit 0 + exit (total == 0) } The `usage()' function prints a usage message in case of invalid @@ -16049,7 +16062,7 @@ different from the real ones. If possible, `id' also supplies the corresponding user and group names. The output might look like this: $ id - -| uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) + -| uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) This information is part of what is provided by `gawk''s `PROCINFO' array (*note Built-in Variables::). However, the `id' utility provides @@ -16082,34 +16095,26 @@ and the group numbers: printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (euid != uid) { printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (egid != gid) { printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) } for (i = 1; ("group" i) in PROCINFO; i++) { @@ -16118,10 +16123,8 @@ and the group numbers: group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") } @@ -16129,6 +16132,12 @@ and the group numbers: print "" } + function pr_first_field(str, a) + { + split(str, a, ":") + printf("(%s)", a[1]) + } + The test in the `for' loop is worth noting. Any supplementary groups in the `PROCINFO' array have the indices `"group1"' through `"groupN"' for some N, i.e., the total number of supplementary groups. @@ -16143,6 +16152,10 @@ the last group in the array and the loop exits. then the condition is false the first time it's tested, and the loop body never executes. + The `pr_first_field()' function simply isolates out some code that +is used repeatedly, making the whole program slightly shorter and +cleaner. + File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, Up: Clones @@ -16152,7 +16165,7 @@ File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, U The `split' program splits large text files into smaller pieces. Usage is as follows:(1) - split [-COUNT] file [ PREFIX ] + `split' [`-COUNT'] [FILE] [PREFIX] By default, the output files are named `xaa', `xab', and so on. Each file has 1000 lines in it, with the likely exception of the last file. @@ -16176,7 +16189,7 @@ output file names: # split.awk --- do split in awk # # Requires ord() and chr() library functions - # usage: split [-num] [file] [outname] + # usage: split [-count] [file] [outname] BEGIN { outfile = "x" # default @@ -16185,7 +16198,7 @@ output file names: usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) { + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) { count = -ARGV[i] ARGV[i] = "" i++ @@ -16261,7 +16274,7 @@ The `tee' program is known as a "pipe fitting." `tee' copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: - tee [-a] file ... + `tee' [`-a'] FILE ... The `-a' option tells `tee' to append to the named files, instead of truncating them and starting over. @@ -16350,7 +16363,7 @@ and by default removes duplicate lines. In other words, it only prints unique lines--hence the name. `uniq' has a number of options. The usage is as follows: - uniq [-udc [-N]] [+N] [ INPUT FILE [ OUTPUT FILE ]] + `uniq' [`-udc' [`-N']] [`+N'] [INPUTFILE [OUTPUTFILE]] The options for `uniq' are: @@ -16373,11 +16386,11 @@ usage is as follows: Skip N characters before comparing lines. Any fields specified with `-N' are skipped first. -`INPUT FILE' +`INPUTFILE' Data is read from the input file named on the command line, instead of from the standard input. -`OUTPUT FILE' +`OUTPUTFILE' The generated output is sent to the named output file, instead of to the standard output. @@ -16567,7 +16580,7 @@ File: gawk.info, Node: Wc Program, Prev: Uniq Program, Up: Clones The `wc' (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: - wc [-lwc] [ FILES ... ] + `wc' [`-lwc'] [FILES ...] If no files are specified on the command line, `wc' reads its standard input. If there are multiple files, it also prints total @@ -16933,11 +16946,11 @@ there are more characters in the "from" list than in the "to" list, the last character of the "to" list is used for the remaining characters in the "from" list. - Some time ago, a user proposed that a transliteration function should -be added to `gawk'. The following program was written to prove that -character transliteration could be done with a user-level function. -This program is not as complete as the system `tr' utility but it does -most of the job. + Once upon a time, a user proposed that a transliteration function +should be added to `gawk'. The following program was written to prove +that character transliteration could be done with a user-level +function. This program is not as complete as the system `tr' utility +but it does most of the job. The `translate' program demonstrates one of the few weaknesses of standard `awk': dealing with individual characters is very painful, @@ -17018,8 +17031,8 @@ record: While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the `gawk' authors) started to consider adding a built-in function. However, shortly after -writing this program, we learned that the System V Release 4 `awk' had -added the `toupper()' and `tolower()' functions (*note String +writing this program, we learned that Brian Kernighan had added the +`toupper()' and `tolower()' functions to his `awk' (*note String Functions::). These functions handle the vast majority of the cases where character transliteration is necessary, and so we chose to simply add those functions to `gawk' as well and then leave well enough alone. @@ -17031,10 +17044,10 @@ program. ---------- Footnotes ---------- - (1) On some older systems, including Solaris, `tr' may require that -the lists be written as range expressions enclosed in square brackets -(`[a-z]') and quoted, to prevent the shell from attempting a file name -expansion. This is not a feature. + (1) On some older systems, including Solaris, the system version of +`tr' may require that the lists be written as range expressions +enclosed in square brackets (`[a-z]') and quoted, to prevent the shell +from attempting a file name expansion. This is not a feature. (2) This program was written before `gawk' acquired the ability to split each character in a string into separate array elements. @@ -17154,7 +17167,7 @@ File: gawk.info, Node: Word Sorting, Next: History Sorting, Prev: Labels Prog When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This node develops a program for counting words and presenting the frequency information in a useful format. @@ -17217,6 +17230,10 @@ script. Here is the new version of the program: printf "%s\t%d\n", word, freq[word] } + The regexp `/[^[:alnum:]_[:blank:]]/' might have been written +`/[[:punct:]]/', but then underscores would also be removed, and we +want to keep them. + Assuming we have saved this program in a file named `wordfreq.awk', and that the data is in `file1', the following pipeline: @@ -17294,8 +17311,7 @@ information. For example, using the following `print' statement in the print data[lines[i]], lines[i] - This works because `data[$0]' is incremented each time a line is -seen. +This works because `data[$0]' is incremented each time a line is seen. File: gawk.info, Node: Extract Program, Next: Simple Sed, Prev: History Sorting, Up: Miscellaneous Programs @@ -17426,8 +17442,9 @@ elements (`@@' in the original file), we have to add a single `@' symbol back in.(1) When the processing of the array is finished, `join()' is called -with the value of `SUBSEP', to rejoin the pieces back into a single -line. That line is then printed to the output file: +with the value of `SUBSEP' (*note Multidimensional::), to rejoin the +pieces back into a single line. That line is then printed to the +output file: /^@c(omment)?[ \t]+file/ \ { @@ -17496,7 +17513,7 @@ closing the open file: ---------- Footnotes ---------- (1) This program was written before `gawk' had the `gensub()' -function. Consider how you might use it to simplify the code. +function. Consider how you might use it to simplify the code. File: gawk.info, Node: Simple Sed, Next: Igawk Program, Prev: Extract Program, Up: Miscellaneous Programs @@ -17835,12 +17852,12 @@ which represents the current directory: pathlist[i] = "." } - The stack is initialized with `ARGV[1]', which will be `/dev/stdin'. -The main loop comes next. Input lines are read in succession. Lines -that do not start with `@include' are printed verbatim. If the line -does start with `@include', the file name is in `$2'. `pathto()' is -called to generate the full path. If it cannot, then the program -prints an error message and continues. + The stack is initialized with `ARGV[1]', which will be +`"/dev/stdin"'. The main loop comes next. Input lines are read in +succession. Lines that do not start with `@include' are printed +verbatim. If the line does start with `@include', the file name is in +`$2'. `pathto()' is called to generate the full path. If it cannot, +then the program prints an error message and continues. The next thing to check is if the file is included already. The `processed' array is indexed by the full file name of each included @@ -17917,7 +17934,7 @@ supplied. The `eval' command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. - This version of `igawk' represents my fifth version of this program. + This version of `igawk' represents the fifth version of this program. There are four key simplifications that make the program work better: * Using `@include' even for the files named with `-f' makes building @@ -18091,7 +18108,9 @@ supplies the following copyright terms: X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O}' - We leave it to you to determine what the program does. + We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this Info file.) File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top @@ -20421,7 +20440,7 @@ categories, as follows: Program::) demonstrates: gawk> dump - -| # BEGIN + -| # BEGIN -| -| [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] -| [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] @@ -30130,7 +30149,7 @@ Index * Menu: * ! (exclamation point), ! operator: Boolean Ops. (line 67) -* ! (exclamation point), ! operator <1>: Egrep Program. (line 170) +* ! (exclamation point), ! operator <1>: Egrep Program. (line 175) * ! (exclamation point), ! operator <2>: Ranges. (line 48) * ! (exclamation point), ! operator: Precedence. (line 52) * ! (exclamation point), != operator <1>: Precedence. (line 65) @@ -30366,7 +30385,7 @@ Index (line 38) * \ (backslash), as field separator: Command Line Field Separator. (line 27) -* \ (backslash), continuing lines and <1>: Egrep Program. (line 220) +* \ (backslash), continuing lines and <1>: Egrep Program. (line 223) * \ (backslash), continuing lines and: Statements/Lines. (line 19) * \ (backslash), continuing lines and, comments and: Statements/Lines. (line 76) @@ -30394,7 +30413,7 @@ Index * _ (underscore), in names of private variables: Library Names. (line 29) * _ (underscore), translatable string: Programmer i18n. (line 69) -* _gr_init() user-defined function: Group Functions. (line 82) +* _gr_init() user-defined function: Group Functions. (line 83) * _ord_init() user-defined function: Ordinal Functions. (line 16) * _pw_init() user-defined function: Passwd Functions. (line 105) * accessing fields: Fields. (line 6) @@ -30640,7 +30659,7 @@ Index (line 38) * backslash (\), as field separator: Command Line Field Separator. (line 27) -* backslash (\), continuing lines and <1>: Egrep Program. (line 220) +* backslash (\), continuing lines and <1>: Egrep Program. (line 223) * backslash (\), continuing lines and: Statements/Lines. (line 19) * backslash (\), continuing lines and, comments and: Statements/Lines. (line 76) @@ -31296,7 +31315,7 @@ Index * END pattern, and profiling: Profiling. (line 62) * END pattern, assert() user-defined function and: Assert Function. (line 75) -* END pattern, backslash continuation and: Egrep Program. (line 220) +* END pattern, backslash continuation and: Egrep Program. (line 223) * END pattern, Boolean patterns and: Expression Patterns. (line 70) * END pattern, exit statement and: Exit Statement. (line 12) * END pattern, next/nextfile statements and <1>: Next Statement. @@ -31308,8 +31327,8 @@ Index * ENDFILE pattern: BEGINFILE/ENDFILE. (line 6) * ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70) * endfile() user-defined function: Filetrans Function. (line 62) -* endgrent() function (C library): Group Functions. (line 215) -* endgrent() user-defined function: Group Functions. (line 218) +* endgrent() function (C library): Group Functions. (line 213) +* endgrent() user-defined function: Group Functions. (line 216) * endpwent() function (C library): Passwd Functions. (line 210) * endpwent() user-defined function: Passwd Functions. (line 213) * ENVIRON array: Auto-set. (line 60) @@ -31342,7 +31361,7 @@ Index * evaluation order, concatenation: Concatenation. (line 41) * evaluation order, functions: Calling Built-in. (line 30) * examining fields: Fields. (line 6) -* exclamation point (!), ! operator <1>: Egrep Program. (line 170) +* exclamation point (!), ! operator <1>: Egrep Program. (line 175) * exclamation point (!), ! operator <2>: Precedence. (line 52) * exclamation point (!), ! operator: Boolean Ops. (line 67) * exclamation point (!), != operator <1>: Precedence. (line 65) @@ -31738,15 +31757,15 @@ Index * getaddrinfo() function (C library): TCP/IP Networking. (line 38) * getgrent() function (C library): Group Functions. (line 6) * getgrent() user-defined function: Group Functions. (line 6) -* getgrgid() function (C library): Group Functions. (line 186) -* getgrgid() user-defined function: Group Functions. (line 189) -* getgrnam() function (C library): Group Functions. (line 175) -* getgrnam() user-defined function: Group Functions. (line 180) -* getgruser() function (C library): Group Functions. (line 195) -* getgruser() function, user-defined: Group Functions. (line 198) +* getgrgid() function (C library): Group Functions. (line 184) +* getgrgid() user-defined function: Group Functions. (line 187) +* getgrnam() function (C library): Group Functions. (line 173) +* getgrnam() user-defined function: Group Functions. (line 178) +* getgruser() function (C library): Group Functions. (line 193) +* getgruser() function, user-defined: Group Functions. (line 196) * getline command: Reading Files. (line 20) * getline command, _gr_init() user-defined function: Group Functions. - (line 82) + (line 83) * getline command, _pw_init() function: Passwd Functions. (line 154) * getline command, coprocesses, using from <1>: Close Files And Pipes. (line 6) @@ -31947,6 +31966,7 @@ Index * Java programming language: Glossary. (line 380) * jawk: Other Versions. (line 112) * Jedi knights: Undocumented. (line 6) +* Johansen, Chris: Signature Program. (line 25) * join() user-defined function: Join Function. (line 18) * Kahrs, Ju"rgen <1>: Contributors. (line 70) * Kahrs, Ju"rgen: Acknowledgments. (line 60) @@ -33245,494 +33265,496 @@ Node: Other Features105547 Node: When106475 Node: Invoking Gawk108623 Node: Command Line110086 -Node: Options110869 -Ref: Options-Footnote-1126681 -Node: Other Arguments126706 -Node: Naming Standard Input129368 -Node: Environment Variables130462 -Node: AWKPATH Variable131020 -Ref: AWKPATH Variable-Footnote-1133798 -Ref: AWKPATH Variable-Footnote-2133843 -Node: AWKLIBPATH Variable134103 -Node: Other Environment Variables134862 -Node: Exit Status138517 -Node: Include Files139192 -Node: Loading Shared Libraries142770 -Node: Obsolete144153 -Node: Undocumented144850 -Node: Regexp145092 -Node: Regexp Usage146481 -Node: Escape Sequences148514 -Node: Regexp Operators154181 -Ref: Regexp Operators-Footnote-1161661 -Ref: Regexp Operators-Footnote-2161808 -Node: Bracket Expressions161906 -Ref: table-char-classes163796 -Node: GNU Regexp Operators166319 -Node: Case-sensitivity170042 -Ref: Case-sensitivity-Footnote-1172934 -Ref: Case-sensitivity-Footnote-2173169 -Node: Leftmost Longest173277 -Node: Computed Regexps174478 -Node: Reading Files177827 -Node: Records179829 -Node: awk split records180564 -Node: gawk split records185422 -Ref: gawk split records-Footnote-1189943 -Node: Fields189980 -Ref: Fields-Footnote-1192944 -Node: Nonconstant Fields193030 -Ref: Nonconstant Fields-Footnote-1195260 -Node: Changing Fields195462 -Node: Field Separators201416 -Node: Default Field Splitting204118 -Node: Regexp Field Splitting205235 -Node: Single Character Fields208576 -Node: Command Line Field Separator209635 -Node: Full Line Fields212977 -Ref: Full Line Fields-Footnote-1213485 -Node: Field Splitting Summary213531 -Ref: Field Splitting Summary-Footnote-1216630 -Node: Constant Size216731 -Node: Splitting By Content221338 -Ref: Splitting By Content-Footnote-1225088 -Node: Multiple Line225128 -Ref: Multiple Line-Footnote-1230984 -Node: Getline231163 -Node: Plain Getline233379 -Node: Getline/Variable235474 -Node: Getline/File236621 -Node: Getline/Variable/File238005 -Ref: Getline/Variable/File-Footnote-1239604 -Node: Getline/Pipe239691 -Node: Getline/Variable/Pipe242390 -Node: Getline/Coprocess243497 -Node: Getline/Variable/Coprocess244749 -Node: Getline Notes245486 -Node: Getline Summary248290 -Ref: table-getline-variants248698 -Node: Read Timeout249610 -Ref: Read Timeout-Footnote-1253437 -Node: Command line directories253495 -Node: Printing254377 -Node: Print256008 -Node: Print Examples257349 -Node: Output Separators260128 -Node: OFMT262144 -Node: Printf263502 -Node: Basic Printf264408 -Node: Control Letters265947 -Node: Format Modifiers269801 -Node: Printf Examples275828 -Node: Redirection278535 -Node: Special Files285507 -Node: Special FD286040 -Ref: Special FD-Footnote-1289664 -Node: Special Network289738 -Node: Special Caveats290588 -Node: Close Files And Pipes291384 -Ref: Close Files And Pipes-Footnote-1298522 -Ref: Close Files And Pipes-Footnote-2298670 -Node: Expressions298820 -Node: Values299952 -Node: Constants300628 -Node: Scalar Constants301308 -Ref: Scalar Constants-Footnote-1302167 -Node: Nondecimal-numbers302417 -Node: Regexp Constants305417 -Node: Using Constant Regexps305892 -Node: Variables308962 -Node: Using Variables309617 -Node: Assignment Options311341 -Node: Conversion313216 -Ref: table-locale-affects318652 -Ref: Conversion-Footnote-1319276 -Node: All Operators319385 -Node: Arithmetic Ops320015 -Node: Concatenation322520 -Ref: Concatenation-Footnote-1325316 -Node: Assignment Ops325436 -Ref: table-assign-ops330419 -Node: Increment Ops331736 -Node: Truth Values and Conditions335174 -Node: Truth Values336257 -Node: Typing and Comparison337306 -Node: Variable Typing338099 -Ref: Variable Typing-Footnote-1341999 -Node: Comparison Operators342121 -Ref: table-relational-ops342531 -Node: POSIX String Comparison346079 -Ref: POSIX String Comparison-Footnote-1347163 -Node: Boolean Ops347301 -Ref: Boolean Ops-Footnote-1351371 -Node: Conditional Exp351462 -Node: Function Calls353189 -Node: Precedence356947 -Node: Locales360616 -Node: Patterns and Actions362219 -Node: Pattern Overview363273 -Node: Regexp Patterns364950 -Node: Expression Patterns365493 -Node: Ranges369274 -Node: BEGIN/END372380 -Node: Using BEGIN/END373142 -Ref: Using BEGIN/END-Footnote-1375878 -Node: I/O And BEGIN/END375984 -Node: BEGINFILE/ENDFILE378269 -Node: Empty381205 -Node: Using Shell Variables381522 -Node: Action Overview383805 -Node: Statements386150 -Node: If Statement388004 -Node: While Statement389503 -Node: Do Statement391547 -Node: For Statement392703 -Node: Switch Statement395855 -Node: Break Statement397958 -Node: Continue Statement400013 -Node: Next Statement401806 -Node: Nextfile Statement404196 -Node: Exit Statement406851 -Node: Built-in Variables409253 -Node: User-modified410349 -Ref: User-modified-Footnote-1418034 -Node: Auto-set418096 -Ref: Auto-set-Footnote-1430998 -Ref: Auto-set-Footnote-2431203 -Node: ARGC and ARGV431259 -Node: Arrays435113 -Node: Array Basics436611 -Node: Array Intro437437 -Ref: figure-array-elements439410 -Node: Reference to Elements441817 -Node: Assigning Elements444090 -Node: Array Example444581 -Node: Scanning an Array446313 -Node: Controlling Scanning449328 -Ref: Controlling Scanning-Footnote-1454501 -Node: Delete454817 -Ref: Delete-Footnote-1457582 -Node: Numeric Array Subscripts457639 -Node: Uninitialized Subscripts459822 -Node: Multidimensional461447 -Node: Multiscanning464540 -Node: Arrays of Arrays466129 -Node: Functions470769 -Node: Built-in471588 -Node: Calling Built-in472666 -Node: Numeric Functions474654 -Ref: Numeric Functions-Footnote-1478488 -Ref: Numeric Functions-Footnote-2478845 -Ref: Numeric Functions-Footnote-3478893 -Node: String Functions479162 -Ref: String Functions-Footnote-1502173 -Ref: String Functions-Footnote-2502302 -Ref: String Functions-Footnote-3502550 -Node: Gory Details502637 -Ref: table-sub-escapes504306 -Ref: table-sub-posix-92505660 -Ref: table-sub-proposed507011 -Ref: table-posix-sub508365 -Ref: table-gensub-escapes509910 -Ref: Gory Details-Footnote-1511086 -Ref: Gory Details-Footnote-2511137 -Node: I/O Functions511288 -Ref: I/O Functions-Footnote-1518411 -Node: Time Functions518558 -Ref: Time Functions-Footnote-1529022 -Ref: Time Functions-Footnote-2529090 -Ref: Time Functions-Footnote-3529248 -Ref: Time Functions-Footnote-4529359 -Ref: Time Functions-Footnote-5529471 -Ref: Time Functions-Footnote-6529698 -Node: Bitwise Functions529964 -Ref: table-bitwise-ops530526 -Ref: Bitwise Functions-Footnote-1534771 -Node: Type Functions534955 -Node: I18N Functions536097 -Node: User-defined537742 -Node: Definition Syntax538546 -Ref: Definition Syntax-Footnote-1543461 -Node: Function Example543530 -Ref: Function Example-Footnote-1546174 -Node: Function Caveats546196 -Node: Calling A Function546714 -Node: Variable Scope547669 -Node: Pass By Value/Reference550657 -Node: Return Statement554165 -Node: Dynamic Typing557147 -Node: Indirect Calls558076 -Node: Library Functions567763 -Ref: Library Functions-Footnote-1571276 -Ref: Library Functions-Footnote-2571419 -Node: Library Names571590 -Ref: Library Names-Footnote-1575063 -Ref: Library Names-Footnote-2575283 -Node: General Functions575369 -Node: Strtonum Function576397 -Node: Assert Function579327 -Node: Round Function582653 -Node: Cliff Random Function584194 -Node: Ordinal Functions585210 -Ref: Ordinal Functions-Footnote-1588287 -Ref: Ordinal Functions-Footnote-2588539 -Node: Join Function588750 -Ref: Join Function-Footnote-1590521 -Node: Getlocaltime Function590721 -Node: Readfile Function594462 -Node: Data File Management596301 -Node: Filetrans Function596933 -Node: Rewind Function601002 -Node: File Checking602389 -Node: Empty Files603483 -Node: Ignoring Assigns605713 -Node: Getopt Function607267 -Ref: Getopt Function-Footnote-1618570 -Node: Passwd Functions618773 -Ref: Passwd Functions-Footnote-1627751 -Node: Group Functions627839 -Node: Walking Arrays635923 -Node: Sample Programs638059 -Node: Running Examples638733 -Node: Clones639461 -Node: Cut Program640685 -Node: Egrep Program650536 -Ref: Egrep Program-Footnote-1658309 -Node: Id Program658419 -Node: Split Program662068 -Ref: Split Program-Footnote-1665587 -Node: Tee Program665715 -Node: Uniq Program668518 -Node: Wc Program675947 -Ref: Wc Program-Footnote-1680213 -Ref: Wc Program-Footnote-2680413 -Node: Miscellaneous Programs680505 -Node: Dupword Program681693 -Node: Alarm Program683724 -Node: Translate Program688531 -Ref: Translate Program-Footnote-1692918 -Ref: Translate Program-Footnote-2693166 -Node: Labels Program693300 -Ref: Labels Program-Footnote-1696671 -Node: Word Sorting696755 -Node: History Sorting700639 -Node: Extract Program702478 -Ref: Extract Program-Footnote-1709981 -Node: Simple Sed710109 -Node: Igawk Program713171 -Ref: Igawk Program-Footnote-1728342 -Ref: Igawk Program-Footnote-2728543 -Node: Anagram Program728681 -Node: Signature Program731749 -Node: Advanced Features732849 -Node: Nondecimal Data734735 -Node: Array Sorting736318 -Node: Controlling Array Traversal737015 -Node: Array Sorting Functions745299 -Ref: Array Sorting Functions-Footnote-1749168 -Node: Two-way I/O749362 -Ref: Two-way I/O-Footnote-1754794 -Node: TCP/IP Networking754876 -Node: Profiling757720 -Node: Internationalization765223 -Node: I18N and L10N766648 -Node: Explaining gettext767334 -Ref: Explaining gettext-Footnote-1772402 -Ref: Explaining gettext-Footnote-2772586 -Node: Programmer i18n772751 -Node: Translator i18n776978 -Node: String Extraction777772 -Ref: String Extraction-Footnote-1778733 -Node: Printf Ordering778819 -Ref: Printf Ordering-Footnote-1781601 -Node: I18N Portability781665 -Ref: I18N Portability-Footnote-1784114 -Node: I18N Example784177 -Ref: I18N Example-Footnote-1786815 -Node: Gawk I18N786887 -Node: Debugger787508 -Node: Debugging788479 -Node: Debugging Concepts788912 -Node: Debugging Terms790768 -Node: Awk Debugging793365 -Node: Sample Debugging Session794257 -Node: Debugger Invocation794777 -Node: Finding The Bug796110 -Node: List of Debugger Commands802597 -Node: Breakpoint Control803931 -Node: Debugger Execution Control807595 -Node: Viewing And Changing Data810955 -Node: Execution Stack814311 -Node: Debugger Info815778 -Node: Miscellaneous Debugger Commands819772 -Node: Readline Support824950 -Node: Limitations825781 -Node: Arbitrary Precision Arithmetic828033 -Ref: Arbitrary Precision Arithmetic-Footnote-1829682 -Node: General Arithmetic829830 -Node: Floating Point Issues831550 -Node: String Conversion Precision832431 -Ref: String Conversion Precision-Footnote-1834136 -Node: Unexpected Results834245 -Node: POSIX Floating Point Problems836398 -Ref: POSIX Floating Point Problems-Footnote-1840223 -Node: Integer Programming840261 -Node: Floating-point Programming842000 -Ref: Floating-point Programming-Footnote-1848331 -Ref: Floating-point Programming-Footnote-2848601 -Node: Floating-point Representation848865 -Node: Floating-point Context850030 -Ref: table-ieee-formats850869 -Node: Rounding Mode852253 -Ref: table-rounding-modes852732 -Ref: Rounding Mode-Footnote-1855747 -Node: Gawk and MPFR855926 -Node: Arbitrary Precision Floats857335 -Ref: Arbitrary Precision Floats-Footnote-1859778 -Node: Setting Precision860094 -Ref: table-predefined-precision-strings860780 -Node: Setting Rounding Mode862925 -Ref: table-gawk-rounding-modes863329 -Node: Floating-point Constants864516 -Node: Changing Precision865945 -Ref: Changing Precision-Footnote-1867342 -Node: Exact Arithmetic867516 -Node: Arbitrary Precision Integers870654 -Ref: Arbitrary Precision Integers-Footnote-1873669 -Node: Dynamic Extensions873816 -Node: Extension Intro875274 -Node: Plugin License876539 -Node: Extension Mechanism Outline877224 -Ref: load-extension877641 -Ref: load-new-function879119 -Ref: call-new-function880114 -Node: Extension API Description882129 -Node: Extension API Functions Introduction883416 -Node: General Data Types888343 -Ref: General Data Types-Footnote-1894038 -Node: Requesting Values894337 -Ref: table-value-types-returned895074 -Node: Memory Allocation Functions896028 -Ref: Memory Allocation Functions-Footnote-1898774 -Node: Constructor Functions898870 -Node: Registration Functions900628 -Node: Extension Functions901313 -Node: Exit Callback Functions903615 -Node: Extension Version String904864 -Node: Input Parsers905514 -Node: Output Wrappers915271 -Node: Two-way processors919781 -Node: Printing Messages921989 -Ref: Printing Messages-Footnote-1923066 -Node: Updating `ERRNO'923218 -Node: Accessing Parameters923957 -Node: Symbol Table Access925187 -Node: Symbol table by name925701 -Node: Symbol table by cookie927677 -Ref: Symbol table by cookie-Footnote-1931809 -Node: Cached values931872 -Ref: Cached values-Footnote-1935362 -Node: Array Manipulation935453 -Ref: Array Manipulation-Footnote-1936551 -Node: Array Data Types936590 -Ref: Array Data Types-Footnote-1939293 -Node: Array Functions939385 -Node: Flattening Arrays943221 -Node: Creating Arrays950073 -Node: Extension API Variables954798 -Node: Extension Versioning955434 -Node: Extension API Informational Variables957335 -Node: Extension API Boilerplate958421 -Node: Finding Extensions962225 -Node: Extension Example962785 -Node: Internal File Description963515 -Node: Internal File Ops967606 -Ref: Internal File Ops-Footnote-1979115 -Node: Using Internal File Ops979255 -Ref: Using Internal File Ops-Footnote-1981602 -Node: Extension Samples981868 -Node: Extension Sample File Functions983392 -Node: Extension Sample Fnmatch991879 -Node: Extension Sample Fork993648 -Node: Extension Sample Inplace994861 -Node: Extension Sample Ord996639 -Node: Extension Sample Readdir997475 -Node: Extension Sample Revout999007 -Node: Extension Sample Rev2way999600 -Node: Extension Sample Read write array1000290 -Node: Extension Sample Readfile1002173 -Node: Extension Sample API Tests1003273 -Node: Extension Sample Time1003798 -Node: gawkextlib1005162 -Node: Language History1007943 -Node: V7/SVR3.11009536 -Node: SVR41011856 -Node: POSIX1013298 -Node: BTL1014684 -Node: POSIX/GNU1015418 -Node: Feature History1021017 -Node: Common Extensions1033993 -Node: Ranges and Locales1035305 -Ref: Ranges and Locales-Footnote-11039922 -Ref: Ranges and Locales-Footnote-21039949 -Ref: Ranges and Locales-Footnote-31040183 -Node: Contributors1040404 -Node: Installation1045785 -Node: Gawk Distribution1046679 -Node: Getting1047163 -Node: Extracting1047989 -Node: Distribution contents1049681 -Node: Unix Installation1055402 -Node: Quick Installation1056019 -Node: Additional Configuration Options1058465 -Node: Configuration Philosophy1060201 -Node: Non-Unix Installation1062555 -Node: PC Installation1063013 -Node: PC Binary Installation1064324 -Node: PC Compiling1066172 -Node: PC Testing1069132 -Node: PC Using1070308 -Node: Cygwin1074476 -Node: MSYS1075285 -Node: VMS Installation1075799 -Node: VMS Compilation1076595 -Ref: VMS Compilation-Footnote-11077847 -Node: VMS Dynamic Extensions1077905 -Node: VMS Installation Details1079278 -Node: VMS Running1081529 -Node: VMS GNV1084363 -Node: VMS Old Gawk1085086 -Node: Bugs1085556 -Node: Other Versions1089474 -Node: Notes1095558 -Node: Compatibility Mode1096358 -Node: Additions1097141 -Node: Accessing The Source1098068 -Node: Adding Code1099508 -Node: New Ports1105553 -Node: Derived Files1109688 -Ref: Derived Files-Footnote-11115009 -Ref: Derived Files-Footnote-21115043 -Ref: Derived Files-Footnote-31115643 -Node: Future Extensions1115741 -Node: Implementation Limitations1116324 -Node: Extension Design1117572 -Node: Old Extension Problems1118726 -Ref: Old Extension Problems-Footnote-11120234 -Node: Extension New Mechanism Goals1120291 -Ref: Extension New Mechanism Goals-Footnote-11123656 -Node: Extension Other Design Decisions1123842 -Node: Extension Future Growth1125948 -Node: Old Extension Mechanism1126784 -Node: Basic Concepts1128524 -Node: Basic High Level1129205 -Ref: figure-general-flow1129477 -Ref: figure-process-flow1130076 -Ref: Basic High Level-Footnote-11133305 -Node: Basic Data Typing1133490 -Node: Glossary1136845 -Node: Copying1162076 -Node: GNU Free Documentation License1199632 -Node: Index1224768 +Node: Options110877 +Ref: Options-Footnote-1126689 +Node: Other Arguments126714 +Node: Naming Standard Input129376 +Node: Environment Variables130470 +Node: AWKPATH Variable131028 +Ref: AWKPATH Variable-Footnote-1133806 +Ref: AWKPATH Variable-Footnote-2133851 +Node: AWKLIBPATH Variable134111 +Node: Other Environment Variables134870 +Node: Exit Status138525 +Node: Include Files139200 +Node: Loading Shared Libraries142778 +Node: Obsolete144161 +Node: Undocumented144858 +Node: Regexp145100 +Node: Regexp Usage146489 +Node: Escape Sequences148522 +Node: Regexp Operators154189 +Ref: Regexp Operators-Footnote-1161669 +Ref: Regexp Operators-Footnote-2161816 +Node: Bracket Expressions161914 +Ref: table-char-classes163804 +Node: GNU Regexp Operators166327 +Node: Case-sensitivity170050 +Ref: Case-sensitivity-Footnote-1172942 +Ref: Case-sensitivity-Footnote-2173177 +Node: Leftmost Longest173285 +Node: Computed Regexps174486 +Node: Reading Files177835 +Node: Records179837 +Node: awk split records180572 +Node: gawk split records185430 +Ref: gawk split records-Footnote-1189951 +Node: Fields189988 +Ref: Fields-Footnote-1192952 +Node: Nonconstant Fields193038 +Ref: Nonconstant Fields-Footnote-1195268 +Node: Changing Fields195470 +Node: Field Separators201424 +Node: Default Field Splitting204126 +Node: Regexp Field Splitting205243 +Node: Single Character Fields208584 +Node: Command Line Field Separator209643 +Node: Full Line Fields212985 +Ref: Full Line Fields-Footnote-1213493 +Node: Field Splitting Summary213539 +Ref: Field Splitting Summary-Footnote-1216638 +Node: Constant Size216739 +Node: Splitting By Content221346 +Ref: Splitting By Content-Footnote-1225096 +Node: Multiple Line225136 +Ref: Multiple Line-Footnote-1230992 +Node: Getline231171 +Node: Plain Getline233387 +Node: Getline/Variable235482 +Node: Getline/File236629 +Node: Getline/Variable/File238013 +Ref: Getline/Variable/File-Footnote-1239612 +Node: Getline/Pipe239699 +Node: Getline/Variable/Pipe242398 +Node: Getline/Coprocess243505 +Node: Getline/Variable/Coprocess244757 +Node: Getline Notes245494 +Node: Getline Summary248298 +Ref: table-getline-variants248706 +Node: Read Timeout249618 +Ref: Read Timeout-Footnote-1253445 +Node: Command line directories253503 +Node: Printing254385 +Node: Print256016 +Node: Print Examples257357 +Node: Output Separators260136 +Node: OFMT262152 +Node: Printf263510 +Node: Basic Printf264416 +Node: Control Letters265955 +Node: Format Modifiers269809 +Node: Printf Examples275836 +Node: Redirection278543 +Node: Special Files285515 +Node: Special FD286048 +Ref: Special FD-Footnote-1289672 +Node: Special Network289746 +Node: Special Caveats290596 +Node: Close Files And Pipes291392 +Ref: Close Files And Pipes-Footnote-1298530 +Ref: Close Files And Pipes-Footnote-2298678 +Node: Expressions298828 +Node: Values299960 +Node: Constants300636 +Node: Scalar Constants301316 +Ref: Scalar Constants-Footnote-1302175 +Node: Nondecimal-numbers302425 +Node: Regexp Constants305425 +Node: Using Constant Regexps305900 +Node: Variables308970 +Node: Using Variables309625 +Node: Assignment Options311349 +Node: Conversion313224 +Ref: table-locale-affects318660 +Ref: Conversion-Footnote-1319284 +Node: All Operators319393 +Node: Arithmetic Ops320023 +Node: Concatenation322528 +Ref: Concatenation-Footnote-1325324 +Node: Assignment Ops325444 +Ref: table-assign-ops330427 +Node: Increment Ops331744 +Node: Truth Values and Conditions335182 +Node: Truth Values336265 +Node: Typing and Comparison337314 +Node: Variable Typing338107 +Ref: Variable Typing-Footnote-1342007 +Node: Comparison Operators342129 +Ref: table-relational-ops342539 +Node: POSIX String Comparison346087 +Ref: POSIX String Comparison-Footnote-1347171 +Node: Boolean Ops347309 +Ref: Boolean Ops-Footnote-1351379 +Node: Conditional Exp351470 +Node: Function Calls353197 +Node: Precedence356955 +Node: Locales360624 +Node: Patterns and Actions362227 +Node: Pattern Overview363281 +Node: Regexp Patterns364958 +Node: Expression Patterns365501 +Node: Ranges369282 +Node: BEGIN/END372388 +Node: Using BEGIN/END373150 +Ref: Using BEGIN/END-Footnote-1375886 +Node: I/O And BEGIN/END375992 +Node: BEGINFILE/ENDFILE378277 +Node: Empty381213 +Node: Using Shell Variables381530 +Node: Action Overview383813 +Node: Statements386164 +Node: If Statement388018 +Node: While Statement389521 +Node: Do Statement391565 +Node: For Statement392721 +Node: Switch Statement395873 +Node: Break Statement397976 +Node: Continue Statement400031 +Node: Next Statement401824 +Node: Nextfile Statement404214 +Node: Exit Statement406869 +Node: Built-in Variables409273 +Node: User-modified410369 +Ref: User-modified-Footnote-1418054 +Node: Auto-set418116 +Ref: Auto-set-Footnote-1431018 +Ref: Auto-set-Footnote-2431223 +Node: ARGC and ARGV431279 +Node: Arrays435133 +Node: Array Basics436631 +Node: Array Intro437457 +Ref: figure-array-elements439430 +Node: Reference to Elements441837 +Node: Assigning Elements444110 +Node: Array Example444601 +Node: Scanning an Array446333 +Node: Controlling Scanning449348 +Ref: Controlling Scanning-Footnote-1454521 +Node: Delete454837 +Ref: Delete-Footnote-1457602 +Node: Numeric Array Subscripts457659 +Node: Uninitialized Subscripts459842 +Node: Multidimensional461467 +Node: Multiscanning464560 +Node: Arrays of Arrays466149 +Node: Functions470789 +Node: Built-in471608 +Node: Calling Built-in472686 +Node: Numeric Functions474674 +Ref: Numeric Functions-Footnote-1478508 +Ref: Numeric Functions-Footnote-2478865 +Ref: Numeric Functions-Footnote-3478913 +Node: String Functions479182 +Ref: String Functions-Footnote-1502193 +Ref: String Functions-Footnote-2502322 +Ref: String Functions-Footnote-3502570 +Node: Gory Details502657 +Ref: table-sub-escapes504326 +Ref: table-sub-posix-92505680 +Ref: table-sub-proposed507031 +Ref: table-posix-sub508385 +Ref: table-gensub-escapes509930 +Ref: Gory Details-Footnote-1511106 +Ref: Gory Details-Footnote-2511157 +Node: I/O Functions511308 +Ref: I/O Functions-Footnote-1518431 +Node: Time Functions518578 +Ref: Time Functions-Footnote-1529042 +Ref: Time Functions-Footnote-2529110 +Ref: Time Functions-Footnote-3529268 +Ref: Time Functions-Footnote-4529379 +Ref: Time Functions-Footnote-5529491 +Ref: Time Functions-Footnote-6529718 +Node: Bitwise Functions529984 +Ref: table-bitwise-ops530546 +Ref: Bitwise Functions-Footnote-1534791 +Node: Type Functions534975 +Node: I18N Functions536117 +Node: User-defined537762 +Node: Definition Syntax538566 +Ref: Definition Syntax-Footnote-1543491 +Node: Function Example543560 +Ref: Function Example-Footnote-1546204 +Node: Function Caveats546226 +Node: Calling A Function546744 +Node: Variable Scope547699 +Node: Pass By Value/Reference550687 +Node: Return Statement554195 +Node: Dynamic Typing557179 +Node: Indirect Calls558108 +Node: Library Functions567795 +Ref: Library Functions-Footnote-1571308 +Ref: Library Functions-Footnote-2571451 +Node: Library Names571622 +Ref: Library Names-Footnote-1575095 +Ref: Library Names-Footnote-2575315 +Node: General Functions575401 +Node: Strtonum Function576429 +Node: Assert Function579359 +Node: Round Function582685 +Node: Cliff Random Function584226 +Node: Ordinal Functions585242 +Ref: Ordinal Functions-Footnote-1588319 +Ref: Ordinal Functions-Footnote-2588571 +Node: Join Function588782 +Ref: Join Function-Footnote-1590553 +Node: Getlocaltime Function590753 +Node: Readfile Function594489 +Node: Data File Management596328 +Node: Filetrans Function596960 +Node: Rewind Function601029 +Node: File Checking602416 +Ref: File Checking-Footnote-1603548 +Node: Empty Files603749 +Node: Ignoring Assigns605979 +Node: Getopt Function607533 +Ref: Getopt Function-Footnote-1618836 +Node: Passwd Functions619039 +Ref: Passwd Functions-Footnote-1628018 +Node: Group Functions628106 +Ref: Group Functions-Footnote-1636048 +Node: Walking Arrays636261 +Node: Sample Programs638397 +Node: Running Examples639071 +Node: Clones639799 +Node: Cut Program641023 +Node: Egrep Program650876 +Ref: Egrep Program-Footnote-1658847 +Node: Id Program658957 +Node: Split Program662621 +Ref: Split Program-Footnote-1666159 +Node: Tee Program666287 +Node: Uniq Program669094 +Node: Wc Program676524 +Ref: Wc Program-Footnote-1680792 +Ref: Wc Program-Footnote-2680992 +Node: Miscellaneous Programs681084 +Node: Dupword Program682272 +Node: Alarm Program684303 +Node: Translate Program689110 +Ref: Translate Program-Footnote-1693501 +Ref: Translate Program-Footnote-2693771 +Node: Labels Program693905 +Ref: Labels Program-Footnote-1697276 +Node: Word Sorting697360 +Node: History Sorting701403 +Node: Extract Program703239 +Ref: Extract Program-Footnote-1710769 +Node: Simple Sed710898 +Node: Igawk Program713960 +Ref: Igawk Program-Footnote-1729135 +Ref: Igawk Program-Footnote-2729336 +Node: Anagram Program729474 +Node: Signature Program732542 +Node: Advanced Features733789 +Node: Nondecimal Data735675 +Node: Array Sorting737258 +Node: Controlling Array Traversal737955 +Node: Array Sorting Functions746239 +Ref: Array Sorting Functions-Footnote-1750108 +Node: Two-way I/O750302 +Ref: Two-way I/O-Footnote-1755734 +Node: TCP/IP Networking755816 +Node: Profiling758660 +Node: Internationalization766163 +Node: I18N and L10N767588 +Node: Explaining gettext768274 +Ref: Explaining gettext-Footnote-1773342 +Ref: Explaining gettext-Footnote-2773526 +Node: Programmer i18n773691 +Node: Translator i18n777918 +Node: String Extraction778712 +Ref: String Extraction-Footnote-1779673 +Node: Printf Ordering779759 +Ref: Printf Ordering-Footnote-1782541 +Node: I18N Portability782605 +Ref: I18N Portability-Footnote-1785054 +Node: I18N Example785117 +Ref: I18N Example-Footnote-1787755 +Node: Gawk I18N787827 +Node: Debugger788448 +Node: Debugging789419 +Node: Debugging Concepts789852 +Node: Debugging Terms791708 +Node: Awk Debugging794305 +Node: Sample Debugging Session795197 +Node: Debugger Invocation795717 +Node: Finding The Bug797050 +Node: List of Debugger Commands803537 +Node: Breakpoint Control804871 +Node: Debugger Execution Control808535 +Node: Viewing And Changing Data811895 +Node: Execution Stack815251 +Node: Debugger Info816718 +Node: Miscellaneous Debugger Commands820712 +Node: Readline Support825896 +Node: Limitations826727 +Node: Arbitrary Precision Arithmetic828979 +Ref: Arbitrary Precision Arithmetic-Footnote-1830628 +Node: General Arithmetic830776 +Node: Floating Point Issues832496 +Node: String Conversion Precision833377 +Ref: String Conversion Precision-Footnote-1835082 +Node: Unexpected Results835191 +Node: POSIX Floating Point Problems837344 +Ref: POSIX Floating Point Problems-Footnote-1841169 +Node: Integer Programming841207 +Node: Floating-point Programming842946 +Ref: Floating-point Programming-Footnote-1849277 +Ref: Floating-point Programming-Footnote-2849547 +Node: Floating-point Representation849811 +Node: Floating-point Context850976 +Ref: table-ieee-formats851815 +Node: Rounding Mode853199 +Ref: table-rounding-modes853678 +Ref: Rounding Mode-Footnote-1856693 +Node: Gawk and MPFR856872 +Node: Arbitrary Precision Floats858281 +Ref: Arbitrary Precision Floats-Footnote-1860724 +Node: Setting Precision861040 +Ref: table-predefined-precision-strings861726 +Node: Setting Rounding Mode863871 +Ref: table-gawk-rounding-modes864275 +Node: Floating-point Constants865462 +Node: Changing Precision866891 +Ref: Changing Precision-Footnote-1868288 +Node: Exact Arithmetic868462 +Node: Arbitrary Precision Integers871600 +Ref: Arbitrary Precision Integers-Footnote-1874615 +Node: Dynamic Extensions874762 +Node: Extension Intro876220 +Node: Plugin License877485 +Node: Extension Mechanism Outline878170 +Ref: load-extension878587 +Ref: load-new-function880065 +Ref: call-new-function881060 +Node: Extension API Description883075 +Node: Extension API Functions Introduction884362 +Node: General Data Types889289 +Ref: General Data Types-Footnote-1894984 +Node: Requesting Values895283 +Ref: table-value-types-returned896020 +Node: Memory Allocation Functions896974 +Ref: Memory Allocation Functions-Footnote-1899720 +Node: Constructor Functions899816 +Node: Registration Functions901574 +Node: Extension Functions902259 +Node: Exit Callback Functions904561 +Node: Extension Version String905810 +Node: Input Parsers906460 +Node: Output Wrappers916217 +Node: Two-way processors920727 +Node: Printing Messages922935 +Ref: Printing Messages-Footnote-1924012 +Node: Updating `ERRNO'924164 +Node: Accessing Parameters924903 +Node: Symbol Table Access926133 +Node: Symbol table by name926647 +Node: Symbol table by cookie928623 +Ref: Symbol table by cookie-Footnote-1932755 +Node: Cached values932818 +Ref: Cached values-Footnote-1936308 +Node: Array Manipulation936399 +Ref: Array Manipulation-Footnote-1937497 +Node: Array Data Types937536 +Ref: Array Data Types-Footnote-1940239 +Node: Array Functions940331 +Node: Flattening Arrays944167 +Node: Creating Arrays951019 +Node: Extension API Variables955744 +Node: Extension Versioning956380 +Node: Extension API Informational Variables958281 +Node: Extension API Boilerplate959367 +Node: Finding Extensions963171 +Node: Extension Example963731 +Node: Internal File Description964461 +Node: Internal File Ops968552 +Ref: Internal File Ops-Footnote-1980061 +Node: Using Internal File Ops980201 +Ref: Using Internal File Ops-Footnote-1982548 +Node: Extension Samples982814 +Node: Extension Sample File Functions984338 +Node: Extension Sample Fnmatch992825 +Node: Extension Sample Fork994594 +Node: Extension Sample Inplace995807 +Node: Extension Sample Ord997585 +Node: Extension Sample Readdir998421 +Node: Extension Sample Revout999953 +Node: Extension Sample Rev2way1000546 +Node: Extension Sample Read write array1001236 +Node: Extension Sample Readfile1003119 +Node: Extension Sample API Tests1004219 +Node: Extension Sample Time1004744 +Node: gawkextlib1006108 +Node: Language History1008889 +Node: V7/SVR3.11010482 +Node: SVR41012802 +Node: POSIX1014244 +Node: BTL1015630 +Node: POSIX/GNU1016364 +Node: Feature History1021963 +Node: Common Extensions1034939 +Node: Ranges and Locales1036251 +Ref: Ranges and Locales-Footnote-11040868 +Ref: Ranges and Locales-Footnote-21040895 +Ref: Ranges and Locales-Footnote-31041129 +Node: Contributors1041350 +Node: Installation1046731 +Node: Gawk Distribution1047625 +Node: Getting1048109 +Node: Extracting1048935 +Node: Distribution contents1050627 +Node: Unix Installation1056348 +Node: Quick Installation1056965 +Node: Additional Configuration Options1059411 +Node: Configuration Philosophy1061147 +Node: Non-Unix Installation1063501 +Node: PC Installation1063959 +Node: PC Binary Installation1065270 +Node: PC Compiling1067118 +Node: PC Testing1070078 +Node: PC Using1071254 +Node: Cygwin1075422 +Node: MSYS1076231 +Node: VMS Installation1076745 +Node: VMS Compilation1077541 +Ref: VMS Compilation-Footnote-11078793 +Node: VMS Dynamic Extensions1078851 +Node: VMS Installation Details1080224 +Node: VMS Running1082475 +Node: VMS GNV1085309 +Node: VMS Old Gawk1086032 +Node: Bugs1086502 +Node: Other Versions1090420 +Node: Notes1096504 +Node: Compatibility Mode1097304 +Node: Additions1098087 +Node: Accessing The Source1099014 +Node: Adding Code1100454 +Node: New Ports1106499 +Node: Derived Files1110634 +Ref: Derived Files-Footnote-11115955 +Ref: Derived Files-Footnote-21115989 +Ref: Derived Files-Footnote-31116589 +Node: Future Extensions1116687 +Node: Implementation Limitations1117270 +Node: Extension Design1118518 +Node: Old Extension Problems1119672 +Ref: Old Extension Problems-Footnote-11121180 +Node: Extension New Mechanism Goals1121237 +Ref: Extension New Mechanism Goals-Footnote-11124602 +Node: Extension Other Design Decisions1124788 +Node: Extension Future Growth1126894 +Node: Old Extension Mechanism1127730 +Node: Basic Concepts1129470 +Node: Basic High Level1130151 +Ref: figure-general-flow1130423 +Ref: figure-process-flow1131022 +Ref: Basic High Level-Footnote-11134251 +Node: Basic Data Typing1134436 +Node: Glossary1137791 +Node: Copying1163022 +Node: GNU Free Documentation License1200578 +Node: Index1225714 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 9422f43b..08aa5ddd 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -58,6 +58,7 @@ @set SUBSECTION subsection @set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} @set COMMONEXT (c.e.) +@set PAGE page @end iftex @ifinfo @set DOCUMENT Info file @@ -67,6 +68,7 @@ @set SUBSECTION node @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifinfo @ifhtml @set DOCUMENT Web page @@ -76,6 +78,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifhtml @ifdocbook @set DOCUMENT book @@ -85,6 +88,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifdocbook @ifxml @set DOCUMENT book @@ -94,6 +98,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifxml @ifplaintext @set DOCUMENT book @@ -103,6 +108,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifplaintext @ifdocbook @@ -3388,19 +3394,10 @@ There are two ways to run @command{awk}---with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [@dots{}] in these templates are optional: -@ifnotdocbook -@example -awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{} -awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{} -@end example -@end ifnotdocbook - -@c FIXME - find a better way to mark this up in docbook -@docbook -<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> … -awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> … -</screen> -@end docbook +@display +@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{} +@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{} +@end display @cindex GNU long options @cindex long options @@ -12948,13 +12945,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell @command{awk} what to do once a match for the pattern is found. Thus, in outline, an @command{awk} program generally looks like this: -@example -@r{[}@var{pattern}@r{]} @{ @var{action} @} - @var{pattern} @r{[}@{ @var{action} @}@r{]} +@display +[@var{pattern}] @code{@{ @var{action} @}} + @var{pattern} [@code{@{ @var{action} @}}] @dots{} -function @var{name}(@var{args}) @{ @dots{} @} +@code{function @var{name}(@var{args}) @{ @dots{} @}} @dots{} -@end example +@end display @cindex @code{@{@}} (braces), actions and @cindex braces (@code{@{@}}), actions and @@ -13069,9 +13066,9 @@ newlines or semicolons. The @code{if}-@code{else} statement is @command{awk}'s decision-making statement. It looks like this: -@example -if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} -@end example +@display +@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}] +@end display @noindent The @var{condition} is an expression that controls what the rest of the @@ -13669,9 +13666,9 @@ The @code{exit} statement causes @command{awk} to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The @code{exit} statement is written as follows: -@example -exit @r{[}@var{return code}@r{]} -@end example +@display +@code{exit} [@var{return code}] +@end display @cindex @code{BEGIN} pattern, @code{exit} statement and @cindex @code{END} pattern, @code{exit} statement and @@ -18518,12 +18515,12 @@ entire program before starting to execute any of it. The definition of a function named @var{name} looks like this: -@example -function @var{name}(@r{[}@var{parameter-list}@r{]}) -@{ +@display +@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)} +@code{@{} @var{body-of-function} -@} -@end example +@code{@}} +@end display @cindex names, functions @cindex functions, names of @@ -19045,9 +19042,9 @@ This statement returns control to the calling part of the @command{awk} program. can also be used to return a value for use in the rest of the @command{awk} program. It looks like this: -@example -return @r{[}@var{expression}@r{]} -@end example +@display +@code{return} [@var{expression}] +@end display The @var{expression} part is optional. Due most likely to an oversight, POSIX does not define what the return @@ -20356,7 +20353,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -20712,10 +20709,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword @cindex readable data files@comma{} checking @cindex files, skipping Normally, if you give @command{awk} a data file that isn't readable, -it stops with a fatal error. There are times when you -might want to just ignore such files and keep going. You can -do this by prepending the following program to your @command{awk} -program: +it stops with a fatal error. There are times when you might want to +just ignore such files and keep going.@footnote{The @code{BEGINFILE} +special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative +mechanism for dealing with files that can't be opened. However, the +code here provides a portable solution.} You can do this by prepending +the following program to your @command{awk} program: @cindex @code{readable.awk} program @example @@ -20753,7 +20752,7 @@ skips the file (since it's no longer in the list). See also @ref{ARGC and ARGV}. @node Empty Files -@subsection Checking For Zero-length Files +@subsection Checking for Zero-length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit @@ -21226,7 +21225,7 @@ BEGIN @{ # test program if (_getopt_test) @{ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -21242,32 +21241,31 @@ result of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} -@print{} c = <a>, optarg = <> -@print{} c = <c>, optarg = <> -@print{} c = <b>, optarg = <ARG> +@print{} c = <a>, Optarg = <> +@print{} c = <c>, Optarg = <> +@print{} c = <b>, Optarg = <ARG> @print{} non-option arguments: @print{} ARGV[3] = <bax> @print{} ARGV[4] = <-x> $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc} -@print{} c = <a>, optarg = <> +@print{} c = <a>, Optarg = <> @error{} x -- invalid option -@print{} c = <?>, optarg = <> +@print{} c = <?>, Optarg = <> @print{} non-option arguments: @print{} ARGV[4] = <xyz> @print{} ARGV[5] = <abc> @end example -In both runs, -the first @option{--} terminates the arguments to @command{awk}, so that it does -not try to interpret the @option{-a}, etc., as its own options. +In both runs, the first @option{--} terminates the arguments to +@command{awk}, so that it does not try to interpret the @option{-a}, +etc., as its own options. @quotation NOTE -After @code{getopt()} is through, it is the responsibility of the user level -code to -clear out all the elements of @code{ARGV} from 1 to @code{Optind}, -so that @command{awk} does not try to process the command-line options -as file names. +After @code{getopt()} is through, it is the responsibility of the +user level code to clear out all the elements of @code{ARGV} from 1 +to @code{Optind}, so that @command{awk} does not try to process the +command-line options as file names. @end quotation Several of the sample programs presented in @@ -21336,7 +21334,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ @c endfile @ignore @@ -21682,7 +21680,7 @@ is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ @c endfile @ignore @@ -21769,7 +21767,7 @@ it is usually empty or set to @samp{*}. @item Group ID Number The group's numeric group ID number; -this number must be unique within the file. +the association of name to number must be unique within the file. (On some systems it's a C @code{long}, and not an @code{int}. Thus we cast it to @code{long} for all cases.) @@ -21905,10 +21903,10 @@ tvpeople:*:101:david,conan,tom,joan For this reason, @code{_gr_init()} looks to see if a group name or group ID number is already seen. If it is, then the user names are -simply concatenated onto the previous list of users. (There is actually a +simply concatenated onto the previous list of users.@footnote{There is actually a subtle problem with the code just presented. Suppose that the first time there were no names. This code adds the names with -a leading comma. It also doesn't check that there is a @code{$4}.) +a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, @@ -22278,13 +22276,7 @@ function usage( e1, e2) @noindent The variables @code{e1} and @code{e2} are used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. @cindex @code{BEGIN} pattern, running @command{awk} programs and @cindex @code{FS} variable, running @command{awk} programs and @@ -22323,7 +22315,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" @} else if (c == "s") - suppress++ + suppress = 1 else usage() @} @@ -22536,9 +22528,9 @@ expressions that are almost identical to those available in @command{awk} (@pxref{Regexp}). You invoke it as follows: -@example -egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} -@end example +@display +@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{} +@end display The @var{pattern} is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -22720,6 +22712,11 @@ function endfile(file) @c endfile @end example +The @code{BEGINFILE} and @code{ENDFILE} special patterns +(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be +@command{gawk}-specific. Additionally, this example was written before +@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}. + The following rule does most of the work of matching lines. The variable @code{matches} is true if the line matched the pattern. If the user wants lines that did not match, the sense of @code{matches} is inverted @@ -22776,9 +22773,7 @@ there are no matches, the exit status is one; otherwise it is zero: @c file eg/prog/egrep.awk END \ @{ - if (total == 0) - exit 1 - exit 0 + exit (total == 0) @} @c endfile @end example @@ -22832,7 +22827,7 @@ corresponding user and group names. The output might look like this: @example $ @kbd{id} -@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) +@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) @end example @cindex @code{PROCINFO} array, and user and group ID numbers @@ -22868,6 +22863,7 @@ numbers: # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 @c endfile @end ignore @@ -22887,34 +22883,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (euid != uid) @{ printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (egid != gid) @{ printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} for (i = 1; ("group" i) in PROCINFO; i++) @{ @@ -22923,16 +22911,20 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") @} print "" @} + +function pr_first_field(str, a) +@{ + split(str, a, ":") + printf("(%s)", a[1]) +@} @c endfile @end example @@ -22952,9 +22944,13 @@ The loop is also correct if there are @emph{no} supplementary groups; then the condition is false the first time it's tested, and the loop body never executes. +The @code{pr_first_field()} function simply isolates out some +code that is used repeatedly, making the whole program +slightly shorter and cleaner. + @c exercise!!! @ignore -The POSIX version of @command{id} takes arguments that control which +The POSIX version of @command{id} takes options that control which information is printed. Modify this version to accept the same arguments and perform in the same way. @end ignore @@ -22974,9 +22970,9 @@ Usage is as follows:@footnote{This is the traditional usage. The POSIX usage is different, but not relevant for what the program aims to demonstrate.} -@example -split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]} -@end example +@display +@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}] +@end display By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has @@ -23010,11 +23006,12 @@ is used as the prefix for the output file names: # # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 @c endfile @end ignore @c file eg/prog/split.awk -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN @{ outfile = "x" # default @@ -23023,7 +23020,7 @@ BEGIN @{ usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) @{ + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{ count = -ARGV[i] ARGV[i] = "" i++ @@ -23095,13 +23092,7 @@ function usage( e) @noindent The variable @code{e} is used so that the function -fits nicely on the -@ifinfo -screen. -@end ifinfo -@ifnotinfo -page. -@end ifnotinfo +fits nicely on the @value{PAGE}. This program is a bit sloppy; it relies on @command{awk} to automatically close the last file instead of doing it in an @code{END} rule. @@ -23124,9 +23115,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: -@example -tee @r{[}-a@r{]} file @dots{} -@end example +@display +@command{tee} [@option{-a}] @var{file} @dots{} +@end display The @option{-a} option tells @code{tee} to append to the named files, instead of truncating them and starting over. @@ -23251,9 +23242,9 @@ input, and by default removes duplicate lines. In other words, it only prints unique lines---hence the name. @command{uniq} has a number of options. The usage is as follows: -@example -uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]} -@end example +@display +@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]] +@end display The options for @command{uniq} are: @@ -23277,11 +23268,11 @@ by runs of spaces and/or TABs. Skip @var{n} characters before comparing lines. Any fields specified with @samp{-@var{n}} are skipped first. -@item @var{input file} +@item @var{inputfile} Data is read from the input file named on the command line, instead of from the standard input. -@item @var{output file} +@item @var{outputfile} The generated output is sent to the named output file, instead of to the standard output. @end table @@ -23518,9 +23509,9 @@ END @{ The @command{wc} (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: -@example -wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} -@end example +@display +@command{wc} [@option{-lwc}] [@var{files} @dots{}] +@end display If no files are specified on the command line, @command{wc} reads its standard input. If there are multiple files, it also prints total counts for all @@ -24001,19 +23992,18 @@ often used to map uppercase letters into lowercase for further processing: @end example @command{tr} requires two lists of characters.@footnote{On some older -systems, -including Solaris, -@command{tr} may require that the lists be written as -range expressions enclosed in square brackets (@samp{[a-z]}) and quoted, -to prevent the shell from attempting a file name expansion. This is -not a feature.} When processing the input, the first character in the -first list is replaced with the first character in the second list, -the second character in the first list is replaced with the second -character in the second list, and so on. If there are more characters -in the ``from'' list than in the ``to'' list, the last character of the -``to'' list is used for the remaining characters in the ``from'' list. - -Some time ago, +systems, including Solaris, the system version of @command{tr} may require +that the lists be written as range expressions enclosed in square brackets +(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file +name expansion. This is not a feature.} When processing the input, the +first character in the first list is replaced with the first character +in the second list, the second character in the first list is replaced +with the second character in the second list, and so on. If there are +more characters in the ``from'' list than in the ``to'' list, the last +character of the ``to'' list is used for the remaining characters in the +``from'' list. + +Once upon a time, @c early or mid-1989! a user proposed that a transliteration function should be added to @command{gawk}. @@ -24127,13 +24117,12 @@ BEGIN @{ While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the @command{gawk} authors) started to consider adding a built-in function. However, -shortly after writing this program, we learned that the System V Release 4 -@command{awk} had added the @code{toupper()} and @code{tolower()} functions -(@pxref{String Functions}). -These functions handle the vast majority of the -cases where character transliteration is necessary, and so we chose to -simply add those functions to @command{gawk} as well and then leave well -enough alone. +shortly after writing this program, we learned that Brian Kernighan +had added the @code{toupper()} and @code{tolower()} functions to his +@command{awk} (@pxref{String Functions}). These functions handle the +vast majority of the cases where character transliteration is necessary, +and so we chose to simply add those functions to @command{gawk} as well +and then leave well enough alone. An obvious improvement to this program would be to set up the @code{t_ar} array only once, in a @code{BEGIN} rule. However, this @@ -24166,7 +24155,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that @command{awk} splits records at blank lines (@pxref{Records}). It sets @code{MAXLINES} to 100, since 100 is the maximum number -of lines on the page (20 * 5 = 100). +of lines on the page +@iftex +(@math{20 @cdot 5 = 100}). +@end iftex +@ifnottex +@ifnotdocbook +(20 * 5 = 100). +@end ifnotdocbook +@end ifnottex +@docbook +(20 ⋅ 5 = 100). @c +@end docbook Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they @@ -24278,7 +24278,7 @@ END \ When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to substitute +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This @value{SUBSECTION} develops a program for counting words and presenting the frequency information in a useful format. @@ -24356,6 +24356,10 @@ END @{ @} @end example +The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written +@samp{/[[:punct:]]/}, but then underscores would also be removed, +and we want to keep them. + Assuming we have saved this program in a file named @file{wordfreq.awk}, and that the data is in @file{file1}, the following pipeline: @@ -24467,6 +24471,7 @@ information. For example, using the following @code{print} statement in the print data[lines[i]], lines[i] @end example +@noindent This works because @code{data[$0]} is incremented each time a line is seen. @c ENDOFRANGE lidu @@ -24622,13 +24627,7 @@ BEGIN @{ IGNORECASE = 1 @} @noindent The variable @code{e} is used so that the rule -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. The second rule handles moving data into files. It verifies that a file name is given in the directive. If the file named is not the @@ -24657,10 +24656,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@} symbols in the original line. For each two empty elements (@samp{@@@@} in the original file), we have to add a single @samp{@@} symbol back in.@footnote{This program was written before @command{gawk} had the -@code{gensub()} function. Consider how you might use it to simplify the code.} +@code{gensub()} function. +@c exercise!! +Consider how you might use it to simplify the code.} When the processing of the array is finished, @code{join()} is called with the -value of @code{SUBSEP}, to rejoin the pieces back into a single +value of @code{SUBSEP} (@pxref{Multidimensional}), +to rejoin the pieces back into a single line. That line is then printed to the output file: @example @@ -25185,7 +25187,7 @@ BEGIN @{ @c endfile @end example -The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}. +The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}. The main loop comes next. Input lines are read in succession. Lines that do not start with @code{@@include} are printed verbatim. If the line does start with @code{@@include}, the file name is in @code{$2}. @@ -25295,7 +25297,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"' The @command{eval} command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. -This version of @command{igawk} represents my fifth version of this program. +This version of @command{igawk} represents the fifth version of this program. There are four key simplifications that make the program work better: @itemize @bullet @@ -25505,6 +25507,9 @@ babels beslab babery yabber @dots{} @end example + +@c Exercise: Avoid the use of external sort command + @c ENDOFRANGE anagram @node Signature Program @@ -25536,7 +25541,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}' @end example -We leave it to you to determine what the program does. +@cindex Johansen, Chris +We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this @value{DOCUMENT}.) @ignore To: "Arnold Robbins" <arnold@skeeve.com> @@ -28664,7 +28672,7 @@ partial dump of Davide Brini's obfuscated code @smallexample gawk> @kbd{dump} -@print{} # BEGIN +@print{} # BEGIN @print{} @print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] @print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 2edeacc8..802fb536 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -53,6 +53,7 @@ @set SUBSECTION subsection @set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} @set COMMONEXT (c.e.) +@set PAGE page @end iftex @ifinfo @set DOCUMENT Info file @@ -62,6 +63,7 @@ @set SUBSECTION node @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifinfo @ifhtml @set DOCUMENT Web page @@ -71,6 +73,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE screen @end ifhtml @ifdocbook @set DOCUMENT book @@ -80,6 +83,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifdocbook @ifxml @set DOCUMENT book @@ -89,6 +93,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifxml @ifplaintext @set DOCUMENT book @@ -98,6 +103,7 @@ @set SUBSECTION subsection @set DARKCORNER (d.c.) @set COMMONEXT (c.e.) +@set PAGE page @end ifplaintext @ifdocbook @@ -3316,19 +3322,10 @@ There are two ways to run @command{awk}---with an explicit program or with one or more program files. Here are templates for both of them; items enclosed in [@dots{}] in these templates are optional: -@ifnotdocbook -@example -awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{} -awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{} -@end example -@end ifnotdocbook - -@c FIXME - find a better way to mark this up in docbook -@docbook -<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> … -awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> … -</screen> -@end docbook +@display +@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{} +@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{} +@end display @cindex GNU long options @cindex long options @@ -12328,13 +12325,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell @command{awk} what to do once a match for the pattern is found. Thus, in outline, an @command{awk} program generally looks like this: -@example -@r{[}@var{pattern}@r{]} @{ @var{action} @} - @var{pattern} @r{[}@{ @var{action} @}@r{]} +@display +[@var{pattern}] @code{@{ @var{action} @}} + @var{pattern} [@code{@{ @var{action} @}}] @dots{} -function @var{name}(@var{args}) @{ @dots{} @} +@code{function @var{name}(@var{args}) @{ @dots{} @}} @dots{} -@end example +@end display @cindex @code{@{@}} (braces), actions and @cindex braces (@code{@{@}}), actions and @@ -12449,9 +12446,9 @@ newlines or semicolons. The @code{if}-@code{else} statement is @command{awk}'s decision-making statement. It looks like this: -@example -if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} -@end example +@display +@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}] +@end display @noindent The @var{condition} is an expression that controls what the rest of the @@ -13049,9 +13046,9 @@ The @code{exit} statement causes @command{awk} to immediately stop executing the current rule and to stop processing input; any remaining input is ignored. The @code{exit} statement is written as follows: -@example -exit @r{[}@var{return code}@r{]} -@end example +@display +@code{exit} [@var{return code}] +@end display @cindex @code{BEGIN} pattern, @code{exit} statement and @cindex @code{END} pattern, @code{exit} statement and @@ -17691,12 +17688,12 @@ entire program before starting to execute any of it. The definition of a function named @var{name} looks like this: -@example -function @var{name}(@r{[}@var{parameter-list}@r{]}) -@{ +@display +@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)} +@code{@{} @var{body-of-function} -@} -@end example +@code{@}} +@end display @cindex names, functions @cindex functions, names of @@ -18218,9 +18215,9 @@ This statement returns control to the calling part of the @command{awk} program. can also be used to return a value for use in the rest of the @command{awk} program. It looks like this: -@example -return @r{[}@var{expression}@r{]} -@end example +@display +@code{return} [@var{expression}] +@end display The @var{expression} part is optional. Due most likely to an oversight, POSIX does not define what the return @@ -19529,7 +19526,7 @@ function getlocaltime(time, ret, now, i) now = systime() # return date(1)-style output - ret = strftime("%a %b %e %H:%M:%S %Z %Y", now) + ret = strftime(PROCINFO["strftime"], now) # clear out target array delete time @@ -19856,10 +19853,12 @@ The @code{rewind()} function also relies on the @code{nextfile} keyword @cindex readable data files@comma{} checking @cindex files, skipping Normally, if you give @command{awk} a data file that isn't readable, -it stops with a fatal error. There are times when you -might want to just ignore such files and keep going. You can -do this by prepending the following program to your @command{awk} -program: +it stops with a fatal error. There are times when you might want to +just ignore such files and keep going.@footnote{The @code{BEGINFILE} +special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative +mechanism for dealing with files that can't be opened. However, the +code here provides a portable solution.} You can do this by prepending +the following program to your @command{awk} program: @cindex @code{readable.awk} program @example @@ -19897,7 +19896,7 @@ skips the file (since it's no longer in the list). See also @ref{ARGC and ARGV}. @node Empty Files -@subsection Checking For Zero-length Files +@subsection Checking for Zero-length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit @@ -20370,7 +20369,7 @@ BEGIN @{ # test program if (_getopt_test) @{ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", + printf("c = <%c>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) @@ -20386,32 +20385,31 @@ result of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} -@print{} c = <a>, optarg = <> -@print{} c = <c>, optarg = <> -@print{} c = <b>, optarg = <ARG> +@print{} c = <a>, Optarg = <> +@print{} c = <c>, Optarg = <> +@print{} c = <b>, Optarg = <ARG> @print{} non-option arguments: @print{} ARGV[3] = <bax> @print{} ARGV[4] = <-x> $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc} -@print{} c = <a>, optarg = <> +@print{} c = <a>, Optarg = <> @error{} x -- invalid option -@print{} c = <?>, optarg = <> +@print{} c = <?>, Optarg = <> @print{} non-option arguments: @print{} ARGV[4] = <xyz> @print{} ARGV[5] = <abc> @end example -In both runs, -the first @option{--} terminates the arguments to @command{awk}, so that it does -not try to interpret the @option{-a}, etc., as its own options. +In both runs, the first @option{--} terminates the arguments to +@command{awk}, so that it does not try to interpret the @option{-a}, +etc., as its own options. @quotation NOTE -After @code{getopt()} is through, it is the responsibility of the user level -code to -clear out all the elements of @code{ARGV} from 1 to @code{Optind}, -so that @command{awk} does not try to process the command-line options -as file names. +After @code{getopt()} is through, it is the responsibility of the +user level code to clear out all the elements of @code{ARGV} from 1 +to @code{Optind}, so that @command{awk} does not try to process the +command-line options as file names. @end quotation Several of the sample programs presented in @@ -20480,7 +20478,7 @@ Following is @command{pwcat}, a C program that ``cats'' the password database: /* * pwcat.c * - * Generate a printable version of the password database + * Generate a printable version of the password database. */ @c endfile @ignore @@ -20826,7 +20824,7 @@ is as follows: /* * grcat.c * - * Generate a printable version of the group database + * Generate a printable version of the group database. */ @c endfile @ignore @@ -20913,7 +20911,7 @@ it is usually empty or set to @samp{*}. @item Group ID Number The group's numeric group ID number; -this number must be unique within the file. +the association of name to number must be unique within the file. (On some systems it's a C @code{long}, and not an @code{int}. Thus we cast it to @code{long} for all cases.) @@ -21049,10 +21047,10 @@ tvpeople:*:101:david,conan,tom,joan For this reason, @code{_gr_init()} looks to see if a group name or group ID number is already seen. If it is, then the user names are -simply concatenated onto the previous list of users. (There is actually a +simply concatenated onto the previous list of users.@footnote{There is actually a subtle problem with the code just presented. Suppose that the first time there were no names. This code adds the names with -a leading comma. It also doesn't check that there is a @code{$4}.) +a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, @@ -21422,13 +21420,7 @@ function usage( e1, e2) @noindent The variables @code{e1} and @code{e2} are used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. @cindex @code{BEGIN} pattern, running @command{awk} programs and @cindex @code{FS} variable, running @command{awk} programs and @@ -21467,7 +21459,7 @@ BEGIN \ if (FS == " ") # defeat awk semantics FS = "[ ]" @} else if (c == "s") - suppress++ + suppress = 1 else usage() @} @@ -21680,9 +21672,9 @@ expressions that are almost identical to those available in @command{awk} (@pxref{Regexp}). You invoke it as follows: -@example -egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} -@end example +@display +@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{} +@end display The @var{pattern} is a regular expression. In typical usage, the regular expression is quoted to prevent the shell from expanding any of the @@ -21864,6 +21856,11 @@ function endfile(file) @c endfile @end example +The @code{BEGINFILE} and @code{ENDFILE} special patterns +(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be +@command{gawk}-specific. Additionally, this example was written before +@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}. + The following rule does most of the work of matching lines. The variable @code{matches} is true if the line matched the pattern. If the user wants lines that did not match, the sense of @code{matches} is inverted @@ -21920,9 +21917,7 @@ there are no matches, the exit status is one; otherwise it is zero: @c file eg/prog/egrep.awk END \ @{ - if (total == 0) - exit 1 - exit 0 + exit (total == 0) @} @c endfile @end example @@ -21976,7 +21971,7 @@ corresponding user and group names. The output might look like this: @example $ @kbd{id} -@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy) +@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo) @end example @cindex @code{PROCINFO} array, and user and group ID numbers @@ -22012,6 +22007,7 @@ numbers: # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 # Revised February 1996 +# Revised May 2014 @c endfile @end ignore @@ -22031,34 +22027,26 @@ BEGIN \ printf("uid=%d", uid) pw = getpwuid(uid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (euid != uid) @{ printf(" euid=%d", euid) pw = getpwuid(euid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} printf(" gid=%d", gid) pw = getgrgid(gid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (egid != gid) @{ printf(" egid=%d", egid) pw = getgrgid(egid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) @} for (i = 1; ("group" i) in PROCINFO; i++) @{ @@ -22067,16 +22055,20 @@ BEGIN \ group = PROCINFO["group" i] printf("%d", group) pw = getgrgid(group) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} + if (pw != "") + pr_first_field(pw) if (("group" (i+1)) in PROCINFO) printf(",") @} print "" @} + +function pr_first_field(str, a) +@{ + split(str, a, ":") + printf("(%s)", a[1]) +@} @c endfile @end example @@ -22096,9 +22088,13 @@ The loop is also correct if there are @emph{no} supplementary groups; then the condition is false the first time it's tested, and the loop body never executes. +The @code{pr_first_field()} function simply isolates out some +code that is used repeatedly, making the whole program +slightly shorter and cleaner. + @c exercise!!! @ignore -The POSIX version of @command{id} takes arguments that control which +The POSIX version of @command{id} takes options that control which information is printed. Modify this version to accept the same arguments and perform in the same way. @end ignore @@ -22118,9 +22114,9 @@ Usage is as follows:@footnote{This is the traditional usage. The POSIX usage is different, but not relevant for what the program aims to demonstrate.} -@example -split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]} -@end example +@display +@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}] +@end display By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has @@ -22154,11 +22150,12 @@ is used as the prefix for the output file names: # # Arnold Robbins, arnold@@skeeve.com, Public Domain # May 1993 +# Revised slightly, May 2014 @c endfile @end ignore @c file eg/prog/split.awk -# usage: split [-num] [file] [outname] +# usage: split [-count] [file] [outname] BEGIN @{ outfile = "x" # default @@ -22167,7 +22164,7 @@ BEGIN @{ usage() i = 1 - if (ARGV[i] ~ /^-[[:digit:]]+$/) @{ + if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{ count = -ARGV[i] ARGV[i] = "" i++ @@ -22239,13 +22236,7 @@ function usage( e) @noindent The variable @code{e} is used so that the function -fits nicely on the -@ifinfo -screen. -@end ifinfo -@ifnotinfo -page. -@end ifnotinfo +fits nicely on the @value{PAGE}. This program is a bit sloppy; it relies on @command{awk} to automatically close the last file instead of doing it in an @code{END} rule. @@ -22268,9 +22259,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies its standard input to its standard output and also duplicates it to the files named on the command line. Its usage is as follows: -@example -tee @r{[}-a@r{]} file @dots{} -@end example +@display +@command{tee} [@option{-a}] @var{file} @dots{} +@end display The @option{-a} option tells @code{tee} to append to the named files, instead of truncating them and starting over. @@ -22395,9 +22386,9 @@ input, and by default removes duplicate lines. In other words, it only prints unique lines---hence the name. @command{uniq} has a number of options. The usage is as follows: -@example -uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]} -@end example +@display +@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]] +@end display The options for @command{uniq} are: @@ -22421,11 +22412,11 @@ by runs of spaces and/or TABs. Skip @var{n} characters before comparing lines. Any fields specified with @samp{-@var{n}} are skipped first. -@item @var{input file} +@item @var{inputfile} Data is read from the input file named on the command line, instead of from the standard input. -@item @var{output file} +@item @var{outputfile} The generated output is sent to the named output file, instead of to the standard output. @end table @@ -22662,9 +22653,9 @@ END @{ The @command{wc} (word count) utility counts lines, words, and characters in one or more input files. Its usage is as follows: -@example -wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} -@end example +@display +@command{wc} [@option{-lwc}] [@var{files} @dots{}] +@end display If no files are specified on the command line, @command{wc} reads its standard input. If there are multiple files, it also prints total counts for all @@ -23145,19 +23136,18 @@ often used to map uppercase letters into lowercase for further processing: @end example @command{tr} requires two lists of characters.@footnote{On some older -systems, -including Solaris, -@command{tr} may require that the lists be written as -range expressions enclosed in square brackets (@samp{[a-z]}) and quoted, -to prevent the shell from attempting a file name expansion. This is -not a feature.} When processing the input, the first character in the -first list is replaced with the first character in the second list, -the second character in the first list is replaced with the second -character in the second list, and so on. If there are more characters -in the ``from'' list than in the ``to'' list, the last character of the -``to'' list is used for the remaining characters in the ``from'' list. - -Some time ago, +systems, including Solaris, the system version of @command{tr} may require +that the lists be written as range expressions enclosed in square brackets +(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file +name expansion. This is not a feature.} When processing the input, the +first character in the first list is replaced with the first character +in the second list, the second character in the first list is replaced +with the second character in the second list, and so on. If there are +more characters in the ``from'' list than in the ``to'' list, the last +character of the ``to'' list is used for the remaining characters in the +``from'' list. + +Once upon a time, @c early or mid-1989! a user proposed that a transliteration function should be added to @command{gawk}. @@ -23271,13 +23261,12 @@ BEGIN @{ While it is possible to do character transliteration in a user-level function, it is not necessarily efficient, and we (the @command{gawk} authors) started to consider adding a built-in function. However, -shortly after writing this program, we learned that the System V Release 4 -@command{awk} had added the @code{toupper()} and @code{tolower()} functions -(@pxref{String Functions}). -These functions handle the vast majority of the -cases where character transliteration is necessary, and so we chose to -simply add those functions to @command{gawk} as well and then leave well -enough alone. +shortly after writing this program, we learned that Brian Kernighan +had added the @code{toupper()} and @code{tolower()} functions to his +@command{awk} (@pxref{String Functions}). These functions handle the +vast majority of the cases where character transliteration is necessary, +and so we chose to simply add those functions to @command{gawk} as well +and then leave well enough alone. An obvious improvement to this program would be to set up the @code{t_ar} array only once, in a @code{BEGIN} rule. However, this @@ -23310,7 +23299,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that @command{awk} splits records at blank lines (@pxref{Records}). It sets @code{MAXLINES} to 100, since 100 is the maximum number -of lines on the page (20 * 5 = 100). +of lines on the page +@iftex +(@math{20 @cdot 5 = 100}). +@end iftex +@ifnottex +@ifnotdocbook +(20 * 5 = 100). +@end ifnotdocbook +@end ifnottex +@docbook +(20 ⋅ 5 = 100). @c +@end docbook Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they @@ -23422,7 +23422,7 @@ END \ When working with large amounts of text, it can be interesting to know how often different words appear. For example, an author may overuse -certain words, in which case she might wish to find synonyms to substitute +certain words, in which case he or she might wish to find synonyms to substitute for words that appear too often. This @value{SUBSECTION} develops a program for counting words and presenting the frequency information in a useful format. @@ -23500,6 +23500,10 @@ END @{ @} @end example +The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written +@samp{/[[:punct:]]/}, but then underscores would also be removed, +and we want to keep them. + Assuming we have saved this program in a file named @file{wordfreq.awk}, and that the data is in @file{file1}, the following pipeline: @@ -23611,6 +23615,7 @@ information. For example, using the following @code{print} statement in the print data[lines[i]], lines[i] @end example +@noindent This works because @code{data[$0]} is incremented each time a line is seen. @c ENDOFRANGE lidu @@ -23766,13 +23771,7 @@ BEGIN @{ IGNORECASE = 1 @} @noindent The variable @code{e} is used so that the rule -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex +fits nicely on the @value{PAGE}. The second rule handles moving data into files. It verifies that a file name is given in the directive. If the file named is not the @@ -23801,10 +23800,13 @@ Each element of @code{a} that is empty indicates two successive @samp{@@} symbols in the original line. For each two empty elements (@samp{@@@@} in the original file), we have to add a single @samp{@@} symbol back in.@footnote{This program was written before @command{gawk} had the -@code{gensub()} function. Consider how you might use it to simplify the code.} +@code{gensub()} function. +@c exercise!! +Consider how you might use it to simplify the code.} When the processing of the array is finished, @code{join()} is called with the -value of @code{SUBSEP}, to rejoin the pieces back into a single +value of @code{SUBSEP} (@pxref{Multidimensional}), +to rejoin the pieces back into a single line. That line is then printed to the output file: @example @@ -24329,7 +24331,7 @@ BEGIN @{ @c endfile @end example -The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}. +The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}. The main loop comes next. Input lines are read in succession. Lines that do not start with @code{@@include} are printed verbatim. If the line does start with @code{@@include}, the file name is in @code{$2}. @@ -24439,7 +24441,7 @@ eval gawk $opts -- '"$processed_program"' '"$@@"' The @command{eval} command is a shell construct that reruns the shell's parsing process. This keeps things properly quoted. -This version of @command{igawk} represents my fifth version of this program. +This version of @command{igawk} represents the fifth version of this program. There are four key simplifications that make the program work better: @itemize @bullet @@ -24649,6 +24651,9 @@ babels beslab babery yabber @dots{} @end example + +@c Exercise: Avoid the use of external sort command + @c ENDOFRANGE anagram @node Signature Program @@ -24680,7 +24685,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O, O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}' @end example -We leave it to you to determine what the program does. +@cindex Johansen, Chris +We leave it to you to determine what the program does. (If you are +truly desperate to understand it, see Chris Johansen's explanation, +which is embedded in the Texinfo source file for this @value{DOCUMENT}.) @ignore To: "Arnold Robbins" <arnold@skeeve.com> @@ -27808,7 +27816,7 @@ partial dump of Davide Brini's obfuscated code @smallexample gawk> @kbd{dump} -@print{} # BEGIN +@print{} # BEGIN @print{} @print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk] @print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR] |