diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | awk.h | 6 | ||||
-rw-r--r-- | doc/ChangeLog | 4 | ||||
-rw-r--r-- | doc/gawk.info | 246 | ||||
-rw-r--r-- | doc/gawk.texi | 26 | ||||
-rw-r--r-- | gawkapi.h | 6 | ||||
-rw-r--r-- | io.c | 11 |
7 files changed, 184 insertions, 121 deletions
@@ -2,6 +2,12 @@ * bootstrap.sh: Touch extension/aclocal.m4 also. + Unrelated: Extend input parser API: + + * awk.h (IOBUF): Remove read_func pointer. + * gawkapi.h (awk_input_buf_t): Move it to here. + * io.c (iop_alloc, get_a_record, get_read_timeout): Adjust code. + 2012-12-18 Andrew J. Schorr <aschorr@telemetry-investments.com> * gawkapi.c (sym_update_real): If setting a scalar variable that exists @@ -886,12 +886,6 @@ typedef struct iobuf { ssize_t count; /* amount read last time */ size_t scanoff; /* where we were in the buffer when we had to regrow/refill */ - /* - * No argument prototype on read_func. See get_src_buf() - * in awkgram.y. - */ - ssize_t (*read_func)(); - bool valid; int errcode; diff --git a/doc/ChangeLog b/doc/ChangeLog index b6866938..0e0b806d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2012-12-18 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi (Input Parsers): Add info on read_func. + 2012-12-16 Arnold D. Robbins <arnold@skeeve.com> * gawk.texi: Move design decisions on new API to appendix C. diff --git a/doc/gawk.info b/doc/gawk.info index 8a440f5e..55595e70 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -22126,6 +22126,7 @@ used for `RT', if any. void *opaque; /* private data for input parsers */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, char **rt_start, size_t *rt_len); + ssize_t (*read_func)(); void (*close_func)(struct awk_input *iobuf); struct stat sbuf; /* stat buf */ } awk_input_buf_t; @@ -22175,6 +22176,12 @@ function then fills in at least the `get_record' field of the input records. Said function is the core of the input parser. Its behavior is described below. +`ssize_t (*read_func)();' + This function pointer should point to a function that has the same + behavior as the standard POSIX `read()' system call. It is an + alternative to the `get_record' pointer. Its behavior is also + described below. 
+ `void (*close_func)(struct awk_input *iobuf);' This function pointer should point to a function that does the "tear down." It should release any resources allocated by @@ -22227,6 +22234,23 @@ equal -1, `gawk' automatically updates the `ERRNO' variable based on the value of `*errcode' (e.g., setting `*errcode = errno' should do the right thing). + As an alternative to supplying a function that returns an input +record, you may instead supply a function that simply reads bytes, and +let `gawk' parse the data into records. If you do so, the data should +be returned in the multibyte encoding of the current locale. Such a +function should follow the same behavior as the `read()' system call, +and you fill in the `read_func' pointer with its address in the +`awk_input_buf_t' structure. + + By default, `gawk' sets the `read_func' pointer to point to the +`read()' system call. So your extension need not set this field +explicitly. + + NOTE: You must choose one method or the other: either a function + that returns a record, or one that returns raw data. In + particular, if you supply a function to get a record, `gawk' will + call it, and never call the raw read function. + `gawk' ships with a sample extension that reads directories, returning records for each entry in the directory (*note Extension Sample Readdir::). 
You may wish to use that code as a guide for writing @@ -32226,116 +32250,116 @@ Node: Extension Functions887880 Node: Exit Callback Functions890054 Node: Extension Version String891297 Node: Input Parsers891947 -Node: Output Wrappers900534 -Node: Two-way processors904950 -Node: Printing Messages907080 -Ref: Printing Messages-Footnote-1908157 -Node: Updating `ERRNO'908309 -Node: Accessing Parameters909048 -Node: Symbol Table Access910278 -Node: Symbol table by name910790 -Ref: Symbol table by name-Footnote-1912960 -Node: Symbol table by cookie913040 -Ref: Symbol table by cookie-Footnote-1917169 -Node: Cached values917232 -Ref: Cached values-Footnote-1920675 -Node: Array Manipulation920766 -Ref: Array Manipulation-Footnote-1921864 -Node: Array Data Types921903 -Ref: Array Data Types-Footnote-1924606 -Node: Array Functions924698 -Node: Flattening Arrays928464 -Node: Creating Arrays935303 -Node: Extension API Variables940098 -Node: Extension Versioning940734 -Node: Extension API Informational Variables942635 -Node: Extension API Boilerplate943721 -Node: Finding Extensions947552 -Node: Extension Example948099 -Node: Internal File Description948837 -Node: Internal File Ops952525 -Ref: Internal File Ops-Footnote-1963972 -Node: Using Internal File Ops964112 -Ref: Using Internal File Ops-Footnote-1966465 -Node: Extension Samples966731 -Node: Extension Sample File Functions968174 -Node: Extension Sample Fnmatch976647 -Node: Extension Sample Fork978373 -Node: Extension Sample Ord979587 -Node: Extension Sample Readdir980363 -Node: Extension Sample Revout981867 -Node: Extension Sample Rev2way982460 -Node: Extension Sample Read write array983150 -Node: Extension Sample Readfile985033 -Node: Extension Sample API Tests985788 -Node: Extension Sample Time986313 -Node: gawkextlib987620 -Node: Language History990001 -Node: V7/SVR3.1991523 -Node: SVR4993844 -Node: POSIX995286 -Node: BTL996294 -Node: POSIX/GNU997099 -Node: Common Extensions1002634 -Node: Ranges and Locales1003693 -Ref: 
Ranges and Locales-Footnote-11008311 -Ref: Ranges and Locales-Footnote-21008338 -Ref: Ranges and Locales-Footnote-31008598 -Node: Contributors1008819 -Node: Installation1013115 -Node: Gawk Distribution1014009 -Node: Getting1014493 -Node: Extracting1015319 -Node: Distribution contents1017011 -Node: Unix Installation1022272 -Node: Quick Installation1022889 -Node: Additional Configuration Options1024851 -Node: Configuration Philosophy1026328 -Node: Non-Unix Installation1028670 -Node: PC Installation1029128 -Node: PC Binary Installation1030427 -Node: PC Compiling1032275 -Node: PC Testing1035219 -Node: PC Using1036395 -Node: Cygwin1040580 -Node: MSYS1041580 -Node: VMS Installation1042094 -Node: VMS Compilation1042697 -Ref: VMS Compilation-Footnote-11043704 -Node: VMS Installation Details1043762 -Node: VMS Running1045397 -Node: VMS Old Gawk1047004 -Node: Bugs1047478 -Node: Other Versions1051330 -Node: Notes1056645 -Node: Compatibility Mode1057445 -Node: Additions1058228 -Node: Accessing The Source1059155 -Node: Adding Code1060758 -Node: New Ports1066800 -Node: Derived Files1070935 -Ref: Derived Files-Footnote-11076256 -Ref: Derived Files-Footnote-21076290 -Ref: Derived Files-Footnote-31076890 -Node: Future Extensions1076988 -Node: Implementation Limitations1077569 -Node: Extension Design1078821 -Node: Old Extension Problems1079970 -Ref: Old Extension Problems-Footnote-11081478 -Node: Extension New Mechanism Goals1081535 -Ref: Extension New Mechanism Goals-Footnote-11084894 -Node: Extension Other Design Decisions1085080 -Node: Extension Future Growth1087186 -Node: Old Extension Mechansim1088007 -Node: Basic Concepts1089764 -Node: Basic High Level1090445 -Ref: figure-general-flow1090716 -Ref: figure-process-flow1091315 -Ref: Basic High Level-Footnote-11094544 -Node: Basic Data Typing1094729 -Node: Glossary1098084 -Node: Copying1123395 -Node: GNU Free Documentation License1160952 -Node: Index1186089 +Node: Output Wrappers901664 +Node: Two-way processors906080 +Node: 
Printing Messages908210 +Ref: Printing Messages-Footnote-1909287 +Node: Updating `ERRNO'909439 +Node: Accessing Parameters910178 +Node: Symbol Table Access911408 +Node: Symbol table by name911920 +Ref: Symbol table by name-Footnote-1914090 +Node: Symbol table by cookie914170 +Ref: Symbol table by cookie-Footnote-1918299 +Node: Cached values918362 +Ref: Cached values-Footnote-1921805 +Node: Array Manipulation921896 +Ref: Array Manipulation-Footnote-1922994 +Node: Array Data Types923033 +Ref: Array Data Types-Footnote-1925736 +Node: Array Functions925828 +Node: Flattening Arrays929594 +Node: Creating Arrays936433 +Node: Extension API Variables941228 +Node: Extension Versioning941864 +Node: Extension API Informational Variables943765 +Node: Extension API Boilerplate944851 +Node: Finding Extensions948682 +Node: Extension Example949229 +Node: Internal File Description949967 +Node: Internal File Ops953655 +Ref: Internal File Ops-Footnote-1965102 +Node: Using Internal File Ops965242 +Ref: Using Internal File Ops-Footnote-1967595 +Node: Extension Samples967861 +Node: Extension Sample File Functions969304 +Node: Extension Sample Fnmatch977777 +Node: Extension Sample Fork979503 +Node: Extension Sample Ord980717 +Node: Extension Sample Readdir981493 +Node: Extension Sample Revout982997 +Node: Extension Sample Rev2way983590 +Node: Extension Sample Read write array984280 +Node: Extension Sample Readfile986163 +Node: Extension Sample API Tests986918 +Node: Extension Sample Time987443 +Node: gawkextlib988750 +Node: Language History991131 +Node: V7/SVR3.1992653 +Node: SVR4994974 +Node: POSIX996416 +Node: BTL997424 +Node: POSIX/GNU998229 +Node: Common Extensions1003764 +Node: Ranges and Locales1004823 +Ref: Ranges and Locales-Footnote-11009441 +Ref: Ranges and Locales-Footnote-21009468 +Ref: Ranges and Locales-Footnote-31009728 +Node: Contributors1009949 +Node: Installation1014245 +Node: Gawk Distribution1015139 +Node: Getting1015623 +Node: Extracting1016449 +Node: Distribution 
contents1018141 +Node: Unix Installation1023402 +Node: Quick Installation1024019 +Node: Additional Configuration Options1025981 +Node: Configuration Philosophy1027458 +Node: Non-Unix Installation1029800 +Node: PC Installation1030258 +Node: PC Binary Installation1031557 +Node: PC Compiling1033405 +Node: PC Testing1036349 +Node: PC Using1037525 +Node: Cygwin1041710 +Node: MSYS1042710 +Node: VMS Installation1043224 +Node: VMS Compilation1043827 +Ref: VMS Compilation-Footnote-11044834 +Node: VMS Installation Details1044892 +Node: VMS Running1046527 +Node: VMS Old Gawk1048134 +Node: Bugs1048608 +Node: Other Versions1052460 +Node: Notes1057775 +Node: Compatibility Mode1058575 +Node: Additions1059358 +Node: Accessing The Source1060285 +Node: Adding Code1061888 +Node: New Ports1067930 +Node: Derived Files1072065 +Ref: Derived Files-Footnote-11077386 +Ref: Derived Files-Footnote-21077420 +Ref: Derived Files-Footnote-31078020 +Node: Future Extensions1078118 +Node: Implementation Limitations1078699 +Node: Extension Design1079951 +Node: Old Extension Problems1081100 +Ref: Old Extension Problems-Footnote-11082608 +Node: Extension New Mechanism Goals1082665 +Ref: Extension New Mechanism Goals-Footnote-11086024 +Node: Extension Other Design Decisions1086210 +Node: Extension Future Growth1088316 +Node: Old Extension Mechansim1089137 +Node: Basic Concepts1090894 +Node: Basic High Level1091575 +Ref: figure-general-flow1091846 +Ref: figure-process-flow1092445 +Ref: Basic High Level-Footnote-11095674 +Node: Basic Data Typing1095859 +Node: Glossary1099214 +Node: Copying1124525 +Node: GNU Free Documentation License1162082 +Node: Index1187219 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 90058424..922f7ccd 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -29060,6 +29060,7 @@ typedef struct awk_input @{ void *opaque; /* private data for input parsers */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, char **rt_start, size_t *rt_len); + ssize_t 
(*read_func)(); void (*close_func)(struct awk_input *iobuf); struct stat sbuf; /* stat buf */ @} awk_input_buf_t; @@ -29115,6 +29116,12 @@ This function pointer should point to a function that creates the input records. Said function is the core of the input parser. Its behavior is described below. +@item ssize_t (*read_func)(); +This function pointer should point to a function that has the +same behavior as the standard POSIX @code{read()} system call. +It is an alternative to the @code{get_record} pointer. Its behavior +is also described below. + @item void (*close_func)(struct awk_input *iobuf); This function pointer should point to a function that does the ``tear down.'' It should release any resources allocated by @@ -29172,6 +29179,25 @@ does not equal @minus{}1, @command{gawk} automatically updates the @code{ERRNO} variable based on the value of @code{*errcode} (e.g., setting @samp{*errcode = errno} should do the right thing). +As an alternative to supplying a function that returns an input record, +you may instead supply a function that simply reads bytes, and let +@command{gawk} parse the data into records. If you do so, the data +should be returned in the multibyte encoding of the current locale. +Such a function should follow the same behavior as the @code{read()} +system call, and you fill in the @code{read_func} pointer with its +address in the @code{awk_input_buf_t} structure. + +By default, @command{gawk} sets the @code{read_func} pointer to +point to the @code{read()} system call. So your extension need not +set this field explicitly. + +@quotation NOTE +You must choose one method or the other: either a function that +returns a record, or one that returns raw data. In particular, +if you supply a function to get a record, @command{gawk} will +call it, and never call the raw read function. 
+@end quotation + @command{gawk} ships with a sample extension that reads directories, returning records for each entry in the directory (@pxref{Extension Sample Readdir}). You may wish to use that code as a guide for writing @@ -152,6 +152,12 @@ typedef struct awk_input { char **rt_start, size_t *rt_len); /* + * No argument prototype on read_func to allow for older systems + * whose headers are not up to date. + */ + ssize_t (*read_func)(); + + /* * The close_func is called to allow the parser to free private data. * Gawk itself will close the fd unless close_func first sets it to * INVALID_HANDLE. @@ -2877,7 +2877,7 @@ iop_alloc(int fd, const char *name, int errno_val) memset(iop, '\0', sizeof(IOBUF)); iop->public.fd = fd; iop->public.name = name; - iop->read_func = ( ssize_t(*)() ) read; + iop->public.read_func = ( ssize_t(*)() ) read; iop->valid = false; iop->errcode = errno_val; @@ -3343,7 +3343,7 @@ get_a_record(char **out, /* pointer to pointer to data */ /* fill initial buffer */ if (has_no_data(iop) || no_data_left(iop)) { - iop->count = iop->read_func(iop->public.fd, iop->buf, iop->readsize); + iop->count = iop->public.read_func(iop->public.fd, iop->buf, iop->readsize); if (iop->count == 0) { iop->flag |= IOP_AT_EOF; return EOF; @@ -3409,7 +3409,7 @@ get_a_record(char **out, /* pointer to pointer to data */ amt_to_read = min(amt_to_read, SSIZE_MAX); #endif - iop->count = iop->read_func(iop->public.fd, iop->dataend, amt_to_read); + iop->count = iop->public.read_func(iop->public.fd, iop->dataend, amt_to_read); if (iop->count == -1) { *errcode = errno; iop->flag |= IOP_AT_EOF; @@ -3705,7 +3705,10 @@ get_read_timeout(IOBUF *iop) } else tmout = read_default_timeout; /* initialized from env. variable in init_io() */ - iop->read_func = tmout > 0 ? 
read_with_timeout : ( ssize_t(*)() ) read; + /* overwrite read routine only if an extension has not done so */ + if ((iop->public.read_func == ( ssize_t(*)() ) read) && tmout > 0) + iop->public.read_func = read_with_timeout; + return tmout; } |