aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--awk.h6
-rw-r--r--doc/ChangeLog4
-rw-r--r--doc/gawk.info246
-rw-r--r--doc/gawk.texi26
-rw-r--r--gawkapi.h6
-rw-r--r--io.c11
7 files changed, 184 insertions, 121 deletions
diff --git a/ChangeLog b/ChangeLog
index 63edbba3..0914e2da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,12 @@
* bootstrap.sh: Touch extension/aclocal.m4 also.
+ Unrelated: Extend input parser API:
+
+ * awk.h (IOBUF): Remove read_func pointer.
+ * gawkapi.h (awk_input_buf_t): Move it to here.
+ * io.c (iop_alloc, get_a_record, get_read_timeout): Adjust code.
+
2012-12-18 Andrew J. Schorr <aschorr@telemetry-investments.com>
* gawkapi.c (sym_update_real): If setting a scalar variable that exists
diff --git a/awk.h b/awk.h
index caf0da1e..60a0db71 100644
--- a/awk.h
+++ b/awk.h
@@ -886,12 +886,6 @@ typedef struct iobuf {
ssize_t count; /* amount read last time */
size_t scanoff; /* where we were in the buffer when we had
to regrow/refill */
- /*
- * No argument prototype on read_func. See get_src_buf()
- * in awkgram.y.
- */
- ssize_t (*read_func)();
-
bool valid;
int errcode;
diff --git a/doc/ChangeLog b/doc/ChangeLog
index b6866938..0e0b806d 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2012-12-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.texi (Input Parsers): Add info on read_func.
+
2012-12-16 Arnold D. Robbins <arnold@skeeve.com>
* gawk.texi: Move design decisions on new API to appendix C.
diff --git a/doc/gawk.info b/doc/gawk.info
index 8a440f5e..55595e70 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -22126,6 +22126,7 @@ used for `RT', if any.
void *opaque; /* private data for input parsers */
int (*get_record)(char **out, struct awk_input *iobuf,
int *errcode, char **rt_start, size_t *rt_len);
+ ssize_t (*read_func)();
void (*close_func)(struct awk_input *iobuf);
struct stat sbuf; /* stat buf */
} awk_input_buf_t;
@@ -22175,6 +22176,12 @@ function then fills in at least the `get_record' field of the
input records. Said function is the core of the input parser.
Its behavior is described below.
+`ssize_t (*read_func)();'
+ This function pointer should point to function that has the same
+ behavior as the standard POSIX `read()' system call. It is an
+ alternative to the `get_record' pointer. Its behavior is also
+ described below.
+
`void (*close_func)(struct awk_input *iobuf);'
This function pointer should point to a function that does the
"tear down." It should release any resources allocated by
@@ -22227,6 +22234,23 @@ equal -1, `gawk' automatically updates the `ERRNO' variable based on
the value of `*errcode' (e.g., setting `*errcode = errno' should do the
right thing).
+ As an alternative to supplying a function that returns an input
+record, you may instead supply a function that simply reads bytes, and
+let `gawk' parse the data into records. If you do so, the data should
+be returned in the multibyte encoding of the current locale. Such a
+function should follow the same behavior as the `read()' system call,
+and you fill in the `read_func' pointer with its address in the
+`awk_input_buf_t' structure.
+
+ By default, `gawk' sets the `read_func' pointer to point to the
+`read()' system call. So your extension need not set this field
+explicitly.
+
+ NOTE: You must choose one method or the other: either a function
+ that returns a record, or one that returns raw data. In
+ particular, if you supply a function to get a record, `gawk' will
+ call it, and never call the raw read function.
+
`gawk' ships with a sample extension that reads directories,
returning records for each entry in the directory (*note Extension
Sample Readdir::). You may wish to use that code as a guide for writing
@@ -32226,116 +32250,116 @@ Node: Extension Functions887880
Node: Exit Callback Functions890054
Node: Extension Version String891297
Node: Input Parsers891947
-Node: Output Wrappers900534
-Node: Two-way processors904950
-Node: Printing Messages907080
-Ref: Printing Messages-Footnote-1908157
-Node: Updating `ERRNO'908309
-Node: Accessing Parameters909048
-Node: Symbol Table Access910278
-Node: Symbol table by name910790
-Ref: Symbol table by name-Footnote-1912960
-Node: Symbol table by cookie913040
-Ref: Symbol table by cookie-Footnote-1917169
-Node: Cached values917232
-Ref: Cached values-Footnote-1920675
-Node: Array Manipulation920766
-Ref: Array Manipulation-Footnote-1921864
-Node: Array Data Types921903
-Ref: Array Data Types-Footnote-1924606
-Node: Array Functions924698
-Node: Flattening Arrays928464
-Node: Creating Arrays935303
-Node: Extension API Variables940098
-Node: Extension Versioning940734
-Node: Extension API Informational Variables942635
-Node: Extension API Boilerplate943721
-Node: Finding Extensions947552
-Node: Extension Example948099
-Node: Internal File Description948837
-Node: Internal File Ops952525
-Ref: Internal File Ops-Footnote-1963972
-Node: Using Internal File Ops964112
-Ref: Using Internal File Ops-Footnote-1966465
-Node: Extension Samples966731
-Node: Extension Sample File Functions968174
-Node: Extension Sample Fnmatch976647
-Node: Extension Sample Fork978373
-Node: Extension Sample Ord979587
-Node: Extension Sample Readdir980363
-Node: Extension Sample Revout981867
-Node: Extension Sample Rev2way982460
-Node: Extension Sample Read write array983150
-Node: Extension Sample Readfile985033
-Node: Extension Sample API Tests985788
-Node: Extension Sample Time986313
-Node: gawkextlib987620
-Node: Language History990001
-Node: V7/SVR3.1991523
-Node: SVR4993844
-Node: POSIX995286
-Node: BTL996294
-Node: POSIX/GNU997099
-Node: Common Extensions1002634
-Node: Ranges and Locales1003693
-Ref: Ranges and Locales-Footnote-11008311
-Ref: Ranges and Locales-Footnote-21008338
-Ref: Ranges and Locales-Footnote-31008598
-Node: Contributors1008819
-Node: Installation1013115
-Node: Gawk Distribution1014009
-Node: Getting1014493
-Node: Extracting1015319
-Node: Distribution contents1017011
-Node: Unix Installation1022272
-Node: Quick Installation1022889
-Node: Additional Configuration Options1024851
-Node: Configuration Philosophy1026328
-Node: Non-Unix Installation1028670
-Node: PC Installation1029128
-Node: PC Binary Installation1030427
-Node: PC Compiling1032275
-Node: PC Testing1035219
-Node: PC Using1036395
-Node: Cygwin1040580
-Node: MSYS1041580
-Node: VMS Installation1042094
-Node: VMS Compilation1042697
-Ref: VMS Compilation-Footnote-11043704
-Node: VMS Installation Details1043762
-Node: VMS Running1045397
-Node: VMS Old Gawk1047004
-Node: Bugs1047478
-Node: Other Versions1051330
-Node: Notes1056645
-Node: Compatibility Mode1057445
-Node: Additions1058228
-Node: Accessing The Source1059155
-Node: Adding Code1060758
-Node: New Ports1066800
-Node: Derived Files1070935
-Ref: Derived Files-Footnote-11076256
-Ref: Derived Files-Footnote-21076290
-Ref: Derived Files-Footnote-31076890
-Node: Future Extensions1076988
-Node: Implementation Limitations1077569
-Node: Extension Design1078821
-Node: Old Extension Problems1079970
-Ref: Old Extension Problems-Footnote-11081478
-Node: Extension New Mechanism Goals1081535
-Ref: Extension New Mechanism Goals-Footnote-11084894
-Node: Extension Other Design Decisions1085080
-Node: Extension Future Growth1087186
-Node: Old Extension Mechansim1088007
-Node: Basic Concepts1089764
-Node: Basic High Level1090445
-Ref: figure-general-flow1090716
-Ref: figure-process-flow1091315
-Ref: Basic High Level-Footnote-11094544
-Node: Basic Data Typing1094729
-Node: Glossary1098084
-Node: Copying1123395
-Node: GNU Free Documentation License1160952
-Node: Index1186089
+Node: Output Wrappers901664
+Node: Two-way processors906080
+Node: Printing Messages908210
+Ref: Printing Messages-Footnote-1909287
+Node: Updating `ERRNO'909439
+Node: Accessing Parameters910178
+Node: Symbol Table Access911408
+Node: Symbol table by name911920
+Ref: Symbol table by name-Footnote-1914090
+Node: Symbol table by cookie914170
+Ref: Symbol table by cookie-Footnote-1918299
+Node: Cached values918362
+Ref: Cached values-Footnote-1921805
+Node: Array Manipulation921896
+Ref: Array Manipulation-Footnote-1922994
+Node: Array Data Types923033
+Ref: Array Data Types-Footnote-1925736
+Node: Array Functions925828
+Node: Flattening Arrays929594
+Node: Creating Arrays936433
+Node: Extension API Variables941228
+Node: Extension Versioning941864
+Node: Extension API Informational Variables943765
+Node: Extension API Boilerplate944851
+Node: Finding Extensions948682
+Node: Extension Example949229
+Node: Internal File Description949967
+Node: Internal File Ops953655
+Ref: Internal File Ops-Footnote-1965102
+Node: Using Internal File Ops965242
+Ref: Using Internal File Ops-Footnote-1967595
+Node: Extension Samples967861
+Node: Extension Sample File Functions969304
+Node: Extension Sample Fnmatch977777
+Node: Extension Sample Fork979503
+Node: Extension Sample Ord980717
+Node: Extension Sample Readdir981493
+Node: Extension Sample Revout982997
+Node: Extension Sample Rev2way983590
+Node: Extension Sample Read write array984280
+Node: Extension Sample Readfile986163
+Node: Extension Sample API Tests986918
+Node: Extension Sample Time987443
+Node: gawkextlib988750
+Node: Language History991131
+Node: V7/SVR3.1992653
+Node: SVR4994974
+Node: POSIX996416
+Node: BTL997424
+Node: POSIX/GNU998229
+Node: Common Extensions1003764
+Node: Ranges and Locales1004823
+Ref: Ranges and Locales-Footnote-11009441
+Ref: Ranges and Locales-Footnote-21009468
+Ref: Ranges and Locales-Footnote-31009728
+Node: Contributors1009949
+Node: Installation1014245
+Node: Gawk Distribution1015139
+Node: Getting1015623
+Node: Extracting1016449
+Node: Distribution contents1018141
+Node: Unix Installation1023402
+Node: Quick Installation1024019
+Node: Additional Configuration Options1025981
+Node: Configuration Philosophy1027458
+Node: Non-Unix Installation1029800
+Node: PC Installation1030258
+Node: PC Binary Installation1031557
+Node: PC Compiling1033405
+Node: PC Testing1036349
+Node: PC Using1037525
+Node: Cygwin1041710
+Node: MSYS1042710
+Node: VMS Installation1043224
+Node: VMS Compilation1043827
+Ref: VMS Compilation-Footnote-11044834
+Node: VMS Installation Details1044892
+Node: VMS Running1046527
+Node: VMS Old Gawk1048134
+Node: Bugs1048608
+Node: Other Versions1052460
+Node: Notes1057775
+Node: Compatibility Mode1058575
+Node: Additions1059358
+Node: Accessing The Source1060285
+Node: Adding Code1061888
+Node: New Ports1067930
+Node: Derived Files1072065
+Ref: Derived Files-Footnote-11077386
+Ref: Derived Files-Footnote-21077420
+Ref: Derived Files-Footnote-31078020
+Node: Future Extensions1078118
+Node: Implementation Limitations1078699
+Node: Extension Design1079951
+Node: Old Extension Problems1081100
+Ref: Old Extension Problems-Footnote-11082608
+Node: Extension New Mechanism Goals1082665
+Ref: Extension New Mechanism Goals-Footnote-11086024
+Node: Extension Other Design Decisions1086210
+Node: Extension Future Growth1088316
+Node: Old Extension Mechansim1089137
+Node: Basic Concepts1090894
+Node: Basic High Level1091575
+Ref: figure-general-flow1091846
+Ref: figure-process-flow1092445
+Ref: Basic High Level-Footnote-11095674
+Node: Basic Data Typing1095859
+Node: Glossary1099214
+Node: Copying1124525
+Node: GNU Free Documentation License1162082
+Node: Index1187219

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 90058424..922f7ccd 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -29060,6 +29060,7 @@ typedef struct awk_input @{
void *opaque; /* private data for input parsers */
int (*get_record)(char **out, struct awk_input *iobuf,
int *errcode, char **rt_start, size_t *rt_len);
+ ssize_t (*read_func)();
void (*close_func)(struct awk_input *iobuf);
struct stat sbuf; /* stat buf */
@} awk_input_buf_t;
@@ -29115,6 +29116,12 @@ This function pointer should point to a function that creates the input
records. Said function is the core of the input parser. Its behavior
is described below.
+@item ssize_t (*read_func)();
+This function pointer should point to a function that has the
+same behavior as the standard POSIX @code{read()} system call.
+It is an alternative to the @code{get_record} pointer. Its behavior
+is also described below.
+
@item void (*close_func)(struct awk_input *iobuf);
This function pointer should point to a function that does
the ``tear down.'' It should release any resources allocated by
@@ -29172,6 +29179,25 @@ does not equal @minus{}1, @command{gawk} automatically updates
the @code{ERRNO} variable based on the value of @code{*errcode} (e.g.,
setting @samp{*errcode = errno} should do the right thing).
+As an alternative to supplying a function that returns an input record,
+you may instead supply a function that simply reads bytes, and let
+@command{gawk} parse the data into records. If you do so, the data
+should be returned in the multibyte encoding of the current locale.
+Such a function should follow the same behavior as the @code{read()}
+system call, and you fill in the @code{read_func} pointer with its
+address in the @code{awk_input_buf_t} structure.
+
+By default, @command{gawk} sets the @code{read_func} pointer to
+point to the @code{read()} system call. So your extension need not
+set this field explicitly.
+
+@quotation NOTE
+You must choose one method or the other: either a function that
+returns a record, or one that returns raw data. In particular,
+if you supply a function to get a record, @command{gawk} will
+call it, and never call the raw read function.
+@end quotation
+
@command{gawk} ships with a sample extension that reads directories,
returning records for each entry in the directory (@pxref{Extension
Sample Readdir}). You may wish to use that code as a guide for writing
diff --git a/gawkapi.h b/gawkapi.h
index 8fc08161..de9197e0 100644
--- a/gawkapi.h
+++ b/gawkapi.h
@@ -152,6 +152,12 @@ typedef struct awk_input {
char **rt_start, size_t *rt_len);
/*
+ * No argument prototype on read_func to allow for older systems
+ * whose headers are not up to date.
+ */
+ ssize_t (*read_func)();
+
+ /*
* The close_func is called to allow the parser to free private data.
* Gawk itself will close the fd unless close_func first sets it to
* INVALID_HANDLE.
diff --git a/io.c b/io.c
index 7559b41f..0b008fc0 100644
--- a/io.c
+++ b/io.c
@@ -2877,7 +2877,7 @@ iop_alloc(int fd, const char *name, int errno_val)
memset(iop, '\0', sizeof(IOBUF));
iop->public.fd = fd;
iop->public.name = name;
- iop->read_func = ( ssize_t(*)() ) read;
+ iop->public.read_func = ( ssize_t(*)() ) read;
iop->valid = false;
iop->errcode = errno_val;
@@ -3343,7 +3343,7 @@ get_a_record(char **out, /* pointer to pointer to data */
/* fill initial buffer */
if (has_no_data(iop) || no_data_left(iop)) {
- iop->count = iop->read_func(iop->public.fd, iop->buf, iop->readsize);
+ iop->count = iop->public.read_func(iop->public.fd, iop->buf, iop->readsize);
if (iop->count == 0) {
iop->flag |= IOP_AT_EOF;
return EOF;
@@ -3409,7 +3409,7 @@ get_a_record(char **out, /* pointer to pointer to data */
amt_to_read = min(amt_to_read, SSIZE_MAX);
#endif
- iop->count = iop->read_func(iop->public.fd, iop->dataend, amt_to_read);
+ iop->count = iop->public.read_func(iop->public.fd, iop->dataend, amt_to_read);
if (iop->count == -1) {
*errcode = errno;
iop->flag |= IOP_AT_EOF;
@@ -3705,7 +3705,10 @@ get_read_timeout(IOBUF *iop)
} else
tmout = read_default_timeout; /* initialized from env. variable in init_io() */
- iop->read_func = tmout > 0 ? read_with_timeout : ( ssize_t(*)() ) read;
+ /* overwrite read routine only if an extension has not done so */
+ if ((iop->public.read_func == ( ssize_t(*)() ) read) && tmout > 0)
+ iop->public.read_func = read_with_timeout;
+
return tmout;
}