From 62fe07b69e522c909aad303b31443cc3c9bdf6c0 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Sun, 5 Mar 2017 17:05:36 -0500 Subject: Enable an API input parser to supply an array of field widths to override the default gawk field parsing mechanism. --- gawkapi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index 5071adce..6d552b8f 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -150,6 +150,18 @@ typedef struct awk_input { int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, char **rt_start, size_t *rt_len); + /* + * If this pointer is non-NULL, then this record should be parsed + * using the supplied field widths instead of the default gawk + * field parsing mechanism. The field_width array should have + * at least 2*NF+1 elements, and the value of field_width[2*NF] + * must be negative. The first entry field_width[0] should contain + * the number of bytes to skip before $1; field_width[1] contains + * the number of bytes in $1. Note that these values are specified + * in bytes, not (potentially multi-byte) characters! + */ + const int *field_width; + /* * No argument prototype on read_func to allow for older systems * whose headers are not up to date. -- cgit v1.2.3 From d6406b66add5652130385942a7e05ebc9ea799ce Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Mon, 6 Mar 2017 09:20:33 -0500 Subject: Add a 6th argument to the API get_record function instead of having a separate field_width array pointer in the input buf. --- gawkapi.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index 6d552b8f..6fa022ca 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -146,21 +146,24 @@ typedef struct awk_input { * than zero, gawk will automatically update the ERRNO variable based * on the value of *errcode (e.g., setting *errcode = errno should do * the right thing). 
- */ - int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, - char **rt_start, size_t *rt_len); - - /* - * If this pointer is non-NULL, then this record should be parsed - * using the supplied field widths instead of the default gawk - * field parsing mechanism. The field_width array should have + * + * If field_width is non-NULL, then its value will be initialized + * to NULL, and the function may set it to point to an array of + * integers supplying field width information to override the default + * gawk field parsing mechanism. The field_width array should have * at least 2*NF+1 elements, and the value of field_width[2*NF] * must be negative. The first entry field_width[0] should contain * the number of bytes to skip before $1; field_width[1] contains * the number of bytes in $1. Note that these values are specified - * in bytes, not (potentially multi-byte) characters! + * in bytes, not (potentially multi-byte) characters! And note that this + * array will not be copied by gawk; it must persist at least until the + * next call to get_record or close_func. Note that field_width will + * be NULL when getline is assigning the results to a variable, thus + * field parsing is not needed. */ - const int *field_width; + int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, + char **rt_start, size_t *rt_len, + const int **field_width); /* * No argument prototype on read_func to allow for older systems -- cgit v1.2.3 From 39c46265139aa8faf87160b30710876bde4c6ba9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Thu, 9 Mar 2017 20:44:09 -0500 Subject: For API input field parsing, use an array of structs instead of an array of integers. 
--- gawkapi.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index 6fa022ca..e744a0fc 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -117,6 +117,19 @@ typedef enum awk_bool { awk_true } awk_bool_t; /* we don't use <stdbool.h> on purpose */ +/* + * If the input parser would like to specify the field positions in the input + * record, it may populate an array of awk_input_field_info_t structures + * to indicate the location of each field. The 0th array element contains + * the information about field $1, and the NFth element should set skip + * to a negative value. For both skip and len, the value should be in + * bytes, not (potentially multi-byte) characters. + */ +typedef struct { + int skip; /* # of bytes to skip before field starts */ + size_t len; /* # of bytes in field */ +} awk_input_field_info_t; + /* The information about input files that input parsers need to know: */ typedef struct awk_input { const char *name; /* filename */ @@ -149,12 +162,10 @@ typedef struct awk_input { * * If field_width is non-NULL, then its value will be initialized * to NULL, and the function may set it to point to an array of - * integers supplying field width information to override the default + * structures supplying field width information to override the default * gawk field parsing mechanism. The field_width array should have - * at least 2*NF+1 elements, and the value of field_width[2*NF] - * must be negative. The first entry field_width[0] should contain - * the number of bytes to skip before $1; field_width[1] contains - * the number of bytes in $1. Note that these values are specified + * at least NF+1 elements, and the value of field_width[NF].skip + * must be negative. Note that these values are specified * in bytes, not (potentially multi-byte) characters! And note that this * array will not be copied by gawk; it must persist at least until the * next call to get_record or close_func. 
Note that field_width will @@ -163,7 +174,7 @@ typedef struct awk_input { */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, char **rt_start, size_t *rt_len, - const int **field_width); + const awk_input_field_info_t **field_width); /* * No argument prototype on read_func to allow for older systems -- cgit v1.2.3 From d1bebd3cbf60fa25883271512cf63e0c3275e3ef Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Tue, 21 Mar 2017 13:22:18 -0400 Subject: Enhance FIELDWIDTHS syntax to support a skip prefix, and unify logic with API field parsing. --- gawkapi.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index e744a0fc..1ea067f4 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -119,16 +119,27 @@ typedef enum awk_bool { /* * If the input parser would like to specify the field positions in the input - * record, it may populate an array of awk_input_field_info_t structures - * to indicate the location of each field. The 0th array element contains - * the information about field $1, and the NFth element should set skip - * to a negative value. For both skip and len, the value should be in - * bytes, not (potentially multi-byte) characters. + * record, it may populate an awk_fieldwidth_info_t structure to indicate + * the location of each field. The use_chars boolean controls whether the + * field lengths are specified in terms of bytes or potentially multi-byte + * characters. Performance will be better if the values are supplied in + * terms of bytes. The fields[0].skip value indicates how many bytes (or + * characters to skip) before $1, and fields[0].len is the length of $1, etc. 
*/ typedef struct { - int skip; /* # of bytes to skip before field starts */ - size_t len; /* # of bytes in field */ -} awk_input_field_info_t; + awk_bool_t use_chars; /* false ==> use bytes */ + size_t nf; + struct awk_field_info { + size_t skip; /* # to skip before field starts */ + size_t len; /* length of field */ + } fields[1]; /* actual dimension should be nf */ +} awk_fieldwidth_info_t; +/* + * This macro calculates the total struct size needed. This is useful when + * calling malloc or realloc. + */ +#define awk_fieldwidth_info_size(NF) (sizeof(awk_fieldwidth_info_t) + \ + (((NF)-1) * sizeof(struct awk_field_info))) /* The information about input files that input parsers need to know: */ typedef struct awk_input { @@ -174,7 +185,7 @@ typedef struct awk_input { */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, char **rt_start, size_t *rt_len, - const awk_input_field_info_t **field_width); + const awk_fieldwidth_info_t **field_width); /* * No argument prototype on read_func to allow for older systems -- cgit v1.2.3 From 1a417d68f71b18525c572b903086f451ed75902b Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Wed, 22 Mar 2017 08:04:39 -0400 Subject: Fix gawkapi.h comment describing the new get_record field_width argument. --- gawkapi.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index 1ea067f4..da538115 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -171,15 +171,12 @@ typedef struct awk_input { * on the value of *errcode (e.g., setting *errcode = errno should do * the right thing). * - * If field_width is non-NULL, then its value will be initialized - * to NULL, and the function may set it to point to an array of - * structures supplying field width information to override the default - * gawk field parsing mechanism. The field_width array should have - * at least NF+1 elements, and the value of field_width[NF].skip - * must be negative. 
Note that these values are specified - * in bytes, not (potentially multi-byte) characters! And note that this - * array will not be copied by gawk; it must persist at least until the - * next call to get_record or close_func. Note that field_width will + * If field_width is non-NULL, then *field_width will be initialized + * to NULL, and the function may set it to point to a structure + * supplying field width information to override the default + * gawk field parsing mechanism. Note that this structure will not + * be copied by gawk; it must persist at least until the next call + * to get_record or close_func. Note also that field_width will * be NULL when getline is assigning the results to a variable, thus * field parsing is not needed. */ -- cgit v1.2.3 From 61b4108f82f30deaabf03eb6dbc0e64edeffdb6e Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 27 Mar 2017 21:27:50 +0300 Subject: Minor edits in feature/api-parser preparatory to merging. --- gawkapi.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index da538115..a8d6279f 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -124,16 +124,18 @@ typedef enum awk_bool { * field lengths are specified in terms of bytes or potentially multi-byte * characters. Performance will be better if the values are supplied in * terms of bytes. The fields[0].skip value indicates how many bytes (or - * characters to skip) before $1, and fields[0].len is the length of $1, etc. + * characters) to skip before $1, and fields[0].len is the length of $1, etc. 
*/ + typedef struct { awk_bool_t use_chars; /* false ==> use bytes */ size_t nf; struct awk_field_info { - size_t skip; /* # to skip before field starts */ + size_t skip; /* amount to skip before field starts */ size_t len; /* length of field */ - } fields[1]; /* actual dimension should be nf */ + } fields[1]; /* actual dimension should be nf */ } awk_fieldwidth_info_t; + /* * This macro calculates the total struct size needed. This is useful when * calling malloc or realloc. -- cgit v1.2.3 From 215618921d2515040bd02fecc1a3438cd4949a5b Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Sun, 9 Apr 2017 18:53:50 -0400 Subject: Fix comment in fw_parse_field and white space in gawkapi.h. --- gawkapi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gawkapi.h') diff --git a/gawkapi.h b/gawkapi.h index a8d6279f..484ab27e 100644 --- a/gawkapi.h +++ b/gawkapi.h @@ -132,7 +132,7 @@ typedef struct { size_t nf; struct awk_field_info { size_t skip; /* amount to skip before field starts */ - size_t len; /* length of field */ + size_t len; /* length of field */ } fields[1]; /* actual dimension should be nf */ } awk_fieldwidth_info_t; -- cgit v1.2.3