From 62fe07b69e522c909aad303b31443cc3c9bdf6c0 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Sun, 5 Mar 2017 17:05:36 -0500 Subject: Enable an API input parser to supply an array of field widths to override the default gawk field parsing mechanism. --- field.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 15 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index 0799fb1b..4a5884a9 100644 --- a/field.c +++ b/field.c @@ -40,6 +40,8 @@ typedef void (* Setfunc)(long, char *, long, NODE *); static long (*parse_field)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); +static long (*save_parse_field)(long, char **, int, NODE *, + Regexp *, Setfunc, NODE *, NODE *, bool); static long re_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long def_parse_field(long, char **, int, NODE *, @@ -50,6 +52,9 @@ static long sc_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long fw_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); +static long api_parse_field(long, char **, int, NODE *, + Regexp *, Setfunc, NODE *, NODE *, bool); +static const int *api_fw = NULL; static long fpat_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static void set_element(long num, char * str, long len, NODE *arr); @@ -252,7 +257,7 @@ rebuild_record() * but better correct than fast. */ void -set_record(const char *buf, int cnt) +set_record(const char *buf, int cnt, const int *fw) { NODE *n; static char *databuf; @@ -306,6 +311,20 @@ set_record(const char *buf, int cnt) n->stfmt = STFMT_UNUSED; n->flags = (STRING|STRCUR|USER_INPUT); /* do not set MALLOC */ fields_arr[0] = n; + if (fw != api_fw) { + if ((api_fw = fw) != NULL) { + if (parse_field != api_parse_field) { + parse_field = api_parse_field; + update_PROCINFO_str("FS", "API"); + } + } + else { + if (parse_field != save_parse_field) { + parse_field = save_parse_field; + update_PROCINFO_str("FS", current_field_sep_str()); + } + } + } #undef INITIAL_SIZE #undef MAX_SIZE @@ -760,6 +779,49 @@ fw_parse_field(long up_to, /* parse only up to this field number */ return nf; } +/* + * api_parse_field --- field parsing using field widths returned by API parser. + * + * This is called from get_field() via (*parse_field)(). + */ +static long +api_parse_field(long up_to, /* parse only up to this field number */ + char **buf, /* on input: string to parse; on output: point to start next */ + int len, + NODE *fs ATTRIBUTE_UNUSED, + Regexp *rp ATTRIBUTE_UNUSED, + Setfunc set, /* routine to set the value of the parsed field */ + NODE *n, + NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */ + bool in_middle ATTRIBUTE_UNUSED) +{ + char *scan = *buf; + long nf = parse_high_water; + char *end = scan + len; + int skiplen; + + if (up_to == UNLIMITED) + nf = 0; + if (len == 0) + return nf; + while (nf < up_to) { + if (((skiplen = api_fw[2*nf]) < 0) || + ((len = api_fw[2*nf+1]) < 0)) { + *buf = end; + return nf; + } + if (skiplen > end - scan) + skiplen = end - scan; + scan += skiplen; + if (len > end - scan) + len = end - scan; + (*set)(++nf, scan, (long) len, n); + scan += len; + } + *buf = scan; + return nf; +} + /* invalidate_field0 --- $0 needs reconstruction */ void @@ -845,7 +907,7 @@ get_field(long requested, Func_ptr *assign) if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) NF = parse_high_water; else if (parse_field == fpat_parse_field) { - /* FPAT parsing is wierd, isolate the special cases */ + /* FPAT parsing is weird, isolate the special cases */ char *rec_start = fields_arr[0]->stptr; char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen; @@ -1057,6 +1119,18 @@ do_patsplit(int nargs) return tmp; } +/* set_parser: update the current (non-API) parser */ + +static void +set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool)) +{ + save_parse_field = func; + if (parse_field != api_parse_field && parse_field != func) { + parse_field = func; + update_PROCINFO_str("FS", current_field_sep_str()); + } +} + /* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */ void @@ -1084,7 +1158,7 @@ set_FIELDWIDTHS() if (fields_arr != NULL) (void) get_field(UNLIMITED - 1, 0); - parse_field = fw_parse_field; + set_parser(fw_parse_field); tmp = force_string(FIELDWIDTHS_node->var_value); scan = tmp->stptr; @@ -1134,7 +1208,6 @@ set_FIELDWIDTHS() } FIELDWIDTHS[i+1] = -1; - update_PROCINFO_str("FS", "FIELDWIDTHS"); if (fatal_error) fatal(_("invalid FIELDWIDTHS value, near `%s'"), scan); @@ -1205,7 +1278,7 @@ choose_fs_function: if (! do_traditional && fs->stlen == 0) { static bool warned = false; - parse_field = null_parse_field; + set_parser(null_parse_field); if (do_lint && ! warned) { warned = true; @@ -1214,10 +1287,10 @@ choose_fs_function: } else if (fs->stlen > 1) { if (do_lint_old) warning(_("old awk does not support regexps as value of `FS'")); - parse_field = re_parse_field; + set_parser(re_parse_field); } else if (RS_is_null) { /* we know that fs->stlen <= 1 */ - parse_field = sc_parse_field; + set_parser(sc_parse_field); if (fs->stlen == 1) { if (fs->stptr[0] == ' ') { default_FS = true; @@ -1233,7 +1306,7 @@ choose_fs_function: } } } else { - parse_field = def_parse_field; + set_parser(def_parse_field); if (fs->stlen == 1) { if (fs->stptr[0] == ' ') @@ -1242,7 +1315,7 @@ choose_fs_function: /* same special case */ strcpy(buf, "[\\\\]"); else - parse_field = sc_parse_field; + set_parser(sc_parse_field); } } if (remake_re) { @@ -1254,7 +1327,7 @@ choose_fs_function: FS_re_yes_case = make_regexp(buf, strlen(buf), false, true, true); FS_re_no_case = make_regexp(buf, strlen(buf), true, true, true); FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case); - parse_field = re_parse_field; + set_parser(re_parse_field); } else if (parse_field == re_parse_field) { FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, false, true, true); FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, true, true); @@ -1270,8 +1343,6 @@ choose_fs_function: */ if (fs->stlen == 1 && parse_field == re_parse_field) FS_regexp = FS_re_yes_case; - - update_PROCINFO_str("FS", "FS"); } /* current_field_sep --- return what field separator is */ @@ -1283,10 +1354,27 @@ current_field_sep() return Using_FIELDWIDTHS; else if (parse_field == fpat_parse_field) return Using_FPAT; + else if (parse_field == api_parse_field) + return Using_API; else return Using_FS; } +/* current_field_sep --- return what field separator is */ + +const char * +current_field_sep_str() +{ + if (parse_field == fw_parse_field) + return "FIELDWIDTHS"; + else if (parse_field == fpat_parse_field) + return "FPAT"; + else if (parse_field == api_parse_field) + return "API"; + else + return "FS"; +} + /* update_PROCINFO_str --- update PROCINFO[sub] with string value */ void @@ -1373,7 +1461,7 @@ set_FPAT() set_fpat_function: fpat = force_string(FPAT_node->var_value); - parse_field = fpat_parse_field; + set_parser(fpat_parse_field); if (remake_re) { refree(FPAT_re_yes_case); @@ -1384,8 +1472,6 @@ set_fpat_function: FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, true, true); FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case); } - - update_PROCINFO_str("FS", "FPAT"); } /* -- cgit v1.2.3 From 54efcb6f4ce81ed0ad2f90e27252f1d8532dd0b5 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Mon, 6 Mar 2017 14:16:51 -0500 Subject: Rename variable in field.c and added a comment for improved clarity. --- field.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index 4a5884a9..276669f8 100644 --- a/field.c +++ b/field.c @@ -40,7 +40,12 @@ typedef void (* Setfunc)(long, char *, long, NODE *); static long (*parse_field)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); -static long (*save_parse_field)(long, char **, int, NODE *, +/* + * N.B. The normal_parse_field function pointer contains the parse_field value + * that should be used except when API field parsing is overriding the default + * field parsing mechanism. + */ +static long (*normal_parse_field)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long re_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); @@ -319,8 +324,8 @@ set_record(const char *buf, int cnt, const int *fw) } } else { - if (parse_field != save_parse_field) { - parse_field = save_parse_field; + if (parse_field != normal_parse_field) { + parse_field = normal_parse_field; update_PROCINFO_str("FS", current_field_sep_str()); } } @@ -1124,7 +1129,7 @@ do_patsplit(int nargs) static void set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool)) { - save_parse_field = func; + normal_parse_field = func; if (parse_field != api_parse_field && parse_field != func) { parse_field = func; update_PROCINFO_str("FS", current_field_sep_str()); -- cgit v1.2.3 From feb12baf11e39f60e57b988d29aa96bda4dddcff Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 9 Mar 2017 22:09:09 +0200 Subject: Minor style edits in field.c. --- field.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index 276669f8..5bdc05f7 100644 --- a/field.c +++ b/field.c @@ -322,12 +322,9 @@ set_record(const char *buf, int cnt, const int *fw) parse_field = api_parse_field; update_PROCINFO_str("FS", "API"); } - } - else { - if (parse_field != normal_parse_field) { - parse_field = normal_parse_field; - update_PROCINFO_str("FS", current_field_sep_str()); - } + } else if (parse_field != normal_parse_field) { + parse_field = normal_parse_field; + update_PROCINFO_str("FS", current_field_sep_str()); } } @@ -789,6 +786,7 @@ fw_parse_field(long up_to, /* parse only up to this field number */ * * This is called from get_field() via (*parse_field)(). */ + static long api_parse_field(long up_to, /* parse only up to this field number */ char **buf, /* on input: string to parse; on output: point to start next */ @@ -1350,7 +1348,7 @@ choose_fs_function: FS_regexp = FS_re_yes_case; } -/* current_field_sep --- return what field separator is */ +/* current_field_sep --- return the field separator type */ field_sep_type current_field_sep() @@ -1365,7 +1363,7 @@ current_field_sep() return Using_FS; } -/* current_field_sep --- return what field separator is */ +/* current_field_sep_str --- return the field separator type as a string */ const char * current_field_sep_str() -- cgit v1.2.3 From 39c46265139aa8faf87160b30710876bde4c6ba9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Thu, 9 Mar 2017 20:44:09 -0500 Subject: For API input field parsing, use an array of structs instead of an array of integers. --- field.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index 5bdc05f7..5ef4d74b 100644 --- a/field.c +++ b/field.c @@ -59,7 +59,7 @@ static long fw_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long api_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); -static const int *api_fw = NULL; +static const awk_input_field_info_t *api_fw = NULL; static long fpat_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static void set_element(long num, char * str, long len, NODE *arr); @@ -262,7 +262,7 @@ rebuild_record() * but better correct than fast. */ void -set_record(const char *buf, int cnt, const int *fw) +set_record(const char *buf, int cnt, const awk_input_field_info_t *fw) { NODE *n; static char *databuf; @@ -802,24 +802,25 @@ api_parse_field(long up_to, /* parse only up to this field number */ long nf = parse_high_water; char *end = scan + len; int skiplen; + size_t flen; if (up_to == UNLIMITED) nf = 0; if (len == 0) return nf; while (nf < up_to) { - if (((skiplen = api_fw[2*nf]) < 0) || - ((len = api_fw[2*nf+1]) < 0)) { + if ((skiplen = api_fw[nf].skip) < 0) { *buf = end; return nf; } if (skiplen > end - scan) skiplen = end - scan; scan += skiplen; - if (len > end - scan) - len = end - scan; - (*set)(++nf, scan, (long) len, n); - scan += len; + flen = api_fw[nf].len; + if (flen > end - scan) + flen = end - scan; + (*set)(++nf, scan, (long) flen, n); + scan += flen; } *buf = scan; return nf; -- cgit v1.2.3 From d1bebd3cbf60fa25883271512cf63e0c3275e3ef Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Tue, 21 Mar 2017 13:22:18 -0400 Subject: Enhance FIELDWIDTHS syntax to support a skip prefix, and unify logic with API field parsing. --- field.c | 196 ++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 92 insertions(+), 104 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index 5ef4d74b..bd333c92 100644 --- a/field.c +++ b/field.c @@ -38,6 +38,8 @@ is_blank(int c) typedef void (* Setfunc)(long, char *, long, NODE *); +/* is the API currently overriding the default parsing mechanism? */ +static bool api_parser_override = false; static long (*parse_field)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); /* @@ -57,9 +59,7 @@ static long sc_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long fw_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); -static long api_parse_field(long, char **, int, NODE *, - Regexp *, Setfunc, NODE *, NODE *, bool); -static const awk_input_field_info_t *api_fw = NULL; +static const awk_fieldwidth_info_t *api_fw = NULL; static long fpat_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static void set_element(long num, char * str, long len, NODE *arr); @@ -74,7 +74,7 @@ static bool resave_fs; static NODE *save_FS; /* save current value of FS when line is read, * to be used in deferred parsing */ -static int *FIELDWIDTHS = NULL; +static awk_fieldwidth_info_t *FIELDWIDTHS = NULL; NODE **fields_arr; /* array of pointers to the field nodes */ bool field0_valid; /* $(>0) has not been changed yet */ @@ -262,7 +262,7 @@ rebuild_record() * but better correct than fast. */ void -set_record(const char *buf, int cnt, const awk_input_field_info_t *fw) +set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *fw) { NODE *n; static char *databuf; @@ -318,11 +318,13 @@ set_record(const char *buf, int cnt, const awk_input_field_info_t *fw) fields_arr[0] = n; if (fw != api_fw) { if ((api_fw = fw) != NULL) { - if (parse_field != api_parse_field) { - parse_field = api_parse_field; + if (! api_parser_override) { + api_parser_override = true; + parse_field = fw_parse_field; update_PROCINFO_str("FS", "API"); } - } else if (parse_field != normal_parse_field) { + } else if (api_parser_override) { + api_parser_override = false; parse_field = normal_parse_field; update_PROCINFO_str("FS", current_field_sep_str()); } @@ -712,83 +714,38 @@ sc_parse_field(long up_to, /* parse only up to this field number */ } /* - * fw_parse_field --- field parsing using FIELDWIDTHS spec - * - * This is called from get_field() via (*parse_field)(). - * This variation is for fields are fixed widths. + * calc_mbslen --- calculate the length in bytes of a multi-byte string + * containing len characters. */ -static long -fw_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs ATTRIBUTE_UNUSED, - Regexp *rp ATTRIBUTE_UNUSED, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n, - NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */ - bool in_middle ATTRIBUTE_UNUSED) + +static size_t +calc_mbslen(char *scan, char *end, size_t len, mbstate_t *mbs) { - char *scan = *buf; - long nf = parse_high_water; - char *end = scan + len; - int nmbc; - size_t mbclen; - size_t mbslen; - size_t lenrest; - char *mbscan; - mbstate_t mbs; - memset(&mbs, 0, sizeof(mbstate_t)); + size_t mbclen; + char *mbscan = scan; - if (up_to == UNLIMITED) - nf = 0; - if (len == 0) - return nf; - for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { - if (gawk_mb_cur_max > 1) { - nmbc = 0; - mbslen = 0; - mbscan = scan; - lenrest = end - scan; - while (nmbc < len && mbslen < lenrest) { - mbclen = mbrlen(mbscan, end - mbscan, &mbs); - if ( mbclen == 1 - || mbclen == (size_t) -1 - || mbclen == (size_t) -2 - || mbclen == 0) { - /* We treat it as a singlebyte character. */ - mbclen = 1; - } - if (mbclen <= end - mbscan) { - mbscan += mbclen; - mbslen += mbclen; - ++nmbc; - } - } - (*set)(++nf, scan, (long) mbslen, n); - scan += mbslen; - } else { - if (len > end - scan) - len = end - scan; - (*set)(++nf, scan, (long) len, n); - scan += len; - } + while (len-- > 0 && mbscan < end) { + mbclen = mbrlen(mbscan, end - mbscan, mbs); + if (!(mbclen > 0 && mbclen <= (size_t)(end - mbscan))) + /* + * We treat it as a singlebyte character. This should + * catch error codes 0, (size_t) -1, and (size_t) -2. + */ + mbclen = 1; + mbscan += mbclen; } - if (len == -1) - *buf = end; - else - *buf = scan; - return nf; + return mbscan - scan; } /* - * api_parse_field --- field parsing using field widths returned by API parser. + * fw_parse_field --- field parsing using FIELDWIDTHS spec * * This is called from get_field() via (*parse_field)(). + * This variation is for fields are fixed widths. */ - static long -api_parse_field(long up_to, /* parse only up to this field number */ +fw_parse_field(long up_to, /* parse only up to this field number */ char **buf, /* on input: string to parse; on output: point to start next */ int len, NODE *fs ATTRIBUTE_UNUSED, @@ -801,26 +758,50 @@ api_parse_field(long up_to, /* parse only up to this field number */ char *scan = *buf; long nf = parse_high_water; char *end = scan + len; - int skiplen; + const awk_fieldwidth_info_t *fw; + mbstate_t mbs; + size_t skiplen; size_t flen; + fw = (api_parser_override ? api_fw : FIELDWIDTHS); + if (up_to == UNLIMITED) nf = 0; if (len == 0) return nf; - while (nf < up_to) { - if ((skiplen = api_fw[nf].skip) < 0) { - *buf = end; - return nf; + if (gawk_mb_cur_max > 1 && fw->use_chars) { + /* + * XXX This may be a bug. Most likely, shift state should + * persist across all fields in a record, if not across record + * boundaries as well. + */ + memset(&mbs, 0, sizeof(mbstate_t)); + while (nf < up_to) { + if (nf >= fw->nf) { + *buf = end; + return nf; + } + scan += calc_mbslen(scan, end, fw->fields[nf].skip, &mbs); + flen = calc_mbslen(scan, end, fw->fields[nf].len, &mbs); + (*set)(++nf, scan, (long) flen, n); + scan += flen; + } + } else { + while (nf < up_to) { + if (nf >= fw->nf) { + *buf = end; + return nf; + } + skiplen = fw->fields[nf].skip; + if (skiplen > end - scan) + skiplen = end - scan; + scan += skiplen; + flen = fw->fields[nf].len; + if (flen > end - scan) + flen = end - scan; + (*set)(++nf, scan, (long) flen, n); + scan += flen; } - if (skiplen > end - scan) - skiplen = end - scan; - scan += skiplen; - flen = api_fw[nf].len; - if (flen > end - scan) - flen = end - scan; - (*set)(++nf, scan, (long) flen, n); - scan += flen; } *buf = scan; return nf; @@ -1129,7 +1110,7 @@ static void set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool)) { normal_parse_field = func; - if (parse_field != api_parse_field && parse_field != func) { + if (! api_parser_override && parse_field != func) { parse_field = func; update_PROCINFO_str("FS", current_field_sep_str()); } @@ -1156,7 +1137,7 @@ set_FIELDWIDTHS() return; /* - * If changing the way fields are split, obey least-suprise + * If changing the way fields are split, obey least-surprise * semantics, and force $0 to be split totally. */ if (fields_arr != NULL) @@ -1166,17 +1147,17 @@ set_FIELDWIDTHS() tmp = force_string(FIELDWIDTHS_node->var_value); scan = tmp->stptr; - if (FIELDWIDTHS == NULL) - emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); - FIELDWIDTHS[0] = 0; - for (i = 1; ; i++) { + if (FIELDWIDTHS == NULL) { + emalloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS"); + FIELDWIDTHS->use_chars = awk_true; + } + FIELDWIDTHS->nf = 0; + for (i = 0; ; i++) { unsigned long int tmp; - if (i + 1 >= fw_alloc) { + if (i >= fw_alloc) { fw_alloc *= 2; - erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + erealloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS"); } - /* Initialize value to be end of list */ - FIELDWIDTHS[i] = -1; /* Ensure that there is no leading `-' sign. Otherwise, strtoul would accept it and return a bogus result. */ while (is_blank(*scan)) { @@ -1194,6 +1175,13 @@ set_FIELDWIDTHS() or a value that is not in the range [1..INT_MAX]. */ errno = 0; tmp = strtoul(scan, &end, 10); + if (errno == 0 && *end == ':' && (0 < tmp && tmp <= INT_MAX)) { + FIELDWIDTHS->fields[i].skip = tmp; + scan = end + 1; + tmp = strtoul(scan, &end, 10); + } + else + FIELDWIDTHS->fields[i].skip = 0; if (errno != 0 || (*end != '\0' && ! is_blank(*end)) || !(0 < tmp && tmp <= INT_MAX) @@ -1201,7 +1189,8 @@ set_FIELDWIDTHS() fatal_error = true; break; } - FIELDWIDTHS[i] = tmp; + FIELDWIDTHS->fields[i].len = tmp; + FIELDWIDTHS->nf = i+1; scan = end; /* Skip past any trailing blanks. */ while (is_blank(*scan)) { @@ -1210,7 +1199,6 @@ set_FIELDWIDTHS() if (*scan == '\0') break; } - FIELDWIDTHS[i+1] = -1; if (fatal_error) fatal(_("invalid FIELDWIDTHS value, near `%s'"), @@ -1354,12 +1342,12 @@ choose_fs_function: field_sep_type current_field_sep() { - if (parse_field == fw_parse_field) + if (api_parser_override) + return Using_API; + else if (parse_field == fw_parse_field) return Using_FIELDWIDTHS; else if (parse_field == fpat_parse_field) return Using_FPAT; - else if (parse_field == api_parse_field) - return Using_API; else return Using_FS; } @@ -1369,12 +1357,12 @@ current_field_sep() const char * current_field_sep_str() { - if (parse_field == fw_parse_field) + if (api_parser_override) + return "API"; + else if (parse_field == fw_parse_field) return "FIELDWIDTHS"; else if (parse_field == fpat_parse_field) return "FPAT"; - else if (parse_field == api_parse_field) - return "API"; else return "FS"; } -- cgit v1.2.3 From 61b4108f82f30deaabf03eb6dbc0e64edeffdb6e Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 27 Mar 2017 21:27:50 +0300 Subject: Minor edits in feature/api-parser prepatory to merging. --- field.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index bd333c92..b5f28c17 100644 --- a/field.c +++ b/field.c @@ -40,15 +40,15 @@ typedef void (* Setfunc)(long, char *, long, NODE *); /* is the API currently overriding the default parsing mechanism? */ static bool api_parser_override = false; -static long (*parse_field)(long, char **, int, NODE *, +typedef long (*parse_field_func_t)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); +static parse_field_func_t parse_field; /* * N.B. The normal_parse_field function pointer contains the parse_field value * that should be used except when API field parsing is overriding the default * field parsing mechanism. */ -static long (*normal_parse_field)(long, char **, int, NODE *, - Regexp *, Setfunc, NODE *, NODE *, bool); +static parse_field_func_t normal_parse_field; static long re_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long def_parse_field(long, char **, int, NODE *, @@ -771,9 +771,10 @@ fw_parse_field(long up_to, /* parse only up to this field number */ return nf; if (gawk_mb_cur_max > 1 && fw->use_chars) { /* - * XXX This may be a bug. Most likely, shift state should - * persist across all fields in a record, if not across record - * boundaries as well. + * Reset the shift state for each field, since there might + * be who-knows-what kind of stuff in between fields, + * and we assume each field starts with a valid (possibly + * multibyte) character. */ memset(&mbs, 0, sizeof(mbstate_t)); while (nf < up_to) { @@ -1104,10 +1105,10 @@ do_patsplit(int nargs) return tmp; } -/* set_parser: update the current (non-API) parser */ +/* set_parser --- update the current (non-API) parser */ static void -set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool)) +set_parser(parse_field_func_t func) { normal_parse_field = func; if (! api_parser_override && parse_field != func) { @@ -1149,7 +1150,7 @@ set_FIELDWIDTHS() if (FIELDWIDTHS == NULL) { emalloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS"); - FIELDWIDTHS->use_chars = awk_true; + FIELDWIDTHS->use_chars = true; } FIELDWIDTHS->nf = 0; for (i = 0; ; i++) { @@ -1201,8 +1202,8 @@ set_FIELDWIDTHS() } if (fatal_error) - fatal(_("invalid FIELDWIDTHS value, near `%s'"), - scan); + fatal(_("invalid FIELDWIDTHS value, for field %d, near `%s'"), + i, scan); } /* set_FS --- handle things when FS is assigned to */ -- cgit v1.2.3 From 215618921d2515040bd02fecc1a3438cd4949a5b Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Sun, 9 Apr 2017 18:53:50 -0400 Subject: Fix comment in fw_parse_field and white space in gawkapi.h. --- field.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index b5f28c17..38105745 100644 --- a/field.c +++ b/field.c @@ -771,10 +771,10 @@ fw_parse_field(long up_to, /* parse only up to this field number */ return nf; if (gawk_mb_cur_max > 1 && fw->use_chars) { /* - * Reset the shift state for each field, since there might - * be who-knows-what kind of stuff in between fields, - * and we assume each field starts with a valid (possibly - * multibyte) character. + * Reset the shift state. Arguably, the shift state should + * be part of the file state and carried forward at all times, + * but nobody has complained so far, so this may not matter + * in practice. */ memset(&mbs, 0, sizeof(mbstate_t)); while (nf < up_to) { -- cgit v1.2.3 From 627836fc67d52e54f004dc4f7ad7ec0f609840f6 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Mon, 10 Apr 2017 12:13:08 -0400 Subject: Trivial change to field.c set_FIELDWIDTHS for type consistency. --- field.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'field.c') diff --git a/field.c b/field.c index 38105745..a3be9773 100644 --- a/field.c +++ b/field.c @@ -1150,7 +1150,7 @@ set_FIELDWIDTHS() if (FIELDWIDTHS == NULL) { emalloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS"); - FIELDWIDTHS->use_chars = true; + FIELDWIDTHS->use_chars = awk_true; } FIELDWIDTHS->nf = 0; for (i = 0; ; i++) { -- cgit v1.2.3