From 9e419cfbc401e9b9fd45c8e854fdf5ae799261d5 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Tue, 11 Jul 2017 08:17:14 +0300 Subject: Some cleanup about checking letters and identifiers. --- ChangeLog | 11 +++++++++++ awk.h | 1 + awkgram.c | 14 +++++++++++++- awkgram.y | 14 +++++++++++++- command.c | 2 +- command.y | 4 ++-- ext.c | 43 +++++++++++++++++++++---------------------- main.c | 2 +- 8 files changed, 63 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index b4f65ac9..494964b0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2017-07-11 Arnold D. Robbins + + * awk.h (is_letter): Add declaration. + * ext.c (is_valid_identifier): New function. + (make_builtin): Use is_valid_identifier instead of inline code. + (is_letter): Moved from here ... + * awkgram.y (is_letter): ... to here. + (yylex): Use is_letter instead of a test. + * command.y (yylex): Ditto. + * main.c (arg_assign): Ditto. + 2017-07-07 Arnold D. Robbins Remove warnings from GCC 7.1 compilation. diff --git a/awk.h b/awk.h index 3edfe5bd..f0fd7478 100644 --- a/awk.h +++ b/awk.h @@ -1418,6 +1418,7 @@ extern builtin_func_t lookup_builtin(const char *name); extern void install_builtins(void); extern bool is_alpha(int c); extern bool is_alnum(int c); +extern bool is_letter(int c); extern bool is_identchar(int c); extern NODE *make_regnode(int type, NODE *exp); /* builtin.c */ diff --git a/awkgram.c b/awkgram.c index 82ac3589..db295267 100644 --- a/awkgram.c +++ b/awkgram.c @@ -6506,7 +6506,7 @@ retry: } } - if (c != '_' && ! is_alpha(c)) { + if (! is_letter(c)) { yyerror(_("invalid char '%c' in expression"), c); return lasttok = LEX_EOF; } @@ -8728,6 +8728,18 @@ is_alnum(int c) } +/* + * is_letter --- function to check letters + * isalpha() isn't good enough since it can look at the locale. + * Underscore counts as a letter in awk identifiers + */ + +bool +is_letter(int c) +{ + return (is_alpha(c) || c == '_'); +} + /* is_identchar --- return true if c can be in an identifier */ bool diff --git a/awkgram.y b/awkgram.y index d06faf8d..ddcc9d72 100644 --- a/awkgram.y +++ b/awkgram.y @@ -4086,7 +4086,7 @@ retry: } } - if (c != '_' && ! is_alpha(c)) { + if (! is_letter(c)) { yyerror(_("invalid char '%c' in expression"), c); return lasttok = LEX_EOF; } @@ -6308,6 +6308,18 @@ is_alnum(int c) } +/* + * is_letter --- function to check letters + * isalpha() isn't good enough since it can look at the locale. + * Underscore counts as a letter in awk identifiers + */ + +bool +is_letter(int c) +{ + return (is_alpha(c) || c == '_'); +} + /* is_identchar --- return true if c can be in an identifier */ bool diff --git a/command.c b/command.c index 1d804c75..31f356da 100644 --- a/command.c +++ b/command.c @@ -3038,7 +3038,7 @@ err: || c == ',' || c == '=') return *lexptr++; - if (c != '_' && ! is_alpha(c)) { + if (! is_letter(c)) { yyerror(_("invalid character")); return '\n'; } diff --git a/command.y b/command.y index 65d21853..0080c9b2 100644 --- a/command.y +++ b/command.y @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 2004, 2010, 2011, 2014, 2016 + * Copyright (C) 2004, 2010, 2011, 2014, 2016, 2017 * the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the @@ -1288,7 +1288,7 @@ err: || c == ',' || c == '=') return *lexptr++; - if (c != '_' && ! is_alpha(c)) { + if (! is_letter(c)) { yyerror(_("invalid character")); return '\n'; } diff --git a/ext.c b/ext.c index 609b3b2b..a2225d14 100644 --- a/ext.c +++ b/ext.c @@ -35,18 +35,6 @@ extern SRCFILE *srcfiles; #include -/* - * is_letter --- function to check letters - * isalpha() isn't good enough since it can look at the locale. - * Underscore counts as a letter in awk identifiers - */ - -static bool -is_letter(unsigned char c) -{ - return (is_alpha(c) || c == '_'); -} - #define INIT_FUNC "dl_load" /* load_ext --- load an external library */ @@ -89,6 +77,25 @@ load_ext(const char *lib_name) lib_name, INIT_FUNC); } +/* is_valid_identifier --- return true if name is a valid simple identifier */ + +static bool +is_valid_identifier(const char *name) +{ + const char *sp = name; + int c; + + if (! is_letter(*sp)) + return false; + + for (sp++; (c = *sp++) != '\0';) { + if (! is_identchar(c)) + return false; + } + + return true; +} + /* make_builtin --- register name to be called as func with a builtin body */ awk_bool_t @@ -96,23 +103,15 @@ make_builtin(const awk_ext_func_t *funcinfo) { NODE *symbol, *f; INSTRUCTION *b; - const char *sp; - char c; const char *name = funcinfo->name; int count = funcinfo->max_expected_args; - sp = name; - if (sp == NULL || *sp == '\0') + if (name == NULL || *name == '\0') fatal(_("make_builtin: missing function name")); - if (! is_letter(*sp)) + if (! is_valid_identifier(name)) return awk_false; - for (sp++; (c = *sp++) != '\0';) { - if (! is_identchar(c)) - return awk_false; - } - f = lookup(name); if (f != NULL) { diff --git a/main.c b/main.c index 195684c4..e5e71853 100644 --- a/main.c +++ b/main.c @@ -1125,7 +1125,7 @@ arg_assign(char *arg, bool initing) /* first check that the variable name has valid syntax */ badvar = false; - if (! is_alpha((unsigned char) arg[0]) && arg[0] != '_') + if (! is_letter((unsigned char) arg[0])) badvar = true; else for (cp2 = arg+1; *cp2; cp2++) -- cgit v1.2.3