diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2013-06-03 20:58:05 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2013-06-03 20:58:05 +0300 |
commit | 4c23f750aabed696da10529c8cd6a7af24b185f1 (patch) | |
tree | a328f3b842081f46ca0764b2e51591602de36407 /helpers | |
parent | 10216cc37ad6dd9086aeacca813d3551b7c209ef (diff) | |
download | egawk-4c23f750aabed696da10529c8cd6a7af24b185f1.tar.gz egawk-4c23f750aabed696da10529c8cd6a7af24b185f1.tar.bz2 egawk-4c23f750aabed696da10529c8cd6a7af24b185f1.zip |
New helper: testdfa.c.
Diffstat (limited to 'helpers')
-rw-r--r-- | helpers/testdfa.c | 1071 |
1 files changed, 1071 insertions, 0 deletions
diff --git a/helpers/testdfa.c b/helpers/testdfa.c new file mode 100644 index 00000000..653a28f8 --- /dev/null +++ b/helpers/testdfa.c @@ -0,0 +1,1071 @@ +/* + * testdfa.c --- abstracted from gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-2013 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <locale.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <regex.h> +#include <unistd.h> +#include <wchar.h> +#include <sys/types.h> +#include <sys/stat.h> + + +#define _Noreturn +#include "dfa.h" + +const char *regexflags2str(int flags); +char *databuf(int fd); +const char * reflags2str(int flagval); +int parse_escape(const char **string_ptr); +char *setup_pattern(const char *pattern, size_t len); +char casetable[]; + +reg_syntax_t syn; + +struct flagtab { + int val; + const char *name; +}; + +/* usage --- print an error message and die */ + +void usage(const char *myname) +{ + fprintf(stderr, "usage: %s [-ipt] 'awk-regex' < file\n", myname); + exit(EXIT_FAILURE); +} + +/* main --- parse options, compile and run the regex */ + +int main(int argc, char **argv) +{ + int c, ret, try_backref; + struct re_pattern_buffer pat; + struct re_registers regs; + struct dfa *dfareg; + size_t len; + const char *pattern; + const char *rerr; + int infd; + char *data; + reg_syntax_t dfa_syn; + bool ignorecase = false; + char save; + size_t count = 0; + char *place; + + if (argc < 2) + usage(argv[0]); + + memset(& pat, 0, sizeof(pat)); + memset(& regs, 0, sizeof(regs)); + + /* default syntax */ + syn = RE_SYNTAX_GNU_AWK; + + /* parse options, update syntax, ignorecase */ + while ((c = getopt(argc, argv, "pit")) != -1) { + switch (c) { + case 'i': + ignorecase = true; + break; + case 'p': + syn = RE_SYNTAX_POSIX_AWK; + break; + case 't': + syn = RE_SYNTAX_AWK; + break; + case '?': + default: + usage(argv[0]); + break; + } + } + + if (optind == argc) + usage(argv[0]); + + pattern = argv[optind]; + len = strlen(pattern); + + setlocale(LC_CTYPE, ""); + setlocale(LC_COLLATE, ""); + setlocale(LC_MESSAGES, ""); + setlocale(LC_NUMERIC, ""); + setlocale(LC_TIME, ""); + + printf("Ignorecase: %s\nSyntax: %s\n", + (ignorecase ? "true" : "false"), + reflags2str(syn)); + printf("Pattern: /%s/\n", pattern); + + pattern = setup_pattern(pattern, len); + len = strlen(pattern); + + pat.fastmap = (char *) malloc(256); + if (pat.fastmap == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + printf("MB_CUR_MAX = %d\n", (int) MB_CUR_MAX); + + if (ignorecase) { + if (MB_CUR_MAX > 1) { + syn |= RE_ICASE; + pat.translate = NULL; + } else { + syn &= ~RE_ICASE; + pat.translate = (RE_TRANSLATE_TYPE) casetable; + } + } else { + pat.translate = NULL; + syn &= ~RE_ICASE; + } + + + dfa_syn = syn; + if (ignorecase) + dfa_syn |= RE_ICASE; + dfasyntax(dfa_syn, ignorecase, '\n'); + re_set_syntax(syn); + + if ((rerr = re_compile_pattern(pattern, len, & pat)) != NULL) { + fprintf(stderr, "%s: %s: cannot compile pattern '%s'\n", + argv[0], rerr, pattern); + exit(EXIT_FAILURE); + } + + /* gack. this must be done *after* re_compile_pattern */ + pat.newline_anchor = false; /* don't get \n in middle of string */ + + dfareg = dfaalloc(); + printf("Calling dfacomp(%s, %d, %p, true)\n", + pattern, (int) len, dfareg); + + dfacomp(pattern, len, dfareg, true); + + + data = databuf(STDIN_FILENO); + + /* run the regex matcher */ + ret = re_search(& pat, data, len, 0, len, NULL); + printf("re_search returned %d (%s)\n", ret, (ret != 0) ? "true" : "false"); + + /* run the dfa matcher */ + /* + * dfa likes to stick a '\n' right after the matched + * text. So we just save and restore the character. + */ + save = data[len]; + place = dfaexec(dfareg, data, data+len, true, + &count, &try_backref); + data[len] = save; + + printf("dfaexec returned %p (%.3s)\n", place, place); + + /* release storage */ + regfree(& pat); + if (regs.start) + free(regs.start); + if (regs.end) + free(regs.end); + dfafree(dfareg); + free(dfareg); +} + +/* genflags2str --- general routine to convert a flag value to a string */ + +const char * +genflags2str(int flagval, const struct flagtab *tab) +{ + static char buffer[BUFSIZ]; + char *sp; + int i, space_left, space_needed; + + sp = buffer; + space_left = BUFSIZ; + for (i = 0; tab[i].name != NULL; i++) { + if ((flagval & tab[i].val) != 0) { + /* + * note the trick, we want 1 or 0 for whether we need + * the '|' character. + */ + space_needed = (strlen(tab[i].name) + (sp != buffer)); + if (space_left <= space_needed) { + fprintf(stderr, "buffer overflow in genflags2str"); + exit(EXIT_FAILURE); + } + + if (sp != buffer) { + *sp++ = '|'; + space_left--; + } + strcpy(sp, tab[i].name); + /* note ordering! */ + space_left -= strlen(sp); + sp += strlen(sp); + } + } + + *sp = '\0'; + return buffer; +} + + +/* reflags2str --- make a regex flags value readable */ + +const char * +reflags2str(int flagval) +{ + static const struct flagtab values[] = { + { RE_BACKSLASH_ESCAPE_IN_LISTS, "RE_BACKSLASH_ESCAPE_IN_LISTS" }, + { RE_BK_PLUS_QM, "RE_BK_PLUS_QM" }, + { RE_CHAR_CLASSES, "RE_CHAR_CLASSES" }, + { RE_CONTEXT_INDEP_ANCHORS, "RE_CONTEXT_INDEP_ANCHORS" }, + { RE_CONTEXT_INDEP_OPS, "RE_CONTEXT_INDEP_OPS" }, + { RE_CONTEXT_INVALID_OPS, "RE_CONTEXT_INVALID_OPS" }, + { RE_DOT_NEWLINE, "RE_DOT_NEWLINE" }, + { RE_DOT_NOT_NULL, "RE_DOT_NOT_NULL" }, + { RE_HAT_LISTS_NOT_NEWLINE, "RE_HAT_LISTS_NOT_NEWLINE" }, + { RE_INTERVALS, "RE_INTERVALS" }, + { RE_LIMITED_OPS, "RE_LIMITED_OPS" }, + { RE_NEWLINE_ALT, "RE_NEWLINE_ALT" }, + { RE_NO_BK_BRACES, "RE_NO_BK_BRACES" }, + { RE_NO_BK_PARENS, "RE_NO_BK_PARENS" }, + { RE_NO_BK_REFS, "RE_NO_BK_REFS" }, + { RE_NO_BK_VBAR, "RE_NO_BK_VBAR" }, + { RE_NO_EMPTY_RANGES, "RE_NO_EMPTY_RANGES" }, + { RE_UNMATCHED_RIGHT_PAREN_ORD, "RE_UNMATCHED_RIGHT_PAREN_ORD" }, + { RE_NO_POSIX_BACKTRACKING, "RE_NO_POSIX_BACKTRACKING" }, + { RE_NO_GNU_OPS, "RE_NO_GNU_OPS" }, + { RE_INVALID_INTERVAL_ORD, "RE_INVALID_INTERVAL_ORD" }, + { RE_ICASE, "RE_ICASE" }, + { RE_CARET_ANCHORS_HERE, "RE_CARET_ANCHORS_HERE" }, + { RE_CONTEXT_INVALID_DUP, "RE_CONTEXT_INVALID_DUP" }, + { RE_NO_SUB, "RE_NO_SUB" }, + { 0, NULL }, + }; + + if (flagval == RE_SYNTAX_EMACS) /* == 0 */ + return "RE_SYNTAX_EMACS"; + + return genflags2str(flagval, values); +} + +/* + * dfawarn() is called by the dfa routines whenever a regex is compiled + * must supply a dfawarn. + */ + +void +dfawarn(const char *dfa_warning) +{ + fprintf(stderr, "dfa warning: %s\n", dfa_warning); +} + +/* dfaerror --- print an error message for the dfa routines */ + +void +dfaerror(const char *s) +{ + fprintf(stderr, "dfa-error: %s\n", s); + exit(EXIT_FAILURE); +} + +/* databuf --- read the input file */ + +char * +databuf(int fd) +{ + char *buf; + struct stat sbuf; + ssize_t count; + + if (fstat(fd, & sbuf) < 0) + return NULL; + + buf = (char *) malloc(sbuf.st_size + 3); + if (buf == NULL) + return NULL; + + if ((count = read(fd, buf, sbuf.st_size)) != sbuf.st_size) { + perror("read"); + return NULL; + } + buf[sbuf.st_size] = '\0'; + + (void) close(fd); + + return buf; +} + +/* xmalloc --- for dfa.c */ + +void * +xmalloc(size_t bytes) +{ + void *p = malloc(bytes); + + if (p == NULL) { + fprintf(stderr, "xmalloc: malloc failed: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + return p; +} + +/* r_fatal --- print a fatal error message. also for dfa.c */ + +void +r_fatal(const char *mesg, ...) +{ + va_list args; + va_start(args, mesg); + fprintf(stderr, "fatal: "); + vfprintf(stderr, mesg, args); + va_end(args); + + exit(EXIT_FAILURE); +} + +/* setup_pattern --- do what gawk does with the pattern string */ + +char * +setup_pattern(const char *pattern, size_t len) +{ + size_t is_multibyte = 0; + int c, c2; + size_t buflen; + mbstate_t mbs; + bool has_anchor = false; + char *buf, *dest; + const char *src, *end; + + src = pattern; + end = pattern + len; + + /* Handle escaped characters first. */ + + /* + * Build a copy of the string (in buf) with the + * escaped characters translated, and generate the regex + * from that. + */ + if (buf == NULL) { + buf = (char *) malloc(len + 2); + if (buf == NULL) { + fprintf(stderr, "%s: malloc failed\n", __func__); + exit(EXIT_FAILURE); + } + buflen = len; + } else if (len > buflen) { + buf = (char *) realloc(buf, len + 2); + if (buf == NULL) { + fprintf(stderr, "%s: realloc failed\n", __func__); + exit(EXIT_FAILURE); + } + buflen = len; + } + dest = buf; + + while (src < end) { + if (MB_CUR_MAX > 1 && ! is_multibyte) { + /* The previous byte is a singlebyte character, or last byte + of a multibyte character. We check the next character. */ + is_multibyte = mbrlen(src, end - src, &mbs); + if ( is_multibyte == 1 + || is_multibyte == (size_t) -1 + || is_multibyte == (size_t) -2 + || is_multibyte == 0) { + /* We treat it as a single-byte character. */ + is_multibyte = 0; + } + } + + /* We skip multibyte character, since it must not be a special + character. */ + if ((MB_CUR_MAX == 1 || ! is_multibyte) && + (*src == '\\')) { + c = *++src; + switch (c) { + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case 'x': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c2 = parse_escape(&src); + if (c2 < 0) { + fprintf(stderr, "%s: parse_escape failed\n", __func__); + exit(EXIT_FAILURE); + } + /* + * Unix awk treats octal (and hex?) chars + * literally in re's, so escape regexp + * metacharacters. + */ + if (syn == RE_SYNTAX_AWK + && (isdigit(c) || c == 'x') + && strchr("()|*+?.^$\\[]", c2) != NULL) + *dest++ = '\\'; + *dest++ = (char) c2; + break; + case '8': + case '9': /* a\9b not valid */ + *dest++ = c; + src++; + break; + case 'y': /* normally \b */ + /* gnu regex op */ + if (syn == RE_SYNTAX_GNU_AWK) { + *dest++ = '\\'; + *dest++ = 'b'; + src++; + break; + } + /* else, fall through */ + default: + *dest++ = '\\'; + *dest++ = (char) c; + src++; + break; + } /* switch */ + } else { + c = *src; + if (c == '^' || c == '$') + has_anchor = true; + + *dest++ = *src++; /* not '\\' */ + } + if (MB_CUR_MAX > 1 && is_multibyte) + is_multibyte--; + } /* while */ + + *dest = '\0'; + len = dest - buf; + + return buf; +} + +/* + * parse_escape: + * + * Parse a C escape sequence. STRING_PTR points to a variable containing a + * pointer to the string to parse. That pointer is updated past the + * characters we use. The value of the escape sequence is returned. + * + * A negative value means the sequence \ newline was seen, which is supposed to + * be equivalent to nothing at all. + * + * If \ is followed by a null character, we return a negative value and leave + * the string pointer pointing at the null character. + * + * If \ is followed by 000, we return 0 and leave the string pointer after the + * zeros. A value of 0 does not mean end of string. + * + * POSIX doesn't allow \x. + */ + +int +parse_escape(const char **string_ptr) +{ + int c = *(*string_ptr)++; + int i; + int count; + int j; + const char *start; + + switch (c) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\n': + return -2; + case 0: + (*string_ptr)--; + return -1; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + i = c - '0'; + count = 0; + while (++count < 3) { + if ((c = *(*string_ptr)++) >= '0' && c <= '7') { + i *= 8; + i += c - '0'; + } else { + (*string_ptr)--; + break; + } + } + return i; + case 'x': + if (! isxdigit((unsigned char) (*string_ptr)[0])) { + return ('x'); + } + i = j = 0; + start = *string_ptr; + for (;; j++) { + /* do outside test to avoid multiple side effects */ + c = *(*string_ptr)++; + if (isxdigit(c)) { + i *= 16; + if (isdigit(c)) + i += c - '0'; + else if (isupper(c)) + i += c - 'A' + 10; + else + i += c - 'a' + 10; + } else { + (*string_ptr)--; + break; + } + } + return i; + case '\\': + case '"': + return c; + default: + return c; + } +} + +/* This rather ugly macro is for VMS C */ +#ifdef C +#undef C +#endif +#define C(c) ((char)c) +/* + * This table is used by the regexp routines to do case independent + * matching. Basically, every ascii character maps to itself, except + * uppercase letters map to lower case ones. This table has 256 + * entries, for ISO 8859-1. Note also that if the system this + * is compiled on doesn't use 7-bit ascii, casetable[] should not be + * defined to the linker, so gawk should not load. + * + * Do NOT make this array static, it is used in several spots, not + * just in this file. + * + * 6/2004: + * This table is also used for IGNORECASE for == and !=, and index(). + * Although with GLIBC, we could use tolower() everywhere and RE_ICASE + * for the regex matcher, precomputing this table once gives us a + * performance improvement. I also think it's better for portability + * to non-GLIBC systems. All the world is not (yet :-) GNU/Linux. + */ +#if 'a' == 97 /* it's ascii */ +char casetable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + /* ' ' '!' '"' '#' '$' '%' '&' ''' */ + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + /* '(' ')' '*' '+' ',' '-' '.' '/' */ + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + /* '0' '1' '2' '3' '4' '5' '6' '7' */ + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + /* '8' '9' ':' ';' '<' '=' '>' '?' */ + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */ + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */ + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'x' 'y' 'z' '{' '|' '}' '~' */ + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + + /* Latin 1: */ + C('\200'), C('\201'), C('\202'), C('\203'), C('\204'), C('\205'), C('\206'), C('\207'), + C('\210'), C('\211'), C('\212'), C('\213'), C('\214'), C('\215'), C('\216'), C('\217'), + C('\220'), C('\221'), C('\222'), C('\223'), C('\224'), C('\225'), C('\226'), C('\227'), + C('\230'), C('\231'), C('\232'), C('\233'), C('\234'), C('\235'), C('\236'), C('\237'), + C('\240'), C('\241'), C('\242'), C('\243'), C('\244'), C('\245'), C('\246'), C('\247'), + C('\250'), C('\251'), C('\252'), C('\253'), C('\254'), C('\255'), C('\256'), C('\257'), + C('\260'), C('\261'), C('\262'), C('\263'), C('\264'), C('\265'), C('\266'), C('\267'), + C('\270'), C('\271'), C('\272'), C('\273'), C('\274'), C('\275'), C('\276'), C('\277'), + C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'), + C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'), + C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\327'), + C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\337'), + C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'), + C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'), + C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\367'), + C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\377'), +}; +#elif 'a' == 0x81 /* it's EBCDIC */ +char casetable[] = { + /*00 NU SH SX EX PF HT LC DL */ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + /*08 SM VT FF CR SO SI */ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /*10 DE D1 D2 TM RS NL BS IL */ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + /*18 CN EM CC C1 FS GS RS US */ + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + /*20 DS SS FS BP LF EB EC */ + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + /*28 SM C2 EQ AK BL */ + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + /*30 SY PN RS UC ET */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /*38 C3 D4 NK SU */ + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + /*40 SP */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /*48 CENT . < ( + | */ + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + /*50 & */ + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + /*58 ! $ * ) ; ^ */ + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + /*60 - / */ + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /*68 | , % _ > ? */ + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + /*70 */ + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + /*78 ` : # @ ' = " */ + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + /*80 a b c d e f g */ + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*88 h i { */ + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + /*90 j k l m n o p */ + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /*98 q r } */ + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + /*A0 ~ s t u v w x */ + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + /*A8 y z [ */ + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + /*B0 */ + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + /*B8 ] */ + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + /*C0 { A B C D E F G */ + 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*C8 H I */ + 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + /*D0 } J K L M N O P */ + 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /*D8 Q R */ + 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + /*E0 \ S T U V W X */ + 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + /*E8 Y Z */ + 0xA8, 0xA9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + /*F0 0 1 2 3 4 5 6 7 */ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + /*F8 8 9 */ + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; +#else +#include "You lose. You will need a translation table for your character set." +#endif + +#undef C + +/* xalloc.h -- malloc with out-of-memory checking + + Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + 2000, 2003, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef XALLOC_H_ +# define XALLOC_H_ + +# include <stddef.h> + + +# ifdef __cplusplus +extern "C" { +# endif + + +# ifndef __attribute__ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) +# define __attribute__(x) +# endif +# endif + +# ifndef ATTRIBUTE_NORETURN +# define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) +# endif + +# ifndef ATTRIBUTE_MALLOC +# if __GNUC__ >= 3 +# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +# else +# define ATTRIBUTE_MALLOC +# endif +# endif + +/* This function is always triggered when memory is exhausted. + It must be defined by the application, either explicitly + or by using gnulib's xalloc-die module. This is the + function to call when one wants the program to die because of a + memory allocation failure. */ +extern void xalloc_die (void); + +void *xmalloc (size_t s) ATTRIBUTE_MALLOC; +void *xzalloc (size_t s) ATTRIBUTE_MALLOC; +void *xcalloc (size_t n, size_t s) ATTRIBUTE_MALLOC; +void *xrealloc (void *p, size_t s); +void *x2realloc (void *p, size_t *pn); +void *xmemdup (void const *p, size_t s) ATTRIBUTE_MALLOC; +char *xstrdup (char const *str) ATTRIBUTE_MALLOC; + +/* Return 1 if an array of N objects, each of size S, cannot exist due + to size arithmetic overflow. S must be positive and N must be + nonnegative. This is a macro, not an inline function, so that it + works correctly even when SIZE_MAX < N. + + By gnulib convention, SIZE_MAX represents overflow in size + calculations, so the conservative dividend to use here is + SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value. + However, malloc (SIZE_MAX) fails on all known hosts where + sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for + exactly-SIZE_MAX allocations on such hosts; this avoids a test and + branch when S is known to be 1. */ +# define xalloc_oversized(n, s) \ + ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n)) + + +/* In the following macros, T must be an elementary or structure/union or + typedef'ed type, or a pointer to such a type. To apply one of the + following macros to a function pointer or array type, you need to typedef + it first and use the typedef name. */ + +/* Allocate an object of type T dynamically, with error checking. */ +/* extern t *XMALLOC (typename t); */ +# define XMALLOC(t) ((t *) xmalloc (sizeof (t))) + +/* Allocate memory for N elements of type T, with error checking. */ +/* extern t *XNMALLOC (size_t n, typename t); */ +# define XNMALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))) + +/* Allocate an object of type T dynamically, with error checking, + and zero it. */ +/* extern t *XZALLOC (typename t); */ +# define XZALLOC(t) ((t *) xzalloc (sizeof (t))) + +/* Allocate memory for N elements of type T, with error checking, + and zero it. */ +/* extern t *XCALLOC (size_t n, typename t); */ +# define XCALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))) + +/* + * Gawk uses this file only to keep dfa.c happy. + * We're therefore safe in manually defining HAVE_INLINE to + * make the !@#$%^&*() thing just work. + */ +#ifdef GAWK +#define HAVE_INLINE 1 /* so there. nyah, nyah, nyah. */ +#endif + +# if HAVE_INLINE +# define static_inline static inline +# else +void *xnmalloc (size_t n, size_t s) ATTRIBUTE_MALLOC; +void *xnrealloc (void *p, size_t n, size_t s); +void *x2nrealloc (void *p, size_t *pn, size_t s); +char *xcharalloc (size_t n) ATTRIBUTE_MALLOC; +# endif + + +/* Allocate an array of N objects, each with S bytes of memory, + dynamically, with error checking. S must be nonzero. */ + +void * +xnmalloc (size_t n, size_t s) +{ + if (xalloc_oversized (n, s)) + xalloc_die (); + return xmalloc (n * s); +} + +/* Allocate an array of N objects, each with S bytes of memory, + dynamically, with error checking. S must be nonzero. + Clear the contents afterwards. */ + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *p = xmalloc (nmemb * size); + memset(p, '\0', nmemb * size); + return p; +} + +/* Reallocate a pointer to a new size, with error checking. */ + +void * +xrealloc(void *p, size_t size) +{ + void *new_p = realloc(p, size); + if (new_p == 0) + xalloc_die (); + + return new_p; +} + +/* xalloc_die --- fatal error message when malloc fails, needed by dfa.c */ + +void +xalloc_die (void) +{ + r_fatal("xalloc: malloc failed: %s"), strerror(errno); +} + +/* Clone an object P of size S, with error checking. There's no need + for xnmemdup (P, N, S), since xmemdup (P, N * S) works without any + need for an arithmetic overflow check. */ + +void * +xmemdup (void const *p, size_t s) +{ + return memcpy (xmalloc (s), p, s); +} + +/* Change the size of an allocated block of memory P to an array of N + objects each of S bytes, with error checking. S must be nonzero. */ + +void * +xnrealloc (void *p, size_t n, size_t s) +{ + if (xalloc_oversized (n, s)) + xalloc_die (); + return xrealloc (p, n * s); +} + +/* If P is null, allocate a block of at least *PN such objects; + otherwise, reallocate P so that it contains more than *PN objects + each of S bytes. *PN must be nonzero unless P is null, and S must + be nonzero. Set *PN to the new number of objects, and return the + pointer to the new block. *PN is never set to zero, and the + returned pointer is never null. + + Repeated reallocations are guaranteed to make progress, either by + allocating an initial block with a nonzero size, or by allocating a + larger block. + + In the following implementation, nonzero sizes are increased by a + factor of approximately 1.5 so that repeated reallocations have + O(N) overall cost rather than O(N**2) cost, but the + specification for this function does not guarantee that rate. + + Here is an example of use: + + int *p = NULL; + size_t used = 0; + size_t allocated = 0; + + void + append_int (int value) + { + if (used == allocated) + p = x2nrealloc (p, &allocated, sizeof *p); + p[used++] = value; + } + + This causes x2nrealloc to allocate a block of some nonzero size the + first time it is called. + + To have finer-grained control over the initial size, set *PN to a + nonzero value before calling this function with P == NULL. For + example: + + int *p = NULL; + size_t used = 0; + size_t allocated = 0; + size_t allocated1 = 1000; + + void + append_int (int value) + { + if (used == allocated) + { + p = x2nrealloc (p, &allocated1, sizeof *p); + allocated = allocated1; + } + p[used++] = value; + } + + */ + +void * +x2nrealloc (void *p, size_t *pn, size_t s) +{ + size_t n = *pn; + + if (! p) + { + if (! n) + { + /* The approximate size to use for initial small allocation + requests, when the invoking code specifies an old size of + zero. 64 bytes is the largest "small" request for the + GNU C library malloc. */ + enum { DEFAULT_MXFAST = 64 }; + + n = DEFAULT_MXFAST / s; + n += !n; + } + } + else + { + /* Set N = ceil (1.5 * N) so that progress is made if N == 1. + Check for overflow, so that N * S stays in size_t range. + The check is slightly conservative, but an exact check isn't + worth the trouble. */ + if ((size_t) -1 / 3 * 2 / s <= n) + xalloc_die (); + n += (n + 1) / 2; + } + + *pn = n; + return xrealloc (p, n * s); +} + +/* Return a pointer to a new buffer of N bytes. This is like xmalloc, + except it returns char *. */ + +char * +xcharalloc (size_t n) +{ + return XNMALLOC (n, char); +} + +/* Allocate S bytes of zeroed memory dynamically, with error checking. + There's no need for xnzalloc (N, S), since it would be equivalent + to xcalloc (N, S). */ + +void * +xzalloc (size_t s) +{ + return memset (xmalloc (s), 0, s); +} + +# endif + +# ifdef __cplusplus +} + +/* C++ does not allow conversions from void * to other pointer types + without a cast. Use templates to work around the problem when + possible. */ + +template <typename T> inline T * +xrealloc (T *p, size_t s) +{ + return (T *) xrealloc ((void *) p, s); +} + +template <typename T> inline T * +xnrealloc (T *p, size_t n, size_t s) +{ + return (T *) xnrealloc ((void *) p, n, s); +} + +template <typename T> inline T * +x2realloc (T *p, size_t *pn) +{ + return (T *) x2realloc ((void *) p, pn); +} + +template <typename T> inline T * +x2nrealloc (T *p, size_t *pn, size_t s) +{ + return (T *) x2nrealloc ((void *) p, pn, s); +} + +template <typename T> inline T * +xmemdup (T const *p, size_t s) +{ + return (T *) xmemdup ((void const *) p, s); +} + + + +#endif /* !XALLOC_H_ */ |