aboutsummaryrefslogtreecommitdiffstats
path: root/dfa.c
diff options
context:
space:
mode:
Diffstat (limited to 'dfa.c')
-rw-r--r--dfa.c71
1 files changed, 12 insertions, 59 deletions
diff --git a/dfa.c b/dfa.c
index e658ad8a..53a8c2cc 100644
--- a/dfa.c
+++ b/dfa.c
@@ -58,15 +58,15 @@
#include "gettext.h"
#define _(str) gettext (str)
-#include "mbsupport.h" /* Define MBS_SUPPORT to 1 or 0, as appropriate. */
-#if MBS_SUPPORT
-/* We can handle multibyte strings. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+#include <wchar.h>
+#include <wctype.h>
#include "xalloc.h"
+#if defined(__DJGPP__)
+#include "mbsupport.h"
+#endif
+
#include "dfa.h"
#ifdef GAWK
@@ -399,12 +399,10 @@ struct dfa
*/
int *multibyte_prop;
-#if MBS_SUPPORT
/* A table indexed by byte values that contains the corresponding wide
character (if any) for that byte. WEOF means the byte is not a
valid single-byte character. */
wint_t mbrtowc_cache[NOTCHAR];
-#endif
/* Array of the bracket expression in the DFA. */
struct mb_char_classes *mbcsets;
@@ -489,7 +487,6 @@ static void regexp (void);
static void
dfambcache (struct dfa *d)
{
-#if MBS_SUPPORT
int i;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
@@ -499,10 +496,8 @@ dfambcache (struct dfa *d)
wchar_t wc;
d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
}
-#endif
}
-#if MBS_SUPPORT
/* Store into *PWC the result of converting the leading bytes of the
multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
and updating the conversion state in *D. On conversion error,
@@ -541,9 +536,6 @@ mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
*pwc = wc;
return 1;
}
-#else
-#define mbs_to_wchar(pwc, s, n, d) (WEOF)
-#endif
#ifdef DEBUG
@@ -738,7 +730,7 @@ static charclass newline;
#ifdef __GLIBC__
# define is_valid_unibyte_character(c) 1
#else
-# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
+# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
#endif
/* C is a "word-constituent" byte. */
@@ -799,17 +791,12 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
static bool
setbit_wc (wint_t wc, charclass c)
{
-#if MBS_SUPPORT
int b = wctob (wc);
if (b == EOF)
return false;
setbit (b, c);
return true;
-#else
- abort ();
- /*NOTREACHED*/ return false;
-#endif
}
/* Set a bit for B and its case variants in the charclass C.
@@ -907,7 +894,6 @@ static wint_t wctok; /* Wide character representation of the current
MB_CUR_MAX > 1. */
-#if MBS_SUPPORT
/* Fetch the next lexical input character. Set C (of type int) to the
next input byte, except set C to EOF if the input is a multibyte
character of length greater than 1. Set WC (of type wint_t) to the
@@ -936,23 +922,6 @@ static wint_t wctok; /* Wide character representation of the current
} \
} while (0)
-#else
-/* Note that characters become unsigned here. */
-# define FETCH_WC(c, unused, eoferr) \
- do { \
- if (! lexleft) \
- { \
- if ((eoferr) != 0) \
- dfaerror (eoferr); \
- else \
- return lasttok = END; \
- } \
- (c) = to_uchar (*lexptr++); \
- --lexleft; \
- } while (0)
-
-#endif /* MBS_SUPPORT */
-
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
@@ -1764,7 +1733,6 @@ addtok (token t)
}
}
-#if MBS_SUPPORT
/* We treat a multibyte character as a single atom, so that DFA
can treat a multibyte character as a single expression.
@@ -1796,17 +1764,10 @@ addtok_wc (wint_t wc)
addtok (CAT);
}
}
-#else
-static void
-addtok_wc (wint_t wc)
-{
-}
-#endif
static void
add_utf8_anychar (void)
{
-#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
/* 80-bf: non-leading bytes. */
{0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
@@ -1861,7 +1822,6 @@ add_utf8_anychar (void)
addtok (CAT);
addtok (OR);
}
-#endif
}
/* The grammar understood by the parser is as follows.
@@ -1902,7 +1862,7 @@ add_utf8_anychar (void)
static void
atom (void)
{
- if (MBS_SUPPORT && tok == WCHAR)
+ if (tok == WCHAR)
{
if (wctok == WEOF)
addtok (BACKREF);
@@ -1924,7 +1884,7 @@ atom (void)
tok = lex ();
}
- else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
+ else if (tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -1938,9 +1898,7 @@ atom (void)
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-#if MBS_SUPPORT
|| tok == ANYCHAR || tok == MBCSET
-#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok (tok);
@@ -2273,10 +2231,8 @@ epsclosure (position_set *s, struct dfa const *d, char *visited)
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
&& d->tokens[s->elems[i].index] != BACKREF
-#if MBS_SUPPORT
&& d->tokens[s->elems[i].index] != ANYCHAR
&& d->tokens[s->elems[i].index] != MBCSET
-#endif
&& d->tokens[s->elems[i].index] < CSET)
{
if (!initialized)
@@ -2595,9 +2551,7 @@ dfaanalyze (struct dfa *d, int searchflag)
it with its epsilon closure. */
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
-#if MBS_SUPPORT
|| d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
-#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
@@ -2707,9 +2661,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
else
{
- if (MBS_SUPPORT
- && (d->tokens[pos.index] == MBCSET
- || d->tokens[pos.index] == ANYCHAR))
+ if (d->tokens[pos.index] == MBCSET
+ || d->tokens[pos.index] == ANYCHAR)
{
/* MB_CUR_MAX > 1 */
if (d->tokens[pos.index] == MBCSET)
@@ -3684,7 +3637,7 @@ dfaoptimize (struct dfa *d)
size_t i;
bool have_backref = false;
- if (!MBS_SUPPORT || !using_utf8 ())
+ if (!using_utf8 ())
return;
for (i = 0; i < d->tindex; ++i)