From 10bef02e8f8f6bec4f1c18d9c634aa6927f4611a Mon Sep 17 00:00:00 2001
From: Rainer Gerhards <rgerhards@adiscon.com>
Date: Thu, 13 Sep 2012 09:30:20 +0200
Subject: bugfix: missing support for escape sequences in RainerScript

Only \' was supported. Now the usual set is supported. Note that v5
used \x as escape where x was any character (e.g. "\n" meant "n" and NOT
LF). This also means there is some incompatibility to v5 for well-know
sequences. Better break it now than later.
---
 grammar/rainerscript.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

(limited to 'grammar/rainerscript.c')

diff --git a/grammar/rainerscript.c b/grammar/rainerscript.c
index a5cc10c2..de63f692 100644
--- a/grammar/rainerscript.c
+++ b/grammar/rainerscript.c
@@ -1616,3 +1616,142 @@ cstrPrint(char *text, es_str_t *estr)
 	dbgprintf("%s%s", text, str);
 	free(str);
 }
+
+
+/* we need a function to check for octal digits */
+static inline int
+isodigit(uchar c)
+{
+	return(c >= '0' && c <= '7');
+}
+
+/**
+ * Get numerical value of a hex digit. This is a helper function.
+ * @param[in] c a character containing 0..9, A..Z, a..z anything else
+ * is an (undetected) error.
+ */
+static inline int
+hexDigitVal(char c)
+{
+	int r;
+	if(c < 'A')
+		r = c - '0';
+	else if(c < 'a')
+		r = c - 'A' + 10;
+	else
+		r = c - 'a' + 10;
+	return r;
+}
+
+/* Handle the actual unescaping.
+ * a helper to unescapeStr(), to help make the function easier to read.
+ */
+static inline void
+doUnescape(unsigned char *c, int len, int *iSrc, int iDst)
+{
+	if(c[*iSrc] == '\\') {
+		if(++(*iSrc) == len) {
+			/* error, incomplete escape, treat as single char */
+			c[iDst] = '\\';
+		}
+		/* regular case, unescape */
+		switch(c[*iSrc]) {
+		case 'a':
+			c[iDst] = '\007';
+			break;
+		case 'b':
+			c[iDst] = '\b';
+			break;
+		case 'f':
+			c[iDst] = '\014';
+			break;
+		case 'n':
+			c[iDst] = '\n';
+			break;
+		case 'r':
+			c[iDst] = '\r';
+			break;
+		case 't':
+			c[iDst] = '\t';
+			break;
+		case '\'':
+			c[iDst] = '\'';
+			break;
+		case '"':
+			c[iDst] = '"';
+			break;
+		case '?':
+			c[iDst] = '?';
+			break;
+		case '$':
+			c[iDst] = '$';
+			break;
+		case '\\':
+			c[iDst] = '\\';
+			break;
+		case 'x':
+			if(    (*iSrc)+2 >= len
+			   || !isxdigit(c[(*iSrc)+1])
+			   || !isxdigit(c[(*iSrc)+2])) {
+				/* error, incomplete escape, use as is */
+				c[iDst] = '\\';
+				--(*iSrc);
+			}
+			c[iDst] = (hexDigitVal(c[(*iSrc)+1]) << 4) +
+				  hexDigitVal(c[(*iSrc)+2]);
+			*iSrc += 2;
+			break;
+		case '0': /* octal escape */
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+			if(    (*iSrc)+2 >= len
+			   || !isodigit(c[(*iSrc)+1])
+			   || !isodigit(c[(*iSrc)+2])) {
+				/* error, incomplete escape, use as is */
+				c[iDst] = '\\';
+				--(*iSrc);
+			}
+			c[iDst] = ((c[(*iSrc)  ] - '0') << 6) +
+			          ((c[(*iSrc)+1] - '0') << 3) +
+			          ( c[(*iSrc)+2] - '0');
+			*iSrc += 2;
+			break;
+		default:
+			/* error, incomplete escape, indicate by '?' */
+			c[iDst] = '?';
+			break;
+		}
+	} else {
+		/* regular character */
+		c[iDst] = c[*iSrc];
+	}
+}
+
+void
+unescapeStr(uchar *s, int len)
+{
+	int iSrc, iDst;
+	assert(s != NULL);
+
+	/* scan for first escape sequence (if we are luky, there is none!) */
+	iSrc = 0;
+	while(iSrc < len && s[iSrc] != '\\')
+		++iSrc;
+	/* now we have a sequence or end of string. In any case, we process
+	 * all remaining characters (maybe 0!) and unescape.
+	 */
+	if(iSrc != len) {
+		iDst = iSrc;
+		while(iSrc < len) {
+			doUnescape(s, len, &iSrc, iDst);
+			++iSrc;
+			++iDst;
+		}
+	}
+	s[iDst] = '\0';
+}
-- 
cgit v1.2.3