From 99f18190a1f911224d45ca61706ae3fbc9ad7a80 Mon Sep 17 00:00:00 2001
From: Rainer Gerhards
-
It is possible to specify some parameters after the "R". These are +comma-separated. They are: +
R,<regexp-type>,<submatch> +
regexp-type is either "BRE" for POSIX basic regular expressions or +"ERE" for extended ones. The string must be given in upper case. The +default is "BRE" to be consistent with earlier versions of rsyslog that +did not support ERE. The submatch identifies the submatch to be used +with the result. A single digit is supported. Match 0 is the full match, +while 1 to 9 are the actual submatches. +
The following is a sample of an ERE expression that takes the first +submatch from the message string: +
%msg:R,ERE,1:for (vlan[0-9]*):--end%
Also, extraction can be done based on so-called "fields". To do so, place a "F" into FromChar. A field in its current definition is anything that is delimited by a delimiter diff --git a/runtime/msg.c b/runtime/msg.c index b421c88f..2798b7be 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1605,8 +1605,8 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, #ifdef FEATURE_REGEXP /* Variables necessary for regular expression matching */ - size_t nmatch = 1; - regmatch_t pmatch[1]; + size_t nmatch = 10; + regmatch_t pmatch[10]; #endif assert(pMsg != NULL); @@ -1839,7 +1839,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, /* Could not compile regex before! */ return "**NO MATCH** **BAD REGULAR EXPRESSION**"; - dbgprintf("debug: String to match for regex is: %s\n", pRes); + dbgprintf("string to match for regex is: %s\n", pRes); if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { if (0 != regexp.regexec(&pTpe->data.field.re, pRes, nmatch, pmatch, 0)) { @@ -1850,12 +1850,26 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, } return "**NO MATCH**"; } else { - /* Match! */ - /* I need to malloc pB */ +{int i; for(i = 0 ; i < 10 ; ++i) { +dbgprintf("rqtd regex match (nmatch %d) # %d, idx %d: so %d, eo %d\n", nmatch, pTpe->data.field.iMatchToUse, i, +pmatch[i].rm_so, +pmatch[i].rm_eo); +}} + /* Match- but did it match the one we wanted? */ + /* we got no match! 
*/ + if(pmatch[pTpe->data.field.iMatchToUse].rm_so == -1) { + if (*pbMustBeFreed == 1) { + free(pRes); + *pbMustBeFreed = 0; + } + return "**NO MATCH**"; + } + /* OK, we have a usable match - we now need to malloc pB */ int iLenBuf; char *pB; - iLenBuf = pmatch[0].rm_eo - pmatch[0].rm_so; + iLenBuf = pmatch[pTpe->data.field.iMatchToUse].rm_eo + - pmatch[pTpe->data.field.iMatchToUse].rm_so; pB = (char *) malloc((iLenBuf + 1) * sizeof(char)); if (pB == NULL) { @@ -1866,7 +1880,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, } /* Lets copy the matched substring to the buffer */ - memcpy(pB, pRes + pmatch[0].rm_so, iLenBuf); + memcpy(pB, pRes + pmatch[pTpe->data.field.iMatchToUse].rm_so, iLenBuf); pB[iLenBuf] = '\0';/* terminate string, did not happen before */ if (*pbMustBeFreed == 1) diff --git a/template.c b/template.c index e5021f35..bccc6516 100644 --- a/template.c +++ b/template.c @@ -514,17 +514,47 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) if(*p == ':') { ++p; /* eat ':' */ #ifdef FEATURE_REGEXP - if (*p == 'R') { + if(*p == 'R') { /* APR: R found! regex alarm ! :) */ ++p; /* eat ':' */ - if (*p != ':') { + /* first come the regex type */ + if(*p == ',') { + ++p; /* eat ',' */ + if(*p == 'B' && *(p+1) == 'R' && *(p+2) == 'E' && *(p+3) == ',') { + pTpe->data.field.typeRegex = TPL_REGEX_BRE; + p += 3; /* eat indicator sequence */ + } else if(*p == 'E' && *(p+1) == 'R' && *(p+2) == 'E' && *(p+3) == ',') { + pTpe->data.field.typeRegex = TPL_REGEX_ERE; + p += 3; /* eat indicator sequence */ + } else { + errmsg.LogError(NO_ERRCODE, "error: invalid regular expression type, rest of line %s", + (char*) p); + } + } + + /* now check for submatch ID */ + pTpe->data.field.iMatchToUse = 0; + if(*p == ',') { + /* in this case a number follows, which indicates which match + * shall be used. This must be a single digit. 
+ */ + ++p; /* eat ',' */ + if(isdigit((int) *p)) { + pTpe->data.field.iMatchToUse = *p - '0'; + ++p; /* eat digit */ + } + } + + if(*p != ':') { /* There is something more than an R , this is invalid ! */ /* Complain on extra characters */ errmsg.LogError(NO_ERRCODE, "error: invalid character in frompos after \"R\", property: '%%%s'", (char*) *pp); } else { pTpe->data.field.has_regex = 1; + dbgprintf("we have a regexp and use match #%d\n", + pTpe->data.field.iMatchToUse); } } else { /* now we fall through the "regular" FromPos code */ @@ -620,8 +650,9 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) /* Now i compile the regex */ /* Remember that the re is an attribute of the Template entry */ if((iRetLocal = objUse(regexp, LM_REGEXP_FILENAME)) == RS_RET_OK) { -dbgprintf("compile data.field.re ptr: %p (pTpe %p)\n", (&(pTpe->data.field.re)), pTpe); - if(regexp.regcomp(&(pTpe->data.field.re), (char*) regex_char, 0) != 0) { + int iOptions; + iOptions = (pTpe->data.field.typeRegex == TPL_REGEX_ERE) ? REG_EXTENDED : 0; + if(regexp.regcomp(&(pTpe->data.field.re), (char*) regex_char, iOptions) != 0) { dbgprintf("error: can not compile regex: '%s'\n", regex_char); pTpe->data.field.has_regex = 2; } diff --git a/template.h b/template.h index 5b0bcdb4..daeeb5fd 100644 --- a/template.h +++ b/template.h @@ -67,7 +67,13 @@ struct templateEntry { unsigned iToPos; /* up to that one... */ #ifdef FEATURE_REGEXP regex_t re; /* APR: this is the regular expression */ - unsigned has_regex; + short has_regex; + short iMatchToUse;/* which match should be obtained (10 max) */ + enum { + TPL_REGEX_BRE = 0, /* posix BRE */ + TPL_REGEX_ERE = 1 /* posix ERE */ + } typeRegex; + #endif unsigned has_fields; /* support for field-counting: field to extract */ unsigned char field_delim; /* support for field-counting: field delemiter char */ -- cgit v1.2.3