From ec56b763b83677d1e9cd02a7ae610caf62e902bb Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 8 Oct 2009 16:36:17 +0200 Subject: bugfix in debug system and more instrumentation to find an issue bugfix: debug string larger than 1K were improperly displayed. Max size is now 32K, and if a string is even longer it is meaningful truncated. --- runtime/parser.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 466066e7..3c90c447 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -231,14 +231,14 @@ sanitizeMessage(msg_t *pMsg) * can not handle it! -- rgerhards, 2009-08-26 */ if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { - /* we are configured to escape control characters. Please note - * that this most probably break non-western character sets like - * Japanese, Korean or Chinese. rgerhards, 2007-07-17 - */ - pDst[iDst++] = cCCEscapeChar; - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + /* we are configured to escape control characters. Please note + * that this most probably break non-western character sets like + * Japanese, Korean or Chinese. rgerhards, 2007-07-17 + */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); } } else { pDst[iDst++] = pszMsg[iSrc]; -- cgit v1.2.3 From e04e1b50025f5fa9c26abd946190dce8f797d08f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 22 Oct 2009 11:33:38 +0200 Subject: enhanced test environment (including testbench) support for enhancing probability of memory addressing failure by using non-NULL default value for malloced memory (optional, only if requested by configure option). This helps to track down some otherwise undetected issues within the testbench and is expected to be very useful in the future. --- runtime/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 3c90c447..e29f3b0b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -96,7 +96,7 @@ static inline rsRetVal uncompressMessage(msg_t *pMsg) */ int ret; iLenDefBuf = glbl.GetMaxLine(); - CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1))); + CHKmalloc(deflateBuf = MALLOC(sizeof(uchar) * (iLenDefBuf + 1))); ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1); DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n", ret, (long) iLenDefBuf, (int) (lenMsg-1)); @@ -223,7 +223,7 @@ sanitizeMessage(msg_t *pMsg) if(maxDest < sizeof(szSanBuf)) pDst = szSanBuf; else - CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + CHKmalloc(pDst = MALLOC(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ if(iscntrl((int) pszMsg[iSrc])) { -- cgit v1.2.3 From 7d78b3bdfd357dd921797ce983eb96532c56a7f6 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 10:41:47 +0100 Subject: restructured parser part of rsyslog now cleaner and hopefully usuable as a basis for loadable parser modules. I also cleaned up/consolidated some of the internal message generation functionality in rsyslogd. --- runtime/parser.c | 382 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 382 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e29f3b0b..89e59f87 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -37,7 +37,10 @@ #include "dirty.h" #include "msg.h" #include "obj.h" +#include "datetime.h" #include "errmsg.h" +#include "unicode-helper.h" +#include "dirty.h" /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) @@ -46,6 +49,7 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) +DEFobjCurrIf(datetime) /* static data */ @@ -60,11 +64,389 @@ rsRetVal parserClassInit(void) CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(objUse(datetime, CORE_COMPONENT)); // TODO: free components! see action.c finalize_it: RETiRet; } +/***************************RFC 5425 PARSER ******************************************************/ + + +/* Helper to parseRFCSyslogMsg. This function parses a field up to + * (and including) the SP character after it. The field contents is + * returned in a caller-provided buffer. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int iRet = 0; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + + /* this is the actual parsing loop */ + while(*pLenStr > 0 && *p2parse != ' ') { + *pResult++ = *p2parse++; + --(*pLenStr); + } + + if(*pLenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --(*pLenStr); + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + return 0; +} + + +/* Helper to parseRFCSyslogMsg. This function parses the structured + * data field of a message. It does NOT parse inside structured data, + * just gets the field as whole. Parsing the single entities is left + * to other functions. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int bCont = 1; + int iRet = 0; + int lenStr; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + lenStr = *pLenStr; + + /* this is the actual parsing loop + * Remeber: structured data starts with [ and includes any characters + * until the first ] followed by a SP. There may be spaces inside + * structured data. There may also be \] inside the structured data, which + * do NOT terminate an element. + */ + if(lenStr == 0 || *p2parse != '[') + return 1; /* this is NOT structured data! */ + + if(*p2parse == '-') { /* empty structured data? */ + *pResult++ = '-'; + ++p2parse; + --lenStr; + } else { + while(bCont) { + if(lenStr < 2) { + /* we now need to check if we have only structured data */ + if(lenStr > 0 && *p2parse == ']') { + *pResult++ = *p2parse; + p2parse++; + lenStr--; + bCont = 0; + } else { + iRet = 1; /* this is not valid! */ + bCont = 0; + } + } else if(*p2parse == '\\' && *(p2parse+1) == ']') { + /* this is escaped, need to copy both */ + *pResult++ = *p2parse++; + *pResult++ = *p2parse++; + lenStr -= 2; + } else if(*p2parse == ']' && *(p2parse+1) == ' ') { + /* found end, just need to copy the ] and eat the SP */ + *pResult++ = *p2parse; + p2parse += 2; + lenStr -= 2; + bCont = 0; + } else { + *pResult++ = *p2parse++; + --lenStr; + } + } + } + + if(lenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --lenStr; + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + *pLenStr = lenStr; + return 0; +} + +/* parse a RFC5424-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. Please note that + * the name (parse *RFC*) stems from the hope that syslog-protocol will + * some time become an RFC. Do not confuse this with informational + * RFC 3164 (which is legacy syslog). + * + * currently supported format: + * + * VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG + * + * is already stripped when this function is entered. VERSION already + * has been confirmed to be "1", but has NOT been stripped from the message. + * + * rger, 2005-11-24 + */ +static int parseRFCSyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + uchar *pBuf; + int lenMsg; + int bContParse = 1; + + BEGINfunc + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; + + /* do a sanity check on the version and eat it (the caller checked this already) */ + assert(p2parse[0] == '1' && p2parse[1] == ' '); + p2parse += 2; + lenMsg -= 2; + + /* Now get us some memory we can use as a work buffer while parsing. + * We simply allocated a buffer sufficiently large to hold all of the + * message, so we can not run into any troubles. I think this is + * more wise then to use individual buffers. + */ + if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) + return 1; + + /* IMPORTANT NOTE: + * Validation is not actually done below nor are any errors handled. I have + * NOT included this for the current proof of concept. However, it is strongly + * advisable to add it when this code actually goes into production. + * rgerhards, 2005-11-24 + */ + + /* TIMESTAMP */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + } else { + DBGPRINTF("no TIMESTAMP detected!\n"); + bContParse = 0; + } + + /* HOSTNAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); + } + + /* APP-NAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetAPPNAME(pMsg, (char*)pBuf); + } + + /* PROCID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetPROCID(pMsg, (char*)pBuf); + } + + /* MSGID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetMSGID(pMsg, (char*)pBuf); + } + + /* STRUCTURED-DATA */ + if(bContParse) { + parseRFCStructuredData(&p2parse, pBuf, &lenMsg); + MsgSetStructuredData(pMsg, (char*)pBuf); + } + + /* MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + free(pBuf); + ENDfunc + return 0; /* all ok */ +} + + +/*********************** END RFC5425 PARSER ******************************************/ + +/***************************RFC 5425 PARSER ******************************************************/ + + +/* parse a legay-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. + * rger, 2005-11-24 + * As of 2006-01-10, I am removing the logic to continue parsing only + * when a valid TIMESTAMP is detected. Validity of other fields already + * is ignored. This is due to the fact that the parser has grown smarter + * and is now more able to understand different dialects of the syslog + * message format. I do not expect any bad side effects of this change, + * but I thought I log it in this comment. + * rgerhards, 2006-01-10 + */ +static int parseLegacySyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + int lenMsg; + int bTAGCharDetected; + int i; /* general index for parsing */ + uchar bufParseTAG[CONF_TAG_MAXSIZE]; + uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; + BEGINfunc + + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + + /* Check to see if msg contains a timestamp. We start by assuming + * that the message timestamp is the time of reception (which we + * generated ourselfs and then try to actually find one inside the + * message. There we go from high-to low precison and are done + * when we find a matching one. -- rgerhards, 2008-09-16 + */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; + } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ + ++p2parse; /* move over space */ + --lenMsg; + if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* indeed, we got it! */ + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else {/* parse pointer needs to be restored, as we moved it off-by-one + * for this try. + */ + --p2parse; + ++lenMsg; + } + } + + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + + /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we + * do this only when the user has not forbidden this. I now introduce some + * code that allows a user to configure rsyslogd to treat the rest of the + * message as MSG part completely. In this case, the hostname will be the + * machine that we received the message from and the tag will be empty. This + * is meant to be an interim solution, but for now it is in the code. + */ + if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { + /* parse HOSTNAME - but only if this is network-received! + * rger, 2005-11-14: we still have a problem with BSD messages. These messages + * do NOT include a host name. In most cases, this leads to the TAG to be treated + * as hostname and the first word of the message as the TAG. Clearly, this is not + * of advantage ;) I think I have now found a way to handle this situation: there + * are certain characters which are frequently used in TAG (e.g. ':'), which are + * *invalid* in host names. So while parsing the hostname, I check for these characters. + * If I find them, I set a simple flag but continue. After parsing, I check the flag. + * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change + * the fields. I think this logic shall work with any type of syslog message. + * rgerhards, 2009-06-23: and I now have extended this logic to every character + * that is not a valid hostname. + */ + bTAGCharDetected = 0; + if(lenMsg > 0 && flags & PARSE_HOSTNAME) { + i = 0; + while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' + || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { + bufParseHOSTNAME[i] = p2parse[i]; + ++i; + } + + if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { + /* we got a hostname! */ + p2parse += i + 1; /* "eat" it (including SP delimiter) */ + lenMsg -= i + 1; + bufParseHOSTNAME[i] = '\0'; + MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); + } + } + + /* now parse TAG - that should be present in message from all sources. + * This code is somewhat not compliant with RFC 3164. As of 3164, + * the TAG field is ended by any non-alphanumeric character. In + * practice, however, the TAG often contains dashes and other things, + * which would end the TAG. So it is not desirable. As such, we only + * accept colon and SP to be terminators. Even there is a slight difference: + * a colon is PART of the TAG, while a SP is NOT part of the tag + * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character + * size limit (from RFC3164) on the tag. This had bad effects on existing + * envrionments, as sysklogd didn't obey it either (probably another bug + * in RFC3164...). We now receive the full size, but will modify the + * outputs so that only 32 characters max are used by default. + */ + i = 0; + while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { + bufParseTAG[i++] = *p2parse++; + --lenMsg; + } + if(lenMsg > 0 && *p2parse == ':') { + ++p2parse; + --lenMsg; + bufParseTAG[i++] = ':'; + } + + /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG + * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 + */ + bufParseTAG[i] = '\0'; /* terminate string */ + MsgSetTAG(pMsg, bufParseTAG, i); + } else {/* we enter this code area when the user has instructed rsyslog NOT + * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 + */ + if(!(flags & INTERNAL_MSG)) { + DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); + } + } + + /* The rest is the actual MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + ENDfunc + return 0; /* all ok */ +} + + +/***************************END RFC 5425 PARSER ******************************************************/ + /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. -- cgit v1.2.3 From 6f511cecfae3592f271627ebcb41e6a8c4f831e9 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 12:39:48 +0100 Subject: more cleanup and working towards a parser module calling interface I cleaned up a lot of config variable access along the way. This version compiles and runs, but does not yet offer any enhanced functionality. pmrfc5424 is just a dummy that is not yet being used. --- runtime/parser.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 89e59f87..e2ad69e4 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -39,8 +39,10 @@ #include "obj.h" #include "datetime.h" #include "errmsg.h" +#include "parser.h" #include "unicode-helper.h" #include "dirty.h" +#include "cfsysline.h" /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) @@ -52,23 +54,22 @@ DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) /* static data */ +static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ +/* config data */ +static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ +static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ +static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ -/* this is a dummy class init +/* we need to provide standard constructors and destructors, even though + * we only have static methods. The framework requires that. */ -rsRetVal parserClassInit(void) -{ - DEFiRet; +BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ +ENDobjConstruct(parser) - /* request objects we use */ - CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ - CHKiRet(objUse(glbl, CORE_COMPONENT)); - CHKiRet(objUse(errmsg, CORE_COMPONENT)); - CHKiRet(objUse(datetime, CORE_COMPONENT)); -// TODO: free components! see action.c -finalize_it: - RETiRet; -} +BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ +CODESTARTobjDestruct(parser) +ENDobjDestruct(parser) /***************************RFC 5425 PARSER ******************************************************/ @@ -643,7 +644,8 @@ finalize_it: * extended to support configured parsers. * rgerhards, 2008-10-09 */ -rsRetVal parseMsg(msg_t *pMsg) +static rsRetVal +ParseMsg(msg_t *pMsg) { DEFiRet; uchar *msg; @@ -709,3 +711,57 @@ rsRetVal parseMsg(msg_t *pMsg) finalize_it: RETiRet; } + + +/* queryInterface function-- rgerhards, 2009-11-03 + */ +BEGINobjQueryInterface(parser) +CODESTARTobjQueryInterface(parser) + if(pIf->ifVersion != parserCURR_IF_VERSION) { /* check for current version, increment on each change */ + ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED); + } + + /* ok, we have the right interface, so let's fill it + * Please note that we may also do some backwards-compatibility + * work here (if we can support an older interface version - that, + * of course, also affects the "if" above). + */ + pIf->ParseMsg = ParseMsg; +finalize_it: +ENDobjQueryInterface(parser) + + + +/* Reset config variables to default values. + * rgerhards, 2007-07-17 + */ +static rsRetVal +resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unused)) *pVal) +{ + cCCEscapeChar = '#'; + bEscapeCCOnRcv = 1; /* default is to escape control characters */ + bDropTrailingLF = 1; /* default is to drop trailing LF's on reception */ + + return RS_RET_OK; +} + + +/* Initialize the parser class. Must be called as the very first method + * before anything else is called inside this class. + * rgerhards, 2009-11-02 + */ +//BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ + /* request objects we use */ + CHKiRet(objUse(glbl, CORE_COMPONENT)); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(objUse(datetime, CORE_COMPONENT)); + + bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */ + + CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); +ENDObjClassInit(parser) + -- cgit v1.2.3 From b1db196953713dd09c499a3edf81347bd903c19e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 18:44:02 +0100 Subject: one step closer to dynamically loadable parsers This is a milestone commit, which adds new code that breaks nothing, but also does not add any visible change. Just prep work... --- runtime/parser.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 6 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e2ad69e4..f59d1974 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -55,20 +55,113 @@ DEFobjCurrIf(datetime) /* static data */ static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ + /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ +/* we create a small helper list of parsers so that we can obtain their + * handles if the config needs one. This is also used to unload all modules on + * shutdown. + */ +parserList_t *pParsLstRoot = NULL; + + +/* intialize (but NOT allocate) a parser list. Primarily meant as a hook + * which can be used to extend the list in the future. So far, just sets + * it to NULL. + */ +static rsRetVal +InitParserList(parserList_t **pListRoot) +{ + *pListRoot = NULL; + return RS_RET_OK; +} + -/* we need to provide standard constructors and destructors, even though - * we only have static methods. The framework requires that. +/* Add a parser to the list. We use a VERY simple and ineffcient algorithm, + * but it is employed only for a few milliseconds during config processing. So + * I prefer to keep it very simple and with simple data structures. Unfortunately, + * we need to preserve the order, but I don't like to add a tail pointer as that + * would require a container object. So I do the extra work to skip to the tail + * when adding elements... + * rgerhards, 2009-11-03 */ +static rsRetVal +AddParserToList(parserList_t **ppListRoot, parser_t *pParser) +{ + parserList_t *pThis; + parserList_t *pTail; + DEFiRet; + + CHKmalloc(pThis = MALLOC(sizeof(parserList_t))); + pThis->pParser = pParser; + pThis->pNext = NULL; + + if(*ppListRoot == NULL) { + pThis->pNext = *ppListRoot; + *ppListRoot = pThis; + } else { + /* find tail first */ + for(pTail = *ppListRoot ; pTail->pNext != NULL ; pTail = pTail->pNext) + /* just search, do nothing else */; + /* add at tail */ + pTail->pNext = pThis; + } + +finalize_it: + RETiRet; +} + + +/* find a parser based on the provided name */ +static rsRetVal +FindParser(parser_t **ppParser, uchar *pName) +{ + parserList_t *pThis; + DEFiRet; + + for(pThis = pParsLstRoot ; pThis != NULL ; pThis = pThis->pNext) { + if(ustrcmp(pThis->pParser->pName, pName) == 0) { + *ppParser = pThis->pParser; + FINALIZE; /* found it, iRet still eq. OK! */ + } + } + + iRet = RS_RET_PARSER_NOT_FOUND; + +finalize_it: + RETiRet; +} + + +/* --- END helper functions for parser list handling --- */ + + BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ ENDobjConstruct(parser) +/* ConstructionFinalizer. The most important chore is to add the parser object + * to our global list of available parsers. + * rgerhards, 2009-11-03 + */ +rsRetVal parserConstructFinalize(parser_t *pThis) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, parser); + CHKiRet(AddParserToList(&pParsLstRoot, pThis)); + DBGPRINTF("parser '%s' added to list of available parser\n", pThis->pName); + +finalize_it: + RETiRet; +} + BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(parser) + free(pThis->pName); + //TODO: free module! ENDobjDestruct(parser) /***************************RFC 5425 PARSER ******************************************************/ @@ -536,7 +629,7 @@ finalize_it: * rgerhards, 2007-09-14 */ static inline rsRetVal -sanitizeMessage(msg_t *pMsg) +SanitizeMsg(msg_t *pMsg) { DEFiRet; uchar *pszMsg; @@ -656,7 +749,7 @@ ParseMsg(msg_t *pMsg) if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); - CHKiRet(sanitizeMessage(pMsg)); + CHKiRet(SanitizeMsg(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, getRcvFrom(pMsg), pMsg->pszRawMsg); @@ -712,6 +805,54 @@ finalize_it: RETiRet; } +/* set the parser name - string is copied over, call can continue to use it, + * but must free it if desired. + */ +static rsRetVal +SetName(parser_t *pThis, uchar *name) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, parser); + assert(name != NULL); + + if(pThis->pName != NULL) { + free(pThis->pName); + pThis->pName = NULL; + } + + CHKmalloc(pThis->pName = ustrdup(name)); + +finalize_it: + RETiRet; +} + + +/* set a pointer to "our" module. Note that no module + * pointer must already be set. + */ +static rsRetVal +SetModPtr(parser_t *pThis, modInfo_t *pMod) +{ + ISOBJ_TYPE_assert(pThis, parser); + assert(pMod != NULL); + assert(pThis->pModule == NULL); + pThis->pModule = pMod; + return RS_RET_OK; +} + + +/* Specify if we should do standard message sanitazion before we pass the data + * down to the parser. + */ +static rsRetVal +SetDoSanitazion(parser_t *pThis, int bDoIt) +{ + ISOBJ_TYPE_assert(pThis, parser); + pThis->bDoSanitazion = bDoIt; + return RS_RET_OK; +} + /* queryInterface function-- rgerhards, 2009-11-03 */ @@ -726,7 +867,17 @@ CODESTARTobjQueryInterface(parser) * work here (if we can support an older interface version - that, * of course, also affects the "if" above). */ + pIf->Construct = parserConstruct; + pIf->ConstructFinalize = parserConstructFinalize; + pIf->Destruct = parserDestruct; + pIf->SetName = SetName; + pIf->SetModPtr = SetModPtr; + pIf->SetDoSanitazion = SetDoSanitazion; pIf->ParseMsg = ParseMsg; + pIf->SanitizeMsg = SanitizeMsg; + pIf->InitParserList = InitParserList; + pIf->AddParserToList = AddParserToList; + pIf->FindParser = FindParser; finalize_it: ENDobjQueryInterface(parser) @@ -750,8 +901,8 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus * before anything else is called inside this class. * rgerhards, 2009-11-02 */ -//BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ -BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +//BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ /* request objects we use */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); @@ -763,5 +914,7 @@ BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); + + InitParserList(&pParsLstRoot); ENDObjClassInit(parser) -- cgit v1.2.3 From 1b7f5c54684db29c096e09238648a45dce78ebee Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 10:40:27 +0100 Subject: moved rfc3164/5424 code to new parser modules another milestone commit: the program works, the new interface is used, some more cleanup is needed and the per-ruleset config options are still missing. But we are getting closer... --- runtime/parser.c | 476 ++++++++----------------------------------------------- 1 file changed, 69 insertions(+), 407 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index f59d1974..2c764739 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -40,6 +40,7 @@ #include "datetime.h" #include "errmsg.h" #include "parser.h" +#include "ruleset.h" #include "unicode-helper.h" #include "dirty.h" #include "cfsysline.h" @@ -52,21 +53,25 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) +DEFobjCurrIf(ruleset) /* static data */ -static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ -/* we create a small helper list of parsers so that we can obtain their - * handles if the config needs one. This is also used to unload all modules on - * shutdown. +/* This is the list of all parsers known to us. + * This is also used to unload all modules on shutdown. */ parserList_t *pParsLstRoot = NULL; +/* this is the list of the default parsers, to be used if no others + * are specified. + */ +parserList_t *pDfltParsLst = NULL; + /* intialize (but NOT allocate) a parser list. Primarily meant as a hook * which can be used to extend the list in the future. So far, just sets @@ -139,6 +144,27 @@ finalize_it: /* --- END helper functions for parser list handling --- */ +/* Add a an already existing parser to the default list. As usual, order + * of calls is important (most importantly, that means the legacy parser, + * which can process everything, MUST be added last!). + * rgerhards, 2009-11-04 + */ +static rsRetVal +AddDfltParser(uchar *pName) +{ + parser_t *pParser; + DEFiRet; + + CHKiRet(FindParser(&pParser, pName)); + CHKiRet(AddParserToList(&pDfltParsLst, pParser)); + dbgprintf("Parser '%s' added to default parser set.\n", pName); + +finalize_it: + RETiRet; +} + + + BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ ENDobjConstruct(parser) @@ -152,7 +178,7 @@ rsRetVal parserConstructFinalize(parser_t *pThis) ISOBJ_TYPE_assert(pThis, parser); CHKiRet(AddParserToList(&pParsLstRoot, pThis)); - DBGPRINTF("parser '%s' added to list of available parser\n", pThis->pName); + DBGPRINTF("Parser '%s' added to list of available parsers.\n", pThis->pName); finalize_it: RETiRet; @@ -164,383 +190,6 @@ CODESTARTobjDestruct(parser) //TODO: free module! ENDobjDestruct(parser) -/***************************RFC 5425 PARSER ******************************************************/ - - -/* Helper to parseRFCSyslogMsg. This function parses a field up to - * (and including) the SP character after it. The field contents is - * returned in a caller-provided buffer. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int iRet = 0; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - - /* this is the actual parsing loop */ - while(*pLenStr > 0 && *p2parse != ' ') { - *pResult++ = *p2parse++; - --(*pLenStr); - } - - if(*pLenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --(*pLenStr); - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - return 0; -} - - -/* Helper to parseRFCSyslogMsg. This function parses the structured - * data field of a message. It does NOT parse inside structured data, - * just gets the field as whole. Parsing the single entities is left - * to other functions. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int bCont = 1; - int iRet = 0; - int lenStr; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - lenStr = *pLenStr; - - /* this is the actual parsing loop - * Remeber: structured data starts with [ and includes any characters - * until the first ] followed by a SP. There may be spaces inside - * structured data. There may also be \] inside the structured data, which - * do NOT terminate an element. - */ - if(lenStr == 0 || *p2parse != '[') - return 1; /* this is NOT structured data! */ - - if(*p2parse == '-') { /* empty structured data? */ - *pResult++ = '-'; - ++p2parse; - --lenStr; - } else { - while(bCont) { - if(lenStr < 2) { - /* we now need to check if we have only structured data */ - if(lenStr > 0 && *p2parse == ']') { - *pResult++ = *p2parse; - p2parse++; - lenStr--; - bCont = 0; - } else { - iRet = 1; /* this is not valid! */ - bCont = 0; - } - } else if(*p2parse == '\\' && *(p2parse+1) == ']') { - /* this is escaped, need to copy both */ - *pResult++ = *p2parse++; - *pResult++ = *p2parse++; - lenStr -= 2; - } else if(*p2parse == ']' && *(p2parse+1) == ' ') { - /* found end, just need to copy the ] and eat the SP */ - *pResult++ = *p2parse; - p2parse += 2; - lenStr -= 2; - bCont = 0; - } else { - *pResult++ = *p2parse++; - --lenStr; - } - } - } - - if(lenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --lenStr; - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - *pLenStr = lenStr; - return 0; -} - -/* parse a RFC5424-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. Please note that - * the name (parse *RFC*) stems from the hope that syslog-protocol will - * some time become an RFC. Do not confuse this with informational - * RFC 3164 (which is legacy syslog). - * - * currently supported format: - * - * VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG - * - * is already stripped when this function is entered. VERSION already - * has been confirmed to be "1", but has NOT been stripped from the message. - * - * rger, 2005-11-24 - */ -static int parseRFCSyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - uchar *pBuf; - int lenMsg; - int bContParse = 1; - - BEGINfunc - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; - - /* do a sanity check on the version and eat it (the caller checked this already) */ - assert(p2parse[0] == '1' && p2parse[1] == ' '); - p2parse += 2; - lenMsg -= 2; - - /* Now get us some memory we can use as a work buffer while parsing. - * We simply allocated a buffer sufficiently large to hold all of the - * message, so we can not run into any troubles. I think this is - * more wise then to use individual buffers. - */ - if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) - return 1; - - /* IMPORTANT NOTE: - * Validation is not actually done below nor are any errors handled. I have - * NOT included this for the current proof of concept. However, it is strongly - * advisable to add it when this code actually goes into production. - * rgerhards, 2005-11-24 - */ - - /* TIMESTAMP */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - } else { - DBGPRINTF("no TIMESTAMP detected!\n"); - bContParse = 0; - } - - /* HOSTNAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); - } - - /* APP-NAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetAPPNAME(pMsg, (char*)pBuf); - } - - /* PROCID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetPROCID(pMsg, (char*)pBuf); - } - - /* MSGID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetMSGID(pMsg, (char*)pBuf); - } - - /* STRUCTURED-DATA */ - if(bContParse) { - parseRFCStructuredData(&p2parse, pBuf, &lenMsg); - MsgSetStructuredData(pMsg, (char*)pBuf); - } - - /* MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - free(pBuf); - ENDfunc - return 0; /* all ok */ -} - - -/*********************** END RFC5425 PARSER ******************************************/ - -/***************************RFC 5425 PARSER ******************************************************/ - - -/* parse a legay-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. - * rger, 2005-11-24 - * As of 2006-01-10, I am removing the logic to continue parsing only - * when a valid TIMESTAMP is detected. Validity of other fields already - * is ignored. This is due to the fact that the parser has grown smarter - * and is now more able to understand different dialects of the syslog - * message format. I do not expect any bad side effects of this change, - * but I thought I log it in this comment. - * rgerhards, 2006-01-10 - */ -static int parseLegacySyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - int lenMsg; - int bTAGCharDetected; - int i; /* general index for parsing */ - uchar bufParseTAG[CONF_TAG_MAXSIZE]; - uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; - BEGINfunc - - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - - /* Check to see if msg contains a timestamp. We start by assuming - * that the message timestamp is the time of reception (which we - * generated ourselfs and then try to actually find one inside the - * message. There we go from high-to low precison and are done - * when we find a matching one. -- rgerhards, 2008-09-16 - */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; - } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ - ++p2parse; /* move over space */ - --lenMsg; - if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* indeed, we got it! */ - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else {/* parse pointer needs to be restored, as we moved it off-by-one - * for this try. - */ - --p2parse; - ++lenMsg; - } - } - - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - - /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we - * do this only when the user has not forbidden this. I now introduce some - * code that allows a user to configure rsyslogd to treat the rest of the - * message as MSG part completely. In this case, the hostname will be the - * machine that we received the message from and the tag will be empty. This - * is meant to be an interim solution, but for now it is in the code. - */ - if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { - /* parse HOSTNAME - but only if this is network-received! - * rger, 2005-11-14: we still have a problem with BSD messages. These messages - * do NOT include a host name. In most cases, this leads to the TAG to be treated - * as hostname and the first word of the message as the TAG. Clearly, this is not - * of advantage ;) I think I have now found a way to handle this situation: there - * are certain characters which are frequently used in TAG (e.g. ':'), which are - * *invalid* in host names. So while parsing the hostname, I check for these characters. - * If I find them, I set a simple flag but continue. After parsing, I check the flag. - * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change - * the fields. I think this logic shall work with any type of syslog message. - * rgerhards, 2009-06-23: and I now have extended this logic to every character - * that is not a valid hostname. - */ - bTAGCharDetected = 0; - if(lenMsg > 0 && flags & PARSE_HOSTNAME) { - i = 0; - while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' - || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { - bufParseHOSTNAME[i] = p2parse[i]; - ++i; - } - - if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { - /* we got a hostname! */ - p2parse += i + 1; /* "eat" it (including SP delimiter) */ - lenMsg -= i + 1; - bufParseHOSTNAME[i] = '\0'; - MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); - } - } - - /* now parse TAG - that should be present in message from all sources. - * This code is somewhat not compliant with RFC 3164. As of 3164, - * the TAG field is ended by any non-alphanumeric character. In - * practice, however, the TAG often contains dashes and other things, - * which would end the TAG. So it is not desirable. As such, we only - * accept colon and SP to be terminators. Even there is a slight difference: - * a colon is PART of the TAG, while a SP is NOT part of the tag - * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character - * size limit (from RFC3164) on the tag. This had bad effects on existing - * envrionments, as sysklogd didn't obey it either (probably another bug - * in RFC3164...). We now receive the full size, but will modify the - * outputs so that only 32 characters max are used by default. - */ - i = 0; - while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { - bufParseTAG[i++] = *p2parse++; - --lenMsg; - } - if(lenMsg > 0 && *p2parse == ':') { - ++p2parse; - --lenMsg; - bufParseTAG[i++] = ':'; - } - - /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG - * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 - */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } else {/* we enter this code area when the user has instructed rsyslog NOT - * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 - */ - if(!(flags & INTERNAL_MSG)) { - DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); - } - } - - /* The rest is the actual MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - ENDfunc - return 0; /* all ok */ -} - - -/***************************END RFC 5425 PARSER ******************************************************/ - /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. @@ -645,10 +294,6 @@ SanitizeMsg(msg_t *pMsg) assert(pMsg != NULL); assert(pMsg->iLenRawMsg > 0); -# ifdef USE_NETZIP - CHKiRet(uncompressMessage(pMsg)); -# endif - pszMsg = pMsg->pszRawMsg; lenMsg = pMsg->iLenRawMsg; @@ -740,15 +385,22 @@ finalize_it: static rsRetVal ParseMsg(msg_t *pMsg) { - DEFiRet; uchar *msg; int pri; int lenMsg; int iPriText; + rsRetVal localRet; + parserList_t *pParserList; + static int iErrMsgRateLimiter = 0; + DEFiRet; if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + CHKiRet(SanitizeMsg(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ @@ -777,25 +429,35 @@ ParseMsg(msg_t *pMsg) pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have - * a traditional syslog message or one formatted according to syslog-protocol. - * We need to apply different parsers depending on that. We use the - * -protocol VERSION field for the detection. + /* we now need to go through our list of parsers and see which one is capable of + * parsing the message. */ - if(msg[0] == '1' && msg[1] == ' ') { - dbgprintf("Message has syslog-protocol format.\n"); - setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! - } - } else { /* we have legacy syslog */ - dbgprintf("Message has legacy syslog format.\n"); - setProtocolVersion(pMsg, 0); - if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + pParserList = ruleset.GetParserList(pMsg); + if(pParserList == NULL) + pParserList = pDfltParsLst; + DBGPRINTF("Using parser list %p%s.\n", pParserList, + (pParserList == pDfltParsLst) ? " (the default list)" : ""); + + while(pParserList != NULL) { + localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); + if(localRet != RS_RET_COULD_NOT_PARSE) + break; + pParserList = pParserList->pNext; + } + + /* We need to log a warning message and drop the message if we did not find a parser. + * Note that we log at most the first 1000 message, as this may very well be a problem + * that causes a message generation loop. We do not synchronize that counter, it doesn't + * matter if we log a handful messages more than we should... + */ + if(localRet != RS_RET_OK) { + if(++iErrMsgRateLimiter > 1000) { + errmsg.LogError(0, localRet, "Error: one message could not be processed by " + "any parser, message is being discarded (start of raw msg: '%50s')", + pMsg->pszRawMsg); } + DBGPRINTF("No parser could process the message (state %d), we need to discard it.\n", localRet); + ABORT_FINALIZE(localRet); } /* finalize message object */ @@ -877,6 +539,7 @@ CODESTARTobjQueryInterface(parser) pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; pIf->AddParserToList = AddParserToList; + pIf->AddDfltParser = AddDfltParser; pIf->FindParser = FindParser; finalize_it: ENDobjQueryInterface(parser) @@ -902,13 +565,11 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus * rgerhards, 2009-11-02 */ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ -//BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ /* request objects we use */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); CHKiRet(objUse(datetime, CORE_COMPONENT)); - - bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */ + CHKiRet(objUse(ruleset, CORE_COMPONENT)); CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); @@ -916,5 +577,6 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); InitParserList(&pParsLstRoot); + InitParserList(&pDfltParsLst); ENDObjClassInit(parser) -- cgit v1.2.3 From ef661fe13c0ab5018afedb1cb5b8cffab05ad7c4 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 11:33:09 +0100 Subject: finalized parser module calling interface looks like we are almost done and need only to add the ruleset parser-specific config options. --- runtime/parser.c | 93 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 26 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 2c764739..e87068fc 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -376,36 +376,19 @@ finalize_it: RETiRet; } - -/* Parse a received message. The object's rawmsg property is taken and - * parsed according to the relevant standards. This can later be - * extended to support configured parsers. - * rgerhards, 2008-10-09 +/* A standard parser to parse out the PRI. This is made available in + * this module as it is expected that allmost all parsers will need + * that functionality and so they do not need to implement it themsleves. */ -static rsRetVal -ParseMsg(msg_t *pMsg) +static inline rsRetVal +ParsePRI(msg_t *pMsg) { - uchar *msg; int pri; + uchar *msg; int lenMsg; int iPriText; - rsRetVal localRet; - parserList_t *pParserList; - static int iErrMsgRateLimiter = 0; DEFiRet; - if(pMsg->iLenRawMsg == 0) - ABORT_FINALIZE(RS_RET_EMPTY_MSG); - -# ifdef USE_NETZIP - CHKiRet(uncompressMessage(pMsg)); -# endif - - CHKiRet(SanitizeMsg(pMsg)); - - /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ - DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, getRcvFrom(pMsg), pMsg->pszRawMsg); - /* pull PRI */ lenMsg = pMsg->iLenRawMsg; msg = pMsg->pszRawMsg; @@ -428,9 +411,43 @@ ParseMsg(msg_t *pMsg) pMsg->iFacility = LOG_FAC(pri); pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); + RETiRet; +} + + +/* Parse a received message. The object's rawmsg property is taken and + * parsed according to the relevant standards. This can later be + * extended to support configured parsers. + * rgerhards, 2008-10-09 + */ +static rsRetVal +ParseMsg(msg_t *pMsg) +{ + rsRetVal localRet; + parserList_t *pParserList; + parser_t *pParser; + bool bIsSanitized; + bool bPRIisParsed; + static int iErrMsgRateLimiter = 0; + DEFiRet; + + if(pMsg->iLenRawMsg == 0) + ABORT_FINALIZE(RS_RET_EMPTY_MSG); + +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + + /* we take the risk to print a non-sanitized string, because this is the best we can get + * (and that functionality is too important for debugging to drop it...). + */ + DBGPRINTF("msg parser: flags %x, from '%s', msg '%.50s'\n", pMsg->msgFlags, + getRcvFrom(pMsg), pMsg->pszRawMsg); /* we now need to go through our list of parsers and see which one is capable of - * parsing the message. + * parsing the message. Note that the first parser that requires message sanitization + * will cause it to happen. After that, access to the unsanitized message is no + * loger possible. */ pParserList = ruleset.GetParserList(pMsg); if(pParserList == NULL) @@ -438,7 +455,18 @@ ParseMsg(msg_t *pMsg) DBGPRINTF("Using parser list %p%s.\n", pParserList, (pParserList == pDfltParsLst) ? " (the default list)" : ""); + bIsSanitized = FALSE; + bPRIisParsed = FALSE; while(pParserList != NULL) { + pParser = pParserList->pParser; + if(pParser->bDoSanitazion && bIsSanitized == FALSE) { + CHKiRet(SanitizeMsg(pMsg)); + if(pParser->bDoPRIParsing && bPRIisParsed == FALSE) { + CHKiRet(ParsePRI(pMsg)); + bPRIisParsed = TRUE; + } + bIsSanitized = TRUE; + } localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); if(localRet != RS_RET_COULD_NOT_PARSE) break; @@ -453,14 +481,14 @@ ParseMsg(msg_t *pMsg) if(localRet != RS_RET_OK) { if(++iErrMsgRateLimiter > 1000) { errmsg.LogError(0, localRet, "Error: one message could not be processed by " - "any parser, message is being discarded (start of raw msg: '%50s')", + "any parser, message is being discarded (start of raw msg: '%.50s')", pMsg->pszRawMsg); } DBGPRINTF("No parser could process the message (state %d), we need to discard it.\n", localRet); ABORT_FINALIZE(localRet); } - /* finalize message object */ + /* "finalize" message object */ pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ finalize_it: @@ -516,6 +544,18 @@ SetDoSanitazion(parser_t *pThis, int bDoIt) } +/* Specify if we should do standard PRI parsing before we pass the data + * down to the parser module. + */ +static rsRetVal +SetDoPRIParsing(parser_t *pThis, int bDoIt) +{ + ISOBJ_TYPE_assert(pThis, parser); + pThis->bDoPRIParsing = bDoIt; + return RS_RET_OK; +} + + /* queryInterface function-- rgerhards, 2009-11-03 */ BEGINobjQueryInterface(parser) @@ -535,6 +575,7 @@ CODESTARTobjQueryInterface(parser) pIf->SetName = SetName; pIf->SetModPtr = SetModPtr; pIf->SetDoSanitazion = SetDoSanitazion; + pIf->SetDoPRIParsing = SetDoPRIParsing; pIf->ParseMsg = ParseMsg; pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; -- cgit v1.2.3 From b6a92e2f23895cadf9f0ea2de64021912bd2eeb0 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 14:21:08 +0100 Subject: added $RulesetParser config directive The implementation is now almost done and works, including doc. I now need to verify shutdown, guess there are some resource leaks left... --- runtime/parser.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e87068fc..0689657f 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -450,9 +450,10 @@ ParseMsg(msg_t *pMsg) * loger possible. */ pParserList = ruleset.GetParserList(pMsg); - if(pParserList == NULL) + if(pParserList == NULL) { pParserList = pDfltParsLst; - DBGPRINTF("Using parser list %p%s.\n", pParserList, + } + DBGPRINTF("parse using parser list %p%s.\n", pParserList, (pParserList == pDfltParsLst) ? " (the default list)" : ""); bIsSanitized = FALSE; @@ -467,7 +468,8 @@ ParseMsg(msg_t *pMsg) } bIsSanitized = TRUE; } - localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); + localRet = pParser->pModule->mod.pm.parse(pMsg); + dbgprintf("Parser '%s' returned %d\n", pParser->pName, localRet); if(localRet != RS_RET_COULD_NOT_PARSE) break; pParserList = pParserList->pNext; -- cgit v1.2.3 From aa2e8ea15b2001f131ebd196c180cc82aceb57b4 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 15:34:31 +0100 Subject: first complete implementation of loadable parser system I have now done the necessary cleanup. Looks like everything is in place. Unfortunately, I do not yet have any actual parser that is not built-in, but I think we can postpone working on that when the first one appears. I don't expect troubles in that case, but you never know ;) --- runtime/parser.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0689657f..38f72986 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -85,6 +85,27 @@ InitParserList(parserList_t **pListRoot) } +/* destruct a parser list. The list elements are destroyed, but the parser objects + * themselves are not modified. (That is done at a late stage during rsyslogd + * shutdown and need not be considered here.) + */ +static rsRetVal +DestructParserList(parserList_t **ppListRoot) +{ + parserList_t *pParsLst; + parserList_t *pParsLstDel; + + pParsLst = *ppListRoot; + while(pParsLst != NULL) { + pParsLstDel = pParsLst; + pParsLst = pParsLst->pNext; + free(pParsLstDel); + } + *ppListRoot = NULL; + return RS_RET_OK; +} + + /* Add a parser to the list. We use a VERY simple and ineffcient algorithm, * but it is employed only for a few milliseconds during config processing. So * I prefer to keep it very simple and with simple data structures. Unfortunately, @@ -143,7 +164,6 @@ finalize_it: /* --- END helper functions for parser list handling --- */ - /* Add a an already existing parser to the default list. As usual, order * of calls is important (most importantly, that means the legacy parser, * which can process everything, MUST be added last!). @@ -186,8 +206,8 @@ finalize_it: BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(parser) + dbgprintf("destructing parser '%s'\n", pThis->pName); free(pThis->pName); - //TODO: free module! ENDobjDestruct(parser) @@ -581,6 +601,7 @@ CODESTARTobjQueryInterface(parser) pIf->ParseMsg = ParseMsg; pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; + pIf->DestructParserList = DestructParserList; pIf->AddParserToList = AddParserToList; pIf->AddDfltParser = AddDfltParser; pIf->FindParser = FindParser; @@ -602,6 +623,37 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus return RS_RET_OK; } +/* This destroys the master parserlist and all of its parser entries. MUST only be + * done when the module is shut down. Parser modules are NOT unloaded, rsyslog + * does that at a later stage for all dynamically loaded modules. + */ +static void +destroyMasterParserList(void) +{ + parserList_t *pParsLst; + parserList_t *pParsLstDel; + + pParsLst = pParsLstRoot; + while(pParsLst != NULL) { + parserDestruct(&pParsLst->pParser); + pParsLstDel = pParsLst; + pParsLst = pParsLst->pNext; + free(pParsLstDel); + } +} + +/* Exit our class. + * rgerhards, 2009-11-04 + */ +BEGINObjClassExit(parser, OBJ_IS_CORE_MODULE) /* class, version */ + DestructParserList(&pDfltParsLst); + destroyMasterParserList(); + objRelease(glbl, CORE_COMPONENT); + objRelease(errmsg, CORE_COMPONENT); + objRelease(datetime, CORE_COMPONENT); + objRelease(ruleset, CORE_COMPONENT); +ENDObjClassExit(parser) + /* Initialize the parser class. Must be called as the very first method * before anything else is called inside this class. -- cgit v1.2.3