From ace4f2f75202aec39449dac11b9eb1deca7428d7 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 8 Oct 2008 18:55:11 +0200 Subject: reordered imudp processing. Message parsing is now done as part of main message queue worker processing (was part of the input thread) This should also improve performance, as potentially more work is done in parallel. --- runtime/parser.c | 314 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 runtime/parser.c (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c new file mode 100644 index 00000000..8c4272a0 --- /dev/null +++ b/runtime/parser.c @@ -0,0 +1,314 @@ +/* parser.c + * This module contains functions for message parsers. It still needs to be + * converted into an object (and much extended). + * + * Module begun 2008-10-09 by Rainer Gerhards (based on previous code from syslogd.c) + * + * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see . + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ +#include "config.h" +#include +#include +#include +#ifdef USE_NETZIP +#include +#endif + +#include "rsyslog.h" +#include "dirty.h" +#include "msg.h" +#include "obj.h" +#include "errmsg.h" + +/* some defines */ +#define DEFUPRI (LOG_USER|LOG_NOTICE) + +#warning "msg object must be updated with new property for persisting the queue!" +/* definitions for objects we access */ +DEFobjStaticHelpers +DEFobjCurrIf(glbl) +DEFobjCurrIf(errmsg) + +/* static data */ + + +/* this is a dummy class init + */ +rsRetVal parserClassInit(void) +{ + DEFiRet; + + /* request objects we use */ + CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ + CHKiRet(objUse(glbl, CORE_COMPONENT)); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); +// TODO: free components! see action.c +finalize_it: + RETiRet; +} + + +/* uncompress a received message if it is compressed. + * pMsg->pszRawMsg buffer is updated. + * rgerhards, 2008-10-09 + */ +static inline rsRetVal uncompressMessage(msg_t *pMsg) +{ + DEFiRet; +# ifdef USE_NETZIP + uchar *deflateBuf = NULL; + uLongf iLenDefBuf; + uchar *pszMsg; + size_t lenMsg; + + assert(pMsg != NULL); + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* we first need to check if we have a compressed record. If so, + * we must decompress it. + */ + if(lenMsg > 0 && *pszMsg == 'z') { /* compressed data present? (do NOT change order if conditions!) */ + /* we have compressed data, so let's deflate it. We support a maximum + * message size of iMaxLine. If it is larger, an error message is logged + * and the message is dropped. We do NOT try to decompress larger messages + * as such might be used for denial of service. It might happen to later + * builds that such functionality be added as an optional, operator-configurable + * feature. + */ + int ret; + iLenDefBuf = glbl.GetMaxLine(); + CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1))); + ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1); + DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n", + ret, (long) iLenDefBuf, (int) (lenMsg-1)); + /* Now check if the uncompression worked. If not, there is not much we can do. In + * that case, we log an error message but ignore the message itself. Storing the + * compressed text is dangerous, as it contains control characters. So we do + * not do this. If someone would like to have a copy, this code here could be + * modified to do a hex-dump of the buffer in question. We do not include + * this functionality right now. + * rgerhards, 2006-12-07 + */ + if(ret != Z_OK) { + errmsg.LogError(0, NO_ERRCODE, "Uncompression of a message failed with return code %d " + "- enable debug logging if you need further information. " + "Message ignored.", ret); + FINALIZE; /* unconditional exit, nothing left to do... */ + } + free(pMsg->pszRawMsg); + pMsg->pszRawMsg = deflateBuf; + pMsg->iLenRawMsg = iLenDefBuf; + deflateBuf = NULL; /* logically "freed" - caller is now responsible */ + } +finalize_it: + if(deflateBuf != NULL) + free(deflateBuf); + +# else /* ifdef USE_NETZIP */ + + /* in this case, we still need to check if the message is compressed. If so, we must + * tell the user we can not accept it. + */ + if(len > 0 && *msg == 'z') { + errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " + "support enabled. The message will be ignored."); + ABORT_FINALIZE(RS_RET_NO_ZIP); + } + +# endif /* ifdef USE_NETZIP */ + + RETiRet; +} + + +/* sanitize a received message + * if a message gets to large during sanitization, it is truncated. This is + * as specified in the upcoming syslog RFC series. + * rgerhards, 2008-10-09 + * We check if we have a NUL character at the very end of the + * message. This seems to be a frequent problem with a number of senders. + * So I have now decided to drop these NULs. However, if they are intentional, + * that may cause us some problems, e.g. with syslog-sign. On the other hand, + * current code always has problems with intentional NULs (as it needs to escape + * them to prevent problems with the C string libraries), so that does not + * really matter. Just to be on the save side, we'll log destruction of such + * NULs in the debug log. + * rgerhards, 2007-09-14 + */ +static inline rsRetVal +sanitizeMessage(msg_t *pMsg) +{ + DEFiRet; + uchar *pszMsg; + uchar *pDst; /* destination for copy job */ + size_t lenMsg; + size_t iSrc; + size_t iDst; + size_t iMaxLine; + + assert(pMsg != NULL); + +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* remove NUL character at end of message (see comment in function header) */ + if(pszMsg[lenMsg-1] == '\0') { + DBGPRINTF("dropped NUL at very end of message\n"); + lenMsg--; + } + + /* then we check if we need to drop trailing LFs, which often make + * their way into syslog messages unintentionally. In order to remain + * compatible to recent IETF developments, we allow the user to + * turn on/off this handling. rgerhards, 2007-07-23 + */ + if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { + DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); + lenMsg--; + } + + /* now copy over the message and sanitize it */ + /* TODO: can we get cheaper memory alloc? {alloca()?}*/ + iMaxLine = glbl.GetMaxLine(); + CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + iSrc = iDst = 0; + while(iSrc < lenMsg && iDst < iMaxLine) { + if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */ + /* changed to the sequence (somewhat) proposed in + * draft-ietf-syslog-protocol-19. rgerhards, 2006-11-30 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + } /* if we do not have space, we simply ignore the '\0'... */ + /* log an error? Very questionable... rgerhards, 2006-11-30 */ + /* decided: we do not log an error, it won't help... rger, 2007-06-21 */ + } else if(bEscapeCCOnRcv && iscntrl((int) pszMsg[iSrc])) { + /* we are configured to escape control characters. Please note + * that this most probably break non-western character sets like + * Japanese, Korean or Chinese. rgerhards, 2007-07-17 + * Note: sysklogd logs octal values only for DEL and CCs above 127. + * For others, it logs ^n where n is the control char converted to an + * alphabet character. We like consistency and thus escape it to octal + * in all cases. If someone complains, we may change the mode. At least + * we known now what's going on. + * rgerhards, 2007-07-17 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + } /* again, if we do not have space, we ignore the char - see comment at '\0' */ + } else { + pDst[iDst++] = pszMsg[iSrc]; + } + ++iSrc; + } + pDst[iDst] = '\0'; /* space *is* reserved for this! */ + + /* we have a sanitized string. Let's save it now */ + free(pMsg->pszRawMsg); + if((pMsg->pszRawMsg = malloc((iDst+1) * sizeof(uchar))) == NULL) { + /* when we get no new buffer, we use what we already have ;) */ + pMsg->pszRawMsg = pDst; + } else { + /* trim buffer */ + memcpy(pMsg->pszRawMsg, pDst, iDst+1); + free(pDst); /* too big! */ + pMsg->iLenRawMsg = iDst; + } + +finalize_it: + RETiRet; +} + +/* Parse a received message. The object's rawmsg property is taken and + * parsed according to the relevant standards. This can later be + * extended to support configured parsers. + * rgerhards, 2008-10-09 + */ +rsRetVal parseMsg(msg_t *pMsg) +{ + DEFiRet; + uchar *msg; + int pri; + + CHKiRet(sanitizeMessage(pMsg)); + + /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ + DBGPRINTF("msg parser: flags %x, from '%s', msg %s\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + + /* pull PRI */ + pri = DEFUPRI; + msg = pMsg->pszRawMsg; + if(*msg == '<') { + pri = 0; + while(isdigit((int) *++msg)) { + pri = 10 * pri + (*msg - '0'); + } + if(*msg == '>') + ++msg; + if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) + pri = DEFUPRI; + } + pMsg->iFacility = LOG_FAC(pri); + pMsg->iSeverity = LOG_PRI(pri); + MsgSetUxTradMsg(pMsg, (char*) msg); + + if(pMsg->bParseHOSTNAME == 0) + MsgSetHOSTNAME(pMsg, (char*) pMsg->pszRcvFrom); + + /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have + * a traditional syslog message or one formatted according to syslog-protocol. + * We need to apply different parsers depending on that. We use the + * -protocol VERSION field for the detection. + */ + if(msg[0] == '1' && msg[1] == ' ') { + dbgprintf("Message has syslog-protocol format.\n"); + setProtocolVersion(pMsg, 1); + if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { // TODO: parseRFC... should pull flags from pMsg + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } else { /* we have legacy syslog */ + dbgprintf("Message has legacy syslog format.\n"); + setProtocolVersion(pMsg, 0); + if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } + + /* finalize message object */ + pMsg->bIsParsed = 1; /* this message is now parsed */ + MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ + +finalize_it: + RETiRet; +} -- cgit v1.2.3 From 6c6e9a0f3f7d454ba9553a750b195d7f99c7299a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 9 Oct 2008 13:45:56 +0200 Subject: moved bParseHostname and bIsParsed to msgFlags This enables us to use more efficient calling conventions and also helps us keep the on-disk structure of a msg object more consistent in future releases. --- runtime/parser.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 8c4272a0..fbdeebeb 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -41,7 +41,6 @@ /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) -#warning "msg object must be updated with new property for persisting the queue!" /* definitions for objects we access */ DEFobjStaticHelpers DEFobjCurrIf(glbl) @@ -306,7 +305,7 @@ rsRetVal parseMsg(msg_t *pMsg) } /* finalize message object */ - pMsg->bIsParsed = 1; /* this message is now parsed */ + pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ finalize_it: -- cgit v1.2.3 From 6b905b511b685f2ae28ef94d2e0ba14d1a3f4df3 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 3 Dec 2008 10:45:11 +0100 Subject: bugfix: code did not compile without zlib --- runtime/parser.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index fbdeebeb..15dfd4e0 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -127,12 +127,16 @@ finalize_it: /* in this case, we still need to check if the message is compressed. If so, we must * tell the user we can not accept it. */ - if(len > 0 && *msg == 'z') { + //pszMsg = pMsg->pszRawMsg; + //lenMsg = pMsg->iLenRawMsg; + //if(lenMsg > 0 && *msg == 'z') { + if(pMsg->iLenRawMsg > 0 && *pMsg->pszRawMsg == 'z') { errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " "support enabled. The message will be ignored."); ABORT_FINALIZE(RS_RET_NO_ZIP); } +finalize_it: # endif /* ifdef USE_NETZIP */ RETiRet; -- cgit v1.2.3 From c5bfd2b24ca8c490401a0835ec741c05acf0ed3e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 3 Dec 2008 10:46:27 +0100 Subject: some cleanup (forgotten...) --- runtime/parser.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 15dfd4e0..ec2a28c7 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -127,9 +127,6 @@ finalize_it: /* in this case, we still need to check if the message is compressed. If so, we must * tell the user we can not accept it. */ - //pszMsg = pMsg->pszRawMsg; - //lenMsg = pMsg->iLenRawMsg; - //if(lenMsg > 0 && *msg == 'z') { if(pMsg->iLenRawMsg > 0 && *pMsg->pszRawMsg == 'z') { errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " "support enabled. The message will be ignored."); @@ -295,7 +292,7 @@ rsRetVal parseMsg(msg_t *pMsg) if(msg[0] == '1' && msg[1] == ' ') { dbgprintf("Message has syslog-protocol format.\n"); setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { // TODO: parseRFC... should pull flags from pMsg + if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { msgDestruct(&pMsg); ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! } -- cgit v1.2.3 From 3e1220f434533b5e91de51f5de17cc76eaa8af45 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 5 Dec 2008 01:10:06 +0100 Subject: fixed some compiler warnings --- runtime/parser.c | 1 + 1 file changed, 1 insertion(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index ec2a28c7..b549cd19 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -27,6 +27,7 @@ #include "config.h" #include #include +#include #include #ifdef USE_NETZIP #include -- cgit v1.2.3 From 60b8ce14bf33e76237cf82dd1f68acc750e64316 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 8 Dec 2008 15:42:47 +0100 Subject: added $PreserveFQDN config file directive Enables to use FQDNs in sender names where the legacy default --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index b549cd19..b4ab0a3e 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -263,7 +263,7 @@ rsRetVal parseMsg(msg_t *pMsg) CHKiRet(sanitizeMessage(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ - DBGPRINTF("msg parser: flags %x, from '%s', msg %s\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); /* pull PRI */ pri = DEFUPRI; -- cgit v1.2.3 From aba90e82484118f3568ec51c01de5ba845da589a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 22 May 2009 17:06:52 +0200 Subject: added capability to run multiple tcp listeners (on different ports) Well, actually this and a lot of related things. I improved the testbench so that the new capabilities are automatically tested and also did some general cleanup. The current multiple tcp listener solution will probably receive some further cleanup, too, but looks quite OK so far. I also reviewed the way tcpsrv et all work, in preparation of using this code for imdiag. I need to document the findings, especially as the code is rather complicated "thanks" to the combination of plain tcp and gssapi transport modes. --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index b4ab0a3e..212d40f3 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -283,7 +283,7 @@ rsRetVal parseMsg(msg_t *pMsg) MsgSetUxTradMsg(pMsg, (char*) msg); if(pMsg->bParseHOSTNAME == 0) - MsgSetHOSTNAME(pMsg, (char*) pMsg->pszRcvFrom); + MsgSetHOSTNAME(pMsg, pMsg->pszRcvFrom); /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have * a traditional syslog message or one formatted according to syslog-protocol. -- cgit v1.2.3 From 015d17ca70e81ad998e32cdfeed3cd925fd7dedc Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 16 Jun 2009 08:46:45 +0200 Subject: some performance optimizations - saved gettimeofday() calls in imtcp (and increased reception buffer) - somewhat optimized stringbuf.c - some other optimizations --- runtime/parser.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 212d40f3..64e03094 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -191,6 +191,23 @@ sanitizeMessage(msg_t *pMsg) lenMsg--; } + /* it is much quicker to sweep over the message and see if it actually + * needs sanitation than to do the sanitation in any case. So we first do + * this and terminate when it is not needed - which is expectedly the case + * for the vast majority of messages. -- rgerhards, 2009-06-15 + */ + int bNeedSanitize = 0; + for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) { + if(pszMsg[iSrc] < 32) { + if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { + bNeedSanitize = 1; + break; + } + } + } + if(bNeedSanitize == 0) + FINALIZE; + /* now copy over the message and sanitize it */ /* TODO: can we get cheaper memory alloc? {alloca()?}*/ iMaxLine = glbl.GetMaxLine(); -- cgit v1.2.3 From f7579e68a67364c8040966be57c2eae4c9550ee5 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 16 Jun 2009 11:36:05 +0200 Subject: done various optimizations to the stringbuf and its users --- runtime/parser.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 64e03094..13fb51ec 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -205,8 +205,16 @@ sanitizeMessage(msg_t *pMsg) } } } - if(bNeedSanitize == 0) + if(bNeedSanitize == 0) { + /* what a shame - we do not have a \0 byte... + * TODO: think about adding it or otherwise be able to use it... + */ + uchar *pRaw; + CHKmalloc(pRaw = realloc(pMsg->pszRawMsg, pMsg->iLenRawMsg + 1)); + pRaw[pMsg->iLenRawMsg] = '\0'; + pMsg->pszRawMsg = pRaw; FINALIZE; + } /* now copy over the message and sanitize it */ /* TODO: can we get cheaper memory alloc? {alloca()?}*/ @@ -276,6 +284,7 @@ rsRetVal parseMsg(msg_t *pMsg) DEFiRet; uchar *msg; int pri; + int iPriText; CHKiRet(sanitizeMessage(pMsg)); @@ -285,11 +294,19 @@ rsRetVal parseMsg(msg_t *pMsg) /* pull PRI */ pri = DEFUPRI; msg = pMsg->pszRawMsg; + iPriText = 0; if(*msg == '<') { + /* while we process the PRI, we also fill the PRI textual representation + * inside the msg object. This may not be ideal from an OOP point of view, + * but it offers us performance... + */ pri = 0; while(isdigit((int) *++msg)) { + pMsg->bufPRI[iPriText++ % 4] = *msg; /* mod 4 to guard against malformed messages! */ pri = 10 * pri + (*msg - '0'); } + pMsg->bufPRI[iPriText % 4] = '\0'; + pMsg->iLenPRI = iPriText % 4; if(*msg == '>') ++msg; if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) -- cgit v1.2.3 From 74b2b24f508be90d20961304d5e3cce648f3eb7c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 16 Jun 2009 12:31:27 +0200 Subject: removed long-obsoleted property UxTradMsg ... as well as some cleanup (some commented-out code is left to support UxTradMsg again is someone really complains ;)). --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 13fb51ec..0b45bfd5 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -314,7 +314,7 @@ rsRetVal parseMsg(msg_t *pMsg) } pMsg->iFacility = LOG_FAC(pri); pMsg->iSeverity = LOG_PRI(pri); - MsgSetUxTradMsg(pMsg, (char*) msg); + MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); if(pMsg->bParseHOSTNAME == 0) MsgSetHOSTNAME(pMsg, pMsg->pszRcvFrom); -- cgit v1.2.3 From 56e462610db0dc71cfc2e4af17d1eb27bd67fae7 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 17 Jun 2009 12:56:58 +0200 Subject: further optimized message object pri, facility and severity string generation simplified --- runtime/parser.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0b45bfd5..c13edb8f 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -284,7 +284,6 @@ rsRetVal parseMsg(msg_t *pMsg) DEFiRet; uchar *msg; int pri; - int iPriText; CHKiRet(sanitizeMessage(pMsg)); @@ -294,7 +293,6 @@ rsRetVal parseMsg(msg_t *pMsg) /* pull PRI */ pri = DEFUPRI; msg = pMsg->pszRawMsg; - iPriText = 0; if(*msg == '<') { /* while we process the PRI, we also fill the PRI textual representation * inside the msg object. This may not be ideal from an OOP point of view, @@ -302,11 +300,8 @@ rsRetVal parseMsg(msg_t *pMsg) */ pri = 0; while(isdigit((int) *++msg)) { - pMsg->bufPRI[iPriText++ % 4] = *msg; /* mod 4 to guard against malformed messages! */ pri = 10 * pri + (*msg - '0'); } - pMsg->bufPRI[iPriText % 4] = '\0'; - pMsg->iLenPRI = iPriText % 4; if(*msg == '>') ++msg; if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) -- cgit v1.2.3 From 8628312396b1535c41124e499d292f4d1e77d955 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Jun 2009 13:22:21 +0200 Subject: cleaned up/optimized raw message handling in msg object --- runtime/parser.c | 79 +++++++++++++++++++------------------------------------- 1 file changed, 26 insertions(+), 53 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index c13edb8f..75cde343 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -114,10 +114,8 @@ static inline rsRetVal uncompressMessage(msg_t *pMsg) "Message ignored.", ret); FINALIZE; /* unconditional exit, nothing left to do... */ } - free(pMsg->pszRawMsg); - pMsg->pszRawMsg = deflateBuf; - pMsg->iLenRawMsg = iLenDefBuf; - deflateBuf = NULL; /* logically "freed" - caller is now responsible */ + MsgSetRawMsg(pMsg, (char*)deflateBuf, iLenDefBuf); + free(deflateBuf); } finalize_it: if(deflateBuf != NULL) @@ -165,6 +163,8 @@ sanitizeMessage(msg_t *pMsg) size_t iSrc; size_t iDst; size_t iMaxLine; + size_t maxDest; + uchar szSanBuf[32*1024]; /* buffer used for sanitizing a string */ assert(pMsg != NULL); @@ -205,70 +205,43 @@ sanitizeMessage(msg_t *pMsg) } } } - if(bNeedSanitize == 0) { - /* what a shame - we do not have a \0 byte... - * TODO: think about adding it or otherwise be able to use it... - */ - uchar *pRaw; - CHKmalloc(pRaw = realloc(pMsg->pszRawMsg, pMsg->iLenRawMsg + 1)); - pRaw[pMsg->iLenRawMsg] = '\0'; - pMsg->pszRawMsg = pRaw; + + if(!bNeedSanitize) FINALIZE; - } /* now copy over the message and sanitize it */ - /* TODO: can we get cheaper memory alloc? {alloca()?}*/ iMaxLine = glbl.GetMaxLine(); - CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + maxDest = lenMsg * 4; /* message can grow at most four-fold */ + if(maxDest > iMaxLine) + maxDest = iMaxLine; /* but not more than the max size! */ + if(maxDest < sizeof(szSanBuf)) + pDst = szSanBuf; + else + CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; - while(iSrc < lenMsg && iDst < iMaxLine) { + while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */ - /* changed to the sequence (somewhat) proposed in - * draft-ietf-syslog-protocol-19. rgerhards, 2006-11-30 - */ - if(iDst + 3 < iMaxLine) { /* do we have space? */ - pDst[iDst++] = cCCEscapeChar; - pDst[iDst++] = '0'; - pDst[iDst++] = '0'; - pDst[iDst++] = '0'; - } /* if we do not have space, we simply ignore the '\0'... */ - /* log an error? Very questionable... rgerhards, 2006-11-30 */ - /* decided: we do not log an error, it won't help... rger, 2007-06-21 */ - } else if(bEscapeCCOnRcv && iscntrl((int) pszMsg[iSrc])) { + } else if(iscntrl((int) pszMsg[iSrc])) { + if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { /* we are configured to escape control characters. Please note * that this most probably break non-western character sets like * Japanese, Korean or Chinese. rgerhards, 2007-07-17 - * Note: sysklogd logs octal values only for DEL and CCs above 127. - * For others, it logs ^n where n is the control char converted to an - * alphabet character. We like consistency and thus escape it to octal - * in all cases. If someone complains, we may change the mode. At least - * we known now what's going on. - * rgerhards, 2007-07-17 */ - if(iDst + 3 < iMaxLine) { /* do we have space? */ - pDst[iDst++] = cCCEscapeChar; - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); - } /* again, if we do not have space, we ignore the char - see comment at '\0' */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + } } else { pDst[iDst++] = pszMsg[iSrc]; } ++iSrc; } - pDst[iDst] = '\0'; /* space *is* reserved for this! */ - - /* we have a sanitized string. Let's save it now */ - free(pMsg->pszRawMsg); - if((pMsg->pszRawMsg = malloc((iDst+1) * sizeof(uchar))) == NULL) { - /* when we get no new buffer, we use what we already have ;) */ - pMsg->pszRawMsg = pDst; - } else { - /* trim buffer */ - memcpy(pMsg->pszRawMsg, pDst, iDst+1); - free(pDst); /* too big! */ - pMsg->iLenRawMsg = iDst; - } + + MsgSetRawMsg(pMsg, (char*)pDst, iDst); /* save sanitized string */ + + if(pDst != szSanBuf) + free(pDst); finalize_it: RETiRet; -- cgit v1.2.3 From 662ad3e4bf8dbd317d18aa1afcbf3e8b9e424506 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 16:32:29 +0200 Subject: optimized hostname processing --- runtime/parser.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 75cde343..0a27c982 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -284,9 +284,6 @@ rsRetVal parseMsg(msg_t *pMsg) pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); - if(pMsg->bParseHOSTNAME == 0) - MsgSetHOSTNAME(pMsg, pMsg->pszRcvFrom); - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have * a traditional syslog message or one formatted according to syslog-protocol. * We need to apply different parsers depending on that. We use the -- cgit v1.2.3 From 07a7152ea0ec499a481942e9003079efffd7bb8f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 25 Jun 2009 18:23:03 +0200 Subject: bugfix: msg_t mutex was sometimes initialized twice --- runtime/parser.c | 1 - 1 file changed, 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0a27c982..d4ca7673 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -307,7 +307,6 @@ rsRetVal parseMsg(msg_t *pMsg) /* finalize message object */ pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ - MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ finalize_it: RETiRet; -- cgit v1.2.3 From aaffc4281e0b26f419a3fc341461f2fc479080b8 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 30 Jun 2009 18:45:41 +0200 Subject: introduced a new way of handling the RcvFrom property ... plus a fix for a long-time bug in obj-types.h. That lead to the object pointer only then to become NULL when the object was actually destructed, I discovered this issue during introduction of the pRcvFrom property in msg_t, but it potentially had other effects, too. I am not sure if some experienced instability resulted from this bug OR if its fix will cause harm to so-far "correctly" running code. The later may very well be. Thus I will change it only for the current branch and also the beta, but not in all old builds. Let's see how things evolve. --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index d4ca7673..a5183105 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -261,7 +261,7 @@ rsRetVal parseMsg(msg_t *pMsg) CHKiRet(sanitizeMessage(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ - DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, getRcvFrom(pMsg), pMsg->pszRawMsg); /* pull PRI */ pri = DEFUPRI; -- cgit v1.2.3 From 6fff4ae3329e852586f3a14ac8b8369bc6d61148 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 10 Jul 2009 13:56:46 +0200 Subject: bugfix: potential segfault when zip-compressed syslog records were received (double free) --- runtime/parser.c | 1 - 1 file changed, 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index a5183105..a2538fa3 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -115,7 +115,6 @@ static inline rsRetVal uncompressMessage(msg_t *pMsg) FINALIZE; /* unconditional exit, nothing left to do... */ } MsgSetRawMsg(pMsg, (char*)deflateBuf, iLenDefBuf); - free(deflateBuf); } finalize_it: if(deflateBuf != NULL) -- cgit v1.2.3 From aba8792c8a82ef52a3188ee7295e501ca21dae3b Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 26 Aug 2009 12:57:06 +0200 Subject: bugfix: message sanitation had some issues - control character DEL was not properly escaped - NUL and LF characters were not properly stripped if no control character replacement was to be done - NUL characters in the message body were silently dropped (this was a regeression introduced by some of the recent optimizations) --- runtime/parser.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index a2538fa3..db11ac5b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -163,6 +163,7 @@ sanitizeMessage(msg_t *pMsg) size_t iDst; size_t iMaxLine; size_t maxDest; + bool bUpdatedLen = FALSE; uchar szSanBuf[32*1024]; /* buffer used for sanitizing a string */ assert(pMsg != NULL); @@ -177,6 +178,7 @@ sanitizeMessage(msg_t *pMsg) /* remove NUL character at end of message (see comment in function header) */ if(pszMsg[lenMsg-1] == '\0') { DBGPRINTF("dropped NUL at very end of message\n"); + bUpdatedLen = TRUE; lenMsg--; } @@ -187,6 +189,7 @@ sanitizeMessage(msg_t *pMsg) */ if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); + bUpdatedLen = TRUE; lenMsg--; } @@ -197,7 +200,7 @@ sanitizeMessage(msg_t *pMsg) */ int bNeedSanitize = 0; for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) { - if(pszMsg[iSrc] < 32) { + if(iscntrl(pszMsg[iSrc])) { if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { bNeedSanitize = 1; break; @@ -205,8 +208,11 @@ sanitizeMessage(msg_t *pMsg) } } - if(!bNeedSanitize) + if(!bNeedSanitize) { + if(bUpdatedLen == TRUE) + MsgSetRawMsgSize(pMsg, lenMsg); FINALIZE; + } /* now copy over the message and sanitize it */ iMaxLine = glbl.GetMaxLine(); @@ -219,8 +225,10 @@ sanitizeMessage(msg_t *pMsg) CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ - if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */ - } else if(iscntrl((int) pszMsg[iSrc])) { + if(iscntrl((int) pszMsg[iSrc])) { + /* note: \0 must always be escaped, the rest of the code currently + * can not handle it! -- rgerhards, 2009-08-26 + */ if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { /* we are configured to escape control characters. Please note * that this most probably break non-western character sets like -- cgit v1.2.3 From 37ba1df6e37b2d020001f390ccb2462ae04fc9c1 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 24 Sep 2009 09:48:38 +0200 Subject: bugfix: random data could be appended to message, possibly causing segfaults --- runtime/parser.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0b45bfd5..079bcf5e 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -274,6 +274,7 @@ finalize_it: RETiRet; } + /* Parse a received message. The object's rawmsg property is taken and * parsed according to the relevant standards. This can later be * extended to support configured parsers. @@ -284,6 +285,7 @@ rsRetVal parseMsg(msg_t *pMsg) DEFiRet; uchar *msg; int pri; + int lenMsg; int iPriText; CHKiRet(sanitizeMessage(pMsg)); @@ -292,8 +294,11 @@ rsRetVal parseMsg(msg_t *pMsg) DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); /* pull PRI */ - pri = DEFUPRI; + lenMsg = pMsg->iLenRawMsg; + if(lenMsg == 0) + ABORT_FINALIZE(RS_RET_EMPTY_MSG); msg = pMsg->pszRawMsg; + pri = DEFUPRI; iPriText = 0; if(*msg == '<') { /* while we process the PRI, we also fill the PRI textual representation @@ -301,7 +306,7 @@ rsRetVal parseMsg(msg_t *pMsg) * but it offers us performance... */ pri = 0; - while(isdigit((int) *++msg)) { + while(--lenMsg > 0 && isdigit((int) *++msg)) { pMsg->bufPRI[iPriText++ % 4] = *msg; /* mod 4 to guard against malformed messages! */ pri = 10 * pri + (*msg - '0'); } @@ -342,7 +347,7 @@ rsRetVal parseMsg(msg_t *pMsg) /* finalize message object */ pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ - MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ + MsgPrepareEnqueue(pMsg); /* "historical" name - prepare for multi-threading */ finalize_it: RETiRet; -- cgit v1.2.3 From 536415cf3dba053b0d2294b7f1dc8e34328e795f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 29 Sep 2009 14:22:11 +0200 Subject: bugfix: invalid handling of zero-sized messages could lead to mis-addressing and potential memory corruption/segfault --- runtime/parser.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 079bcf5e..7eff0801 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -167,6 +167,7 @@ sanitizeMessage(msg_t *pMsg) size_t iMaxLine; assert(pMsg != NULL); + assert(pMsg->iLenRawMsg > 0); # ifdef USE_NETZIP CHKiRet(uncompressMessage(pMsg)); @@ -288,6 +289,9 @@ rsRetVal parseMsg(msg_t *pMsg) int lenMsg; int iPriText; + if(pMsg->iLenRawMsg == 0) + ABORT_FINALIZE(RS_RET_EMPTY_MSG); + CHKiRet(sanitizeMessage(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ @@ -295,8 +299,6 @@ rsRetVal parseMsg(msg_t *pMsg) /* pull PRI */ lenMsg = pMsg->iLenRawMsg; - if(lenMsg == 0) - ABORT_FINALIZE(RS_RET_EMPTY_MSG); msg = pMsg->pszRawMsg; pri = DEFUPRI; iPriText = 0; -- cgit v1.2.3 From ec56b763b83677d1e9cd02a7ae610caf62e902bb Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 8 Oct 2009 16:36:17 +0200 Subject: bugfix in debug system and more instrumentation to find an issue bugfix: debug string larger than 1K were improperly displayed. Max size is now 32K, and if a string is even longer it is meaningful truncated. --- runtime/parser.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 466066e7..3c90c447 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -231,14 +231,14 @@ sanitizeMessage(msg_t *pMsg) * can not handle it! -- rgerhards, 2009-08-26 */ if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { - /* we are configured to escape control characters. Please note - * that this most probably break non-western character sets like - * Japanese, Korean or Chinese. rgerhards, 2007-07-17 - */ - pDst[iDst++] = cCCEscapeChar; - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); - pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + /* we are configured to escape control characters. Please note + * that this most probably break non-western character sets like + * Japanese, Korean or Chinese. rgerhards, 2007-07-17 + */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); } } else { pDst[iDst++] = pszMsg[iSrc]; -- cgit v1.2.3 From e04e1b50025f5fa9c26abd946190dce8f797d08f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 22 Oct 2009 11:33:38 +0200 Subject: enhanced test environment (including testbench) support for enhancing probability of memory addressing failure by using non-NULL default value for malloced memory (optional, only if requested by configure option). This helps to track down some otherwise undetected issues within the testbench and is expected to be very useful in the future. --- runtime/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 3c90c447..e29f3b0b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -96,7 +96,7 @@ static inline rsRetVal uncompressMessage(msg_t *pMsg) */ int ret; iLenDefBuf = glbl.GetMaxLine(); - CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1))); + CHKmalloc(deflateBuf = MALLOC(sizeof(uchar) * (iLenDefBuf + 1))); ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1); DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n", ret, (long) iLenDefBuf, (int) (lenMsg-1)); @@ -223,7 +223,7 @@ sanitizeMessage(msg_t *pMsg) if(maxDest < sizeof(szSanBuf)) pDst = szSanBuf; else - CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + CHKmalloc(pDst = MALLOC(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ if(iscntrl((int) pszMsg[iSrc])) { -- cgit v1.2.3 From 7d78b3bdfd357dd921797ce983eb96532c56a7f6 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 10:41:47 +0100 Subject: restructured parser part of rsyslog now cleaner and hopefully usuable as a basis for loadable parser modules. I also cleaned up/consolidated some of the internal message generation functionality in rsyslogd. --- runtime/parser.c | 382 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 382 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e29f3b0b..89e59f87 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -37,7 +37,10 @@ #include "dirty.h" #include "msg.h" #include "obj.h" +#include "datetime.h" #include "errmsg.h" +#include "unicode-helper.h" +#include "dirty.h" /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) @@ -46,6 +49,7 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) +DEFobjCurrIf(datetime) /* static data */ @@ -60,11 +64,389 @@ rsRetVal parserClassInit(void) CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(objUse(datetime, CORE_COMPONENT)); // TODO: free components! see action.c finalize_it: RETiRet; } +/***************************RFC 5425 PARSER ******************************************************/ + + +/* Helper to parseRFCSyslogMsg. This function parses a field up to + * (and including) the SP character after it. The field contents is + * returned in a caller-provided buffer. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int iRet = 0; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + + /* this is the actual parsing loop */ + while(*pLenStr > 0 && *p2parse != ' ') { + *pResult++ = *p2parse++; + --(*pLenStr); + } + + if(*pLenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --(*pLenStr); + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + return 0; +} + + +/* Helper to parseRFCSyslogMsg. This function parses the structured + * data field of a message. It does NOT parse inside structured data, + * just gets the field as whole. Parsing the single entities is left + * to other functions. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int bCont = 1; + int iRet = 0; + int lenStr; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + lenStr = *pLenStr; + + /* this is the actual parsing loop + * Remeber: structured data starts with [ and includes any characters + * until the first ] followed by a SP. There may be spaces inside + * structured data. There may also be \] inside the structured data, which + * do NOT terminate an element. + */ + if(lenStr == 0 || *p2parse != '[') + return 1; /* this is NOT structured data! */ + + if(*p2parse == '-') { /* empty structured data? */ + *pResult++ = '-'; + ++p2parse; + --lenStr; + } else { + while(bCont) { + if(lenStr < 2) { + /* we now need to check if we have only structured data */ + if(lenStr > 0 && *p2parse == ']') { + *pResult++ = *p2parse; + p2parse++; + lenStr--; + bCont = 0; + } else { + iRet = 1; /* this is not valid! */ + bCont = 0; + } + } else if(*p2parse == '\\' && *(p2parse+1) == ']') { + /* this is escaped, need to copy both */ + *pResult++ = *p2parse++; + *pResult++ = *p2parse++; + lenStr -= 2; + } else if(*p2parse == ']' && *(p2parse+1) == ' ') { + /* found end, just need to copy the ] and eat the SP */ + *pResult++ = *p2parse; + p2parse += 2; + lenStr -= 2; + bCont = 0; + } else { + *pResult++ = *p2parse++; + --lenStr; + } + } + } + + if(lenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --lenStr; + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + *pLenStr = lenStr; + return 0; +} + +/* parse a RFC5424-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. Please note that + * the name (parse *RFC*) stems from the hope that syslog-protocol will + * some time become an RFC. Do not confuse this with informational + * RFC 3164 (which is legacy syslog). + * + * currently supported format: + * + * VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG + * + * is already stripped when this function is entered. VERSION already + * has been confirmed to be "1", but has NOT been stripped from the message. + * + * rger, 2005-11-24 + */ +static int parseRFCSyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + uchar *pBuf; + int lenMsg; + int bContParse = 1; + + BEGINfunc + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; + + /* do a sanity check on the version and eat it (the caller checked this already) */ + assert(p2parse[0] == '1' && p2parse[1] == ' '); + p2parse += 2; + lenMsg -= 2; + + /* Now get us some memory we can use as a work buffer while parsing. + * We simply allocated a buffer sufficiently large to hold all of the + * message, so we can not run into any troubles. I think this is + * more wise then to use individual buffers. + */ + if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) + return 1; + + /* IMPORTANT NOTE: + * Validation is not actually done below nor are any errors handled. I have + * NOT included this for the current proof of concept. However, it is strongly + * advisable to add it when this code actually goes into production. + * rgerhards, 2005-11-24 + */ + + /* TIMESTAMP */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + } else { + DBGPRINTF("no TIMESTAMP detected!\n"); + bContParse = 0; + } + + /* HOSTNAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); + } + + /* APP-NAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetAPPNAME(pMsg, (char*)pBuf); + } + + /* PROCID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetPROCID(pMsg, (char*)pBuf); + } + + /* MSGID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetMSGID(pMsg, (char*)pBuf); + } + + /* STRUCTURED-DATA */ + if(bContParse) { + parseRFCStructuredData(&p2parse, pBuf, &lenMsg); + MsgSetStructuredData(pMsg, (char*)pBuf); + } + + /* MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + free(pBuf); + ENDfunc + return 0; /* all ok */ +} + + +/*********************** END RFC5425 PARSER ******************************************/ + +/***************************RFC 5425 PARSER ******************************************************/ + + +/* parse a legay-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. + * rger, 2005-11-24 + * As of 2006-01-10, I am removing the logic to continue parsing only + * when a valid TIMESTAMP is detected. Validity of other fields already + * is ignored. This is due to the fact that the parser has grown smarter + * and is now more able to understand different dialects of the syslog + * message format. I do not expect any bad side effects of this change, + * but I thought I log it in this comment. + * rgerhards, 2006-01-10 + */ +static int parseLegacySyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + int lenMsg; + int bTAGCharDetected; + int i; /* general index for parsing */ + uchar bufParseTAG[CONF_TAG_MAXSIZE]; + uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; + BEGINfunc + + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + + /* Check to see if msg contains a timestamp. We start by assuming + * that the message timestamp is the time of reception (which we + * generated ourselfs and then try to actually find one inside the + * message. There we go from high-to low precison and are done + * when we find a matching one. -- rgerhards, 2008-09-16 + */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; + } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ + ++p2parse; /* move over space */ + --lenMsg; + if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* indeed, we got it! */ + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else {/* parse pointer needs to be restored, as we moved it off-by-one + * for this try. + */ + --p2parse; + ++lenMsg; + } + } + + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + + /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we + * do this only when the user has not forbidden this. I now introduce some + * code that allows a user to configure rsyslogd to treat the rest of the + * message as MSG part completely. In this case, the hostname will be the + * machine that we received the message from and the tag will be empty. This + * is meant to be an interim solution, but for now it is in the code. + */ + if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { + /* parse HOSTNAME - but only if this is network-received! + * rger, 2005-11-14: we still have a problem with BSD messages. These messages + * do NOT include a host name. In most cases, this leads to the TAG to be treated + * as hostname and the first word of the message as the TAG. Clearly, this is not + * of advantage ;) I think I have now found a way to handle this situation: there + * are certain characters which are frequently used in TAG (e.g. ':'), which are + * *invalid* in host names. So while parsing the hostname, I check for these characters. + * If I find them, I set a simple flag but continue. After parsing, I check the flag. + * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change + * the fields. I think this logic shall work with any type of syslog message. + * rgerhards, 2009-06-23: and I now have extended this logic to every character + * that is not a valid hostname. + */ + bTAGCharDetected = 0; + if(lenMsg > 0 && flags & PARSE_HOSTNAME) { + i = 0; + while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' + || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { + bufParseHOSTNAME[i] = p2parse[i]; + ++i; + } + + if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { + /* we got a hostname! */ + p2parse += i + 1; /* "eat" it (including SP delimiter) */ + lenMsg -= i + 1; + bufParseHOSTNAME[i] = '\0'; + MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); + } + } + + /* now parse TAG - that should be present in message from all sources. + * This code is somewhat not compliant with RFC 3164. As of 3164, + * the TAG field is ended by any non-alphanumeric character. In + * practice, however, the TAG often contains dashes and other things, + * which would end the TAG. So it is not desirable. As such, we only + * accept colon and SP to be terminators. Even there is a slight difference: + * a colon is PART of the TAG, while a SP is NOT part of the tag + * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character + * size limit (from RFC3164) on the tag. This had bad effects on existing + * envrionments, as sysklogd didn't obey it either (probably another bug + * in RFC3164...). We now receive the full size, but will modify the + * outputs so that only 32 characters max are used by default. + */ + i = 0; + while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { + bufParseTAG[i++] = *p2parse++; + --lenMsg; + } + if(lenMsg > 0 && *p2parse == ':') { + ++p2parse; + --lenMsg; + bufParseTAG[i++] = ':'; + } + + /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG + * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 + */ + bufParseTAG[i] = '\0'; /* terminate string */ + MsgSetTAG(pMsg, bufParseTAG, i); + } else {/* we enter this code area when the user has instructed rsyslog NOT + * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 + */ + if(!(flags & INTERNAL_MSG)) { + DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); + } + } + + /* The rest is the actual MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + ENDfunc + return 0; /* all ok */ +} + + +/***************************END RFC 5425 PARSER ******************************************************/ + /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. -- cgit v1.2.3 From 6f511cecfae3592f271627ebcb41e6a8c4f831e9 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 12:39:48 +0100 Subject: more cleanup and working towards a parser module calling interface I cleaned up a lot of config variable access along the way. This version compiles and runs, but does not yet offer any enhanced functionality. pmrfc5424 is just a dummy that is not yet being used. --- runtime/parser.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 89e59f87..e2ad69e4 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -39,8 +39,10 @@ #include "obj.h" #include "datetime.h" #include "errmsg.h" +#include "parser.h" #include "unicode-helper.h" #include "dirty.h" +#include "cfsysline.h" /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) @@ -52,23 +54,22 @@ DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) /* static data */ +static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ +/* config data */ +static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ +static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ +static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ -/* this is a dummy class init +/* we need to provide standard constructors and destructors, even though + * we only have static methods. The framework requires that. */ -rsRetVal parserClassInit(void) -{ - DEFiRet; +BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ +ENDobjConstruct(parser) - /* request objects we use */ - CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ - CHKiRet(objUse(glbl, CORE_COMPONENT)); - CHKiRet(objUse(errmsg, CORE_COMPONENT)); - CHKiRet(objUse(datetime, CORE_COMPONENT)); -// TODO: free components! see action.c -finalize_it: - RETiRet; -} +BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ +CODESTARTobjDestruct(parser) +ENDobjDestruct(parser) /***************************RFC 5425 PARSER ******************************************************/ @@ -643,7 +644,8 @@ finalize_it: * extended to support configured parsers. * rgerhards, 2008-10-09 */ -rsRetVal parseMsg(msg_t *pMsg) +static rsRetVal +ParseMsg(msg_t *pMsg) { DEFiRet; uchar *msg; @@ -709,3 +711,57 @@ rsRetVal parseMsg(msg_t *pMsg) finalize_it: RETiRet; } + + +/* queryInterface function-- rgerhards, 2009-11-03 + */ +BEGINobjQueryInterface(parser) +CODESTARTobjQueryInterface(parser) + if(pIf->ifVersion != parserCURR_IF_VERSION) { /* check for current version, increment on each change */ + ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED); + } + + /* ok, we have the right interface, so let's fill it + * Please note that we may also do some backwards-compatibility + * work here (if we can support an older interface version - that, + * of course, also affects the "if" above). + */ + pIf->ParseMsg = ParseMsg; +finalize_it: +ENDobjQueryInterface(parser) + + + +/* Reset config variables to default values. + * rgerhards, 2007-07-17 + */ +static rsRetVal +resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unused)) *pVal) +{ + cCCEscapeChar = '#'; + bEscapeCCOnRcv = 1; /* default is to escape control characters */ + bDropTrailingLF = 1; /* default is to drop trailing LF's on reception */ + + return RS_RET_OK; +} + + +/* Initialize the parser class. Must be called as the very first method + * before anything else is called inside this class. + * rgerhards, 2009-11-02 + */ +//BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ + /* request objects we use */ + CHKiRet(objUse(glbl, CORE_COMPONENT)); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(objUse(datetime, CORE_COMPONENT)); + + bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */ + + CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); +ENDObjClassInit(parser) + -- cgit v1.2.3 From b1db196953713dd09c499a3edf81347bd903c19e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 3 Nov 2009 18:44:02 +0100 Subject: one step closer to dynamically loadable parsers This is a milestone commit, which adds new code that breaks nothing, but also does not add any visible change. Just prep work... --- runtime/parser.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 6 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e2ad69e4..f59d1974 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -55,20 +55,113 @@ DEFobjCurrIf(datetime) /* static data */ static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ + /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ +/* we create a small helper list of parsers so that we can obtain their + * handles if the config needs one. This is also used to unload all modules on + * shutdown. + */ +parserList_t *pParsLstRoot = NULL; + + +/* intialize (but NOT allocate) a parser list. Primarily meant as a hook + * which can be used to extend the list in the future. So far, just sets + * it to NULL. + */ +static rsRetVal +InitParserList(parserList_t **pListRoot) +{ + *pListRoot = NULL; + return RS_RET_OK; +} + -/* we need to provide standard constructors and destructors, even though - * we only have static methods. The framework requires that. +/* Add a parser to the list. We use a VERY simple and ineffcient algorithm, + * but it is employed only for a few milliseconds during config processing. So + * I prefer to keep it very simple and with simple data structures. Unfortunately, + * we need to preserve the order, but I don't like to add a tail pointer as that + * would require a container object. So I do the extra work to skip to the tail + * when adding elements... + * rgerhards, 2009-11-03 */ +static rsRetVal +AddParserToList(parserList_t **ppListRoot, parser_t *pParser) +{ + parserList_t *pThis; + parserList_t *pTail; + DEFiRet; + + CHKmalloc(pThis = MALLOC(sizeof(parserList_t))); + pThis->pParser = pParser; + pThis->pNext = NULL; + + if(*ppListRoot == NULL) { + pThis->pNext = *ppListRoot; + *ppListRoot = pThis; + } else { + /* find tail first */ + for(pTail = *ppListRoot ; pTail->pNext != NULL ; pTail = pTail->pNext) + /* just search, do nothing else */; + /* add at tail */ + pTail->pNext = pThis; + } + +finalize_it: + RETiRet; +} + + +/* find a parser based on the provided name */ +static rsRetVal +FindParser(parser_t **ppParser, uchar *pName) +{ + parserList_t *pThis; + DEFiRet; + + for(pThis = pParsLstRoot ; pThis != NULL ; pThis = pThis->pNext) { + if(ustrcmp(pThis->pParser->pName, pName) == 0) { + *ppParser = pThis->pParser; + FINALIZE; /* found it, iRet still eq. OK! */ + } + } + + iRet = RS_RET_PARSER_NOT_FOUND; + +finalize_it: + RETiRet; +} + + +/* --- END helper functions for parser list handling --- */ + + BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ ENDobjConstruct(parser) +/* ConstructionFinalizer. The most important chore is to add the parser object + * to our global list of available parsers. + * rgerhards, 2009-11-03 + */ +rsRetVal parserConstructFinalize(parser_t *pThis) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, parser); + CHKiRet(AddParserToList(&pParsLstRoot, pThis)); + DBGPRINTF("parser '%s' added to list of available parser\n", pThis->pName); + +finalize_it: + RETiRet; +} + BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(parser) + free(pThis->pName); + //TODO: free module! ENDobjDestruct(parser) /***************************RFC 5425 PARSER ******************************************************/ @@ -536,7 +629,7 @@ finalize_it: * rgerhards, 2007-09-14 */ static inline rsRetVal -sanitizeMessage(msg_t *pMsg) +SanitizeMsg(msg_t *pMsg) { DEFiRet; uchar *pszMsg; @@ -656,7 +749,7 @@ ParseMsg(msg_t *pMsg) if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); - CHKiRet(sanitizeMessage(pMsg)); + CHKiRet(SanitizeMsg(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, getRcvFrom(pMsg), pMsg->pszRawMsg); @@ -712,6 +805,54 @@ finalize_it: RETiRet; } +/* set the parser name - string is copied over, call can continue to use it, + * but must free it if desired. + */ +static rsRetVal +SetName(parser_t *pThis, uchar *name) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, parser); + assert(name != NULL); + + if(pThis->pName != NULL) { + free(pThis->pName); + pThis->pName = NULL; + } + + CHKmalloc(pThis->pName = ustrdup(name)); + +finalize_it: + RETiRet; +} + + +/* set a pointer to "our" module. Note that no module + * pointer must already be set. + */ +static rsRetVal +SetModPtr(parser_t *pThis, modInfo_t *pMod) +{ + ISOBJ_TYPE_assert(pThis, parser); + assert(pMod != NULL); + assert(pThis->pModule == NULL); + pThis->pModule = pMod; + return RS_RET_OK; +} + + +/* Specify if we should do standard message sanitazion before we pass the data + * down to the parser. + */ +static rsRetVal +SetDoSanitazion(parser_t *pThis, int bDoIt) +{ + ISOBJ_TYPE_assert(pThis, parser); + pThis->bDoSanitazion = bDoIt; + return RS_RET_OK; +} + /* queryInterface function-- rgerhards, 2009-11-03 */ @@ -726,7 +867,17 @@ CODESTARTobjQueryInterface(parser) * work here (if we can support an older interface version - that, * of course, also affects the "if" above). */ + pIf->Construct = parserConstruct; + pIf->ConstructFinalize = parserConstructFinalize; + pIf->Destruct = parserDestruct; + pIf->SetName = SetName; + pIf->SetModPtr = SetModPtr; + pIf->SetDoSanitazion = SetDoSanitazion; pIf->ParseMsg = ParseMsg; + pIf->SanitizeMsg = SanitizeMsg; + pIf->InitParserList = InitParserList; + pIf->AddParserToList = AddParserToList; + pIf->FindParser = FindParser; finalize_it: ENDobjQueryInterface(parser) @@ -750,8 +901,8 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus * before anything else is called inside this class. * rgerhards, 2009-11-02 */ -//BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ -BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ +//BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ /* request objects we use */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); @@ -763,5 +914,7 @@ BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); + + InitParserList(&pParsLstRoot); ENDObjClassInit(parser) -- cgit v1.2.3 From 1b7f5c54684db29c096e09238648a45dce78ebee Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 10:40:27 +0100 Subject: moved rfc3164/5424 code to new parser modules another milestone commit: the program works, the new interface is used, some more cleanup is needed and the per-ruleset config options are still missing. But we are getting closer... --- runtime/parser.c | 476 ++++++++----------------------------------------------- 1 file changed, 69 insertions(+), 407 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index f59d1974..2c764739 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -40,6 +40,7 @@ #include "datetime.h" #include "errmsg.h" #include "parser.h" +#include "ruleset.h" #include "unicode-helper.h" #include "dirty.h" #include "cfsysline.h" @@ -52,21 +53,25 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) +DEFobjCurrIf(ruleset) /* static data */ -static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ -/* we create a small helper list of parsers so that we can obtain their - * handles if the config needs one. This is also used to unload all modules on - * shutdown. +/* This is the list of all parsers known to us. + * This is also used to unload all modules on shutdown. */ parserList_t *pParsLstRoot = NULL; +/* this is the list of the default parsers, to be used if no others + * are specified. + */ +parserList_t *pDfltParsLst = NULL; + /* intialize (but NOT allocate) a parser list. Primarily meant as a hook * which can be used to extend the list in the future. So far, just sets @@ -139,6 +144,27 @@ finalize_it: /* --- END helper functions for parser list handling --- */ +/* Add a an already existing parser to the default list. As usual, order + * of calls is important (most importantly, that means the legacy parser, + * which can process everything, MUST be added last!). + * rgerhards, 2009-11-04 + */ +static rsRetVal +AddDfltParser(uchar *pName) +{ + parser_t *pParser; + DEFiRet; + + CHKiRet(FindParser(&pParser, pName)); + CHKiRet(AddParserToList(&pDfltParsLst, pParser)); + dbgprintf("Parser '%s' added to default parser set.\n", pName); + +finalize_it: + RETiRet; +} + + + BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ ENDobjConstruct(parser) @@ -152,7 +178,7 @@ rsRetVal parserConstructFinalize(parser_t *pThis) ISOBJ_TYPE_assert(pThis, parser); CHKiRet(AddParserToList(&pParsLstRoot, pThis)); - DBGPRINTF("parser '%s' added to list of available parser\n", pThis->pName); + DBGPRINTF("Parser '%s' added to list of available parsers.\n", pThis->pName); finalize_it: RETiRet; @@ -164,383 +190,6 @@ CODESTARTobjDestruct(parser) //TODO: free module! ENDobjDestruct(parser) -/***************************RFC 5425 PARSER ******************************************************/ - - -/* Helper to parseRFCSyslogMsg. This function parses a field up to - * (and including) the SP character after it. The field contents is - * returned in a caller-provided buffer. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int iRet = 0; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - - /* this is the actual parsing loop */ - while(*pLenStr > 0 && *p2parse != ' ') { - *pResult++ = *p2parse++; - --(*pLenStr); - } - - if(*pLenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --(*pLenStr); - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - return 0; -} - - -/* Helper to parseRFCSyslogMsg. This function parses the structured - * data field of a message. It does NOT parse inside structured data, - * just gets the field as whole. Parsing the single entities is left - * to other functions. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int bCont = 1; - int iRet = 0; - int lenStr; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - lenStr = *pLenStr; - - /* this is the actual parsing loop - * Remeber: structured data starts with [ and includes any characters - * until the first ] followed by a SP. There may be spaces inside - * structured data. There may also be \] inside the structured data, which - * do NOT terminate an element. - */ - if(lenStr == 0 || *p2parse != '[') - return 1; /* this is NOT structured data! */ - - if(*p2parse == '-') { /* empty structured data? */ - *pResult++ = '-'; - ++p2parse; - --lenStr; - } else { - while(bCont) { - if(lenStr < 2) { - /* we now need to check if we have only structured data */ - if(lenStr > 0 && *p2parse == ']') { - *pResult++ = *p2parse; - p2parse++; - lenStr--; - bCont = 0; - } else { - iRet = 1; /* this is not valid! */ - bCont = 0; - } - } else if(*p2parse == '\\' && *(p2parse+1) == ']') { - /* this is escaped, need to copy both */ - *pResult++ = *p2parse++; - *pResult++ = *p2parse++; - lenStr -= 2; - } else if(*p2parse == ']' && *(p2parse+1) == ' ') { - /* found end, just need to copy the ] and eat the SP */ - *pResult++ = *p2parse; - p2parse += 2; - lenStr -= 2; - bCont = 0; - } else { - *pResult++ = *p2parse++; - --lenStr; - } - } - } - - if(lenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --lenStr; - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - *pLenStr = lenStr; - return 0; -} - -/* parse a RFC5424-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. Please note that - * the name (parse *RFC*) stems from the hope that syslog-protocol will - * some time become an RFC. Do not confuse this with informational - * RFC 3164 (which is legacy syslog). - * - * currently supported format: - * - * VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG - * - * is already stripped when this function is entered. VERSION already - * has been confirmed to be "1", but has NOT been stripped from the message. - * - * rger, 2005-11-24 - */ -static int parseRFCSyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - uchar *pBuf; - int lenMsg; - int bContParse = 1; - - BEGINfunc - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; - - /* do a sanity check on the version and eat it (the caller checked this already) */ - assert(p2parse[0] == '1' && p2parse[1] == ' '); - p2parse += 2; - lenMsg -= 2; - - /* Now get us some memory we can use as a work buffer while parsing. - * We simply allocated a buffer sufficiently large to hold all of the - * message, so we can not run into any troubles. I think this is - * more wise then to use individual buffers. - */ - if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) - return 1; - - /* IMPORTANT NOTE: - * Validation is not actually done below nor are any errors handled. I have - * NOT included this for the current proof of concept. However, it is strongly - * advisable to add it when this code actually goes into production. - * rgerhards, 2005-11-24 - */ - - /* TIMESTAMP */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - } else { - DBGPRINTF("no TIMESTAMP detected!\n"); - bContParse = 0; - } - - /* HOSTNAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); - } - - /* APP-NAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetAPPNAME(pMsg, (char*)pBuf); - } - - /* PROCID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetPROCID(pMsg, (char*)pBuf); - } - - /* MSGID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetMSGID(pMsg, (char*)pBuf); - } - - /* STRUCTURED-DATA */ - if(bContParse) { - parseRFCStructuredData(&p2parse, pBuf, &lenMsg); - MsgSetStructuredData(pMsg, (char*)pBuf); - } - - /* MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - free(pBuf); - ENDfunc - return 0; /* all ok */ -} - - -/*********************** END RFC5425 PARSER ******************************************/ - -/***************************RFC 5425 PARSER ******************************************************/ - - -/* parse a legay-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. - * rger, 2005-11-24 - * As of 2006-01-10, I am removing the logic to continue parsing only - * when a valid TIMESTAMP is detected. Validity of other fields already - * is ignored. This is due to the fact that the parser has grown smarter - * and is now more able to understand different dialects of the syslog - * message format. I do not expect any bad side effects of this change, - * but I thought I log it in this comment. - * rgerhards, 2006-01-10 - */ -static int parseLegacySyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - int lenMsg; - int bTAGCharDetected; - int i; /* general index for parsing */ - uchar bufParseTAG[CONF_TAG_MAXSIZE]; - uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; - BEGINfunc - - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - - /* Check to see if msg contains a timestamp. We start by assuming - * that the message timestamp is the time of reception (which we - * generated ourselfs and then try to actually find one inside the - * message. There we go from high-to low precison and are done - * when we find a matching one. -- rgerhards, 2008-09-16 - */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; - } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ - ++p2parse; /* move over space */ - --lenMsg; - if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* indeed, we got it! */ - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else {/* parse pointer needs to be restored, as we moved it off-by-one - * for this try. - */ - --p2parse; - ++lenMsg; - } - } - - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - - /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we - * do this only when the user has not forbidden this. I now introduce some - * code that allows a user to configure rsyslogd to treat the rest of the - * message as MSG part completely. In this case, the hostname will be the - * machine that we received the message from and the tag will be empty. This - * is meant to be an interim solution, but for now it is in the code. - */ - if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { - /* parse HOSTNAME - but only if this is network-received! - * rger, 2005-11-14: we still have a problem with BSD messages. These messages - * do NOT include a host name. In most cases, this leads to the TAG to be treated - * as hostname and the first word of the message as the TAG. Clearly, this is not - * of advantage ;) I think I have now found a way to handle this situation: there - * are certain characters which are frequently used in TAG (e.g. ':'), which are - * *invalid* in host names. So while parsing the hostname, I check for these characters. - * If I find them, I set a simple flag but continue. After parsing, I check the flag. - * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change - * the fields. I think this logic shall work with any type of syslog message. - * rgerhards, 2009-06-23: and I now have extended this logic to every character - * that is not a valid hostname. - */ - bTAGCharDetected = 0; - if(lenMsg > 0 && flags & PARSE_HOSTNAME) { - i = 0; - while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' - || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { - bufParseHOSTNAME[i] = p2parse[i]; - ++i; - } - - if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { - /* we got a hostname! */ - p2parse += i + 1; /* "eat" it (including SP delimiter) */ - lenMsg -= i + 1; - bufParseHOSTNAME[i] = '\0'; - MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); - } - } - - /* now parse TAG - that should be present in message from all sources. - * This code is somewhat not compliant with RFC 3164. As of 3164, - * the TAG field is ended by any non-alphanumeric character. In - * practice, however, the TAG often contains dashes and other things, - * which would end the TAG. So it is not desirable. As such, we only - * accept colon and SP to be terminators. Even there is a slight difference: - * a colon is PART of the TAG, while a SP is NOT part of the tag - * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character - * size limit (from RFC3164) on the tag. This had bad effects on existing - * envrionments, as sysklogd didn't obey it either (probably another bug - * in RFC3164...). We now receive the full size, but will modify the - * outputs so that only 32 characters max are used by default. - */ - i = 0; - while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { - bufParseTAG[i++] = *p2parse++; - --lenMsg; - } - if(lenMsg > 0 && *p2parse == ':') { - ++p2parse; - --lenMsg; - bufParseTAG[i++] = ':'; - } - - /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG - * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 - */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } else {/* we enter this code area when the user has instructed rsyslog NOT - * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 - */ - if(!(flags & INTERNAL_MSG)) { - DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); - } - } - - /* The rest is the actual MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - ENDfunc - return 0; /* all ok */ -} - - -/***************************END RFC 5425 PARSER ******************************************************/ - /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. @@ -645,10 +294,6 @@ SanitizeMsg(msg_t *pMsg) assert(pMsg != NULL); assert(pMsg->iLenRawMsg > 0); -# ifdef USE_NETZIP - CHKiRet(uncompressMessage(pMsg)); -# endif - pszMsg = pMsg->pszRawMsg; lenMsg = pMsg->iLenRawMsg; @@ -740,15 +385,22 @@ finalize_it: static rsRetVal ParseMsg(msg_t *pMsg) { - DEFiRet; uchar *msg; int pri; int lenMsg; int iPriText; + rsRetVal localRet; + parserList_t *pParserList; + static int iErrMsgRateLimiter = 0; + DEFiRet; if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + CHKiRet(SanitizeMsg(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ @@ -777,25 +429,35 @@ ParseMsg(msg_t *pMsg) pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have - * a traditional syslog message or one formatted according to syslog-protocol. - * We need to apply different parsers depending on that. We use the - * -protocol VERSION field for the detection. + /* we now need to go through our list of parsers and see which one is capable of + * parsing the message. */ - if(msg[0] == '1' && msg[1] == ' ') { - dbgprintf("Message has syslog-protocol format.\n"); - setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! - } - } else { /* we have legacy syslog */ - dbgprintf("Message has legacy syslog format.\n"); - setProtocolVersion(pMsg, 0); - if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + pParserList = ruleset.GetParserList(pMsg); + if(pParserList == NULL) + pParserList = pDfltParsLst; + DBGPRINTF("Using parser list %p%s.\n", pParserList, + (pParserList == pDfltParsLst) ? " (the default list)" : ""); + + while(pParserList != NULL) { + localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); + if(localRet != RS_RET_COULD_NOT_PARSE) + break; + pParserList = pParserList->pNext; + } + + /* We need to log a warning message and drop the message if we did not find a parser. + * Note that we log at most the first 1000 message, as this may very well be a problem + * that causes a message generation loop. We do not synchronize that counter, it doesn't + * matter if we log a handful messages more than we should... + */ + if(localRet != RS_RET_OK) { + if(++iErrMsgRateLimiter > 1000) { + errmsg.LogError(0, localRet, "Error: one message could not be processed by " + "any parser, message is being discarded (start of raw msg: '%50s')", + pMsg->pszRawMsg); } + DBGPRINTF("No parser could process the message (state %d), we need to discard it.\n", localRet); + ABORT_FINALIZE(localRet); } /* finalize message object */ @@ -877,6 +539,7 @@ CODESTARTobjQueryInterface(parser) pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; pIf->AddParserToList = AddParserToList; + pIf->AddDfltParser = AddDfltParser; pIf->FindParser = FindParser; finalize_it: ENDobjQueryInterface(parser) @@ -902,13 +565,11 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus * rgerhards, 2009-11-02 */ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ -//BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ /* request objects we use */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); CHKiRet(objUse(datetime, CORE_COMPONENT)); - - bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */ + CHKiRet(objUse(ruleset, CORE_COMPONENT)); CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); @@ -916,5 +577,6 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); InitParserList(&pParsLstRoot); + InitParserList(&pDfltParsLst); ENDObjClassInit(parser) -- cgit v1.2.3 From ef661fe13c0ab5018afedb1cb5b8cffab05ad7c4 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 11:33:09 +0100 Subject: finalized parser module calling interface looks like we are almost done and need only to add the ruleset parser-specific config options. --- runtime/parser.c | 93 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 26 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 2c764739..e87068fc 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -376,36 +376,19 @@ finalize_it: RETiRet; } - -/* Parse a received message. The object's rawmsg property is taken and - * parsed according to the relevant standards. This can later be - * extended to support configured parsers. - * rgerhards, 2008-10-09 +/* A standard parser to parse out the PRI. This is made available in + * this module as it is expected that allmost all parsers will need + * that functionality and so they do not need to implement it themsleves. */ -static rsRetVal -ParseMsg(msg_t *pMsg) +static inline rsRetVal +ParsePRI(msg_t *pMsg) { - uchar *msg; int pri; + uchar *msg; int lenMsg; int iPriText; - rsRetVal localRet; - parserList_t *pParserList; - static int iErrMsgRateLimiter = 0; DEFiRet; - if(pMsg->iLenRawMsg == 0) - ABORT_FINALIZE(RS_RET_EMPTY_MSG); - -# ifdef USE_NETZIP - CHKiRet(uncompressMessage(pMsg)); -# endif - - CHKiRet(SanitizeMsg(pMsg)); - - /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ - DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, getRcvFrom(pMsg), pMsg->pszRawMsg); - /* pull PRI */ lenMsg = pMsg->iLenRawMsg; msg = pMsg->pszRawMsg; @@ -428,9 +411,43 @@ ParseMsg(msg_t *pMsg) pMsg->iFacility = LOG_FAC(pri); pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); + RETiRet; +} + + +/* Parse a received message. The object's rawmsg property is taken and + * parsed according to the relevant standards. This can later be + * extended to support configured parsers. + * rgerhards, 2008-10-09 + */ +static rsRetVal +ParseMsg(msg_t *pMsg) +{ + rsRetVal localRet; + parserList_t *pParserList; + parser_t *pParser; + bool bIsSanitized; + bool bPRIisParsed; + static int iErrMsgRateLimiter = 0; + DEFiRet; + + if(pMsg->iLenRawMsg == 0) + ABORT_FINALIZE(RS_RET_EMPTY_MSG); + +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + + /* we take the risk to print a non-sanitized string, because this is the best we can get + * (and that functionality is too important for debugging to drop it...). + */ + DBGPRINTF("msg parser: flags %x, from '%s', msg '%.50s'\n", pMsg->msgFlags, + getRcvFrom(pMsg), pMsg->pszRawMsg); /* we now need to go through our list of parsers and see which one is capable of - * parsing the message. + * parsing the message. Note that the first parser that requires message sanitization + * will cause it to happen. After that, access to the unsanitized message is no + * loger possible. */ pParserList = ruleset.GetParserList(pMsg); if(pParserList == NULL) @@ -438,7 +455,18 @@ ParseMsg(msg_t *pMsg) DBGPRINTF("Using parser list %p%s.\n", pParserList, (pParserList == pDfltParsLst) ? " (the default list)" : ""); + bIsSanitized = FALSE; + bPRIisParsed = FALSE; while(pParserList != NULL) { + pParser = pParserList->pParser; + if(pParser->bDoSanitazion && bIsSanitized == FALSE) { + CHKiRet(SanitizeMsg(pMsg)); + if(pParser->bDoPRIParsing && bPRIisParsed == FALSE) { + CHKiRet(ParsePRI(pMsg)); + bPRIisParsed = TRUE; + } + bIsSanitized = TRUE; + } localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); if(localRet != RS_RET_COULD_NOT_PARSE) break; @@ -453,14 +481,14 @@ ParseMsg(msg_t *pMsg) if(localRet != RS_RET_OK) { if(++iErrMsgRateLimiter > 1000) { errmsg.LogError(0, localRet, "Error: one message could not be processed by " - "any parser, message is being discarded (start of raw msg: '%50s')", + "any parser, message is being discarded (start of raw msg: '%.50s')", pMsg->pszRawMsg); } DBGPRINTF("No parser could process the message (state %d), we need to discard it.\n", localRet); ABORT_FINALIZE(localRet); } - /* finalize message object */ + /* "finalize" message object */ pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ finalize_it: @@ -516,6 +544,18 @@ SetDoSanitazion(parser_t *pThis, int bDoIt) } +/* Specify if we should do standard PRI parsing before we pass the data + * down to the parser module. + */ +static rsRetVal +SetDoPRIParsing(parser_t *pThis, int bDoIt) +{ + ISOBJ_TYPE_assert(pThis, parser); + pThis->bDoPRIParsing = bDoIt; + return RS_RET_OK; +} + + /* queryInterface function-- rgerhards, 2009-11-03 */ BEGINobjQueryInterface(parser) @@ -535,6 +575,7 @@ CODESTARTobjQueryInterface(parser) pIf->SetName = SetName; pIf->SetModPtr = SetModPtr; pIf->SetDoSanitazion = SetDoSanitazion; + pIf->SetDoPRIParsing = SetDoPRIParsing; pIf->ParseMsg = ParseMsg; pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; -- cgit v1.2.3 From b6a92e2f23895cadf9f0ea2de64021912bd2eeb0 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 14:21:08 +0100 Subject: added $RulesetParser config directive The implementation is now almost done and works, including doc. I now need to verify shutdown, guess there are some resource leaks left... --- runtime/parser.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index e87068fc..0689657f 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -450,9 +450,10 @@ ParseMsg(msg_t *pMsg) * loger possible. */ pParserList = ruleset.GetParserList(pMsg); - if(pParserList == NULL) + if(pParserList == NULL) { pParserList = pDfltParsLst; - DBGPRINTF("Using parser list %p%s.\n", pParserList, + } + DBGPRINTF("parse using parser list %p%s.\n", pParserList, (pParserList == pDfltParsLst) ? " (the default list)" : ""); bIsSanitized = FALSE; @@ -467,7 +468,8 @@ ParseMsg(msg_t *pMsg) } bIsSanitized = TRUE; } - localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); + localRet = pParser->pModule->mod.pm.parse(pMsg); + dbgprintf("Parser '%s' returned %d\n", pParser->pName, localRet); if(localRet != RS_RET_COULD_NOT_PARSE) break; pParserList = pParserList->pNext; -- cgit v1.2.3 From aa2e8ea15b2001f131ebd196c180cc82aceb57b4 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Nov 2009 15:34:31 +0100 Subject: first complete implementation of loadable parser system I have now done the necessary cleanup. Looks like everything is in place. Unfortunately, I do not yet have any actual parser that is not built-in, but I think we can postpone working on that when the first one appears. I don't expect troubles in that case, but you never know ;) --- runtime/parser.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0689657f..38f72986 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -85,6 +85,27 @@ InitParserList(parserList_t **pListRoot) } +/* destruct a parser list. The list elements are destroyed, but the parser objects + * themselves are not modified. (That is done at a late stage during rsyslogd + * shutdown and need not be considered here.) + */ +static rsRetVal +DestructParserList(parserList_t **ppListRoot) +{ + parserList_t *pParsLst; + parserList_t *pParsLstDel; + + pParsLst = *ppListRoot; + while(pParsLst != NULL) { + pParsLstDel = pParsLst; + pParsLst = pParsLst->pNext; + free(pParsLstDel); + } + *ppListRoot = NULL; + return RS_RET_OK; +} + + /* Add a parser to the list. We use a VERY simple and ineffcient algorithm, * but it is employed only for a few milliseconds during config processing. So * I prefer to keep it very simple and with simple data structures. Unfortunately, @@ -143,7 +164,6 @@ finalize_it: /* --- END helper functions for parser list handling --- */ - /* Add a an already existing parser to the default list. As usual, order * of calls is important (most importantly, that means the legacy parser, * which can process everything, MUST be added last!). @@ -186,8 +206,8 @@ finalize_it: BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(parser) + dbgprintf("destructing parser '%s'\n", pThis->pName); free(pThis->pName); - //TODO: free module! ENDobjDestruct(parser) @@ -581,6 +601,7 @@ CODESTARTobjQueryInterface(parser) pIf->ParseMsg = ParseMsg; pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; + pIf->DestructParserList = DestructParserList; pIf->AddParserToList = AddParserToList; pIf->AddDfltParser = AddDfltParser; pIf->FindParser = FindParser; @@ -602,6 +623,37 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus return RS_RET_OK; } +/* This destroys the master parserlist and all of its parser entries. MUST only be + * done when the module is shut down. Parser modules are NOT unloaded, rsyslog + * does that at a later stage for all dynamically loaded modules. + */ +static void +destroyMasterParserList(void) +{ + parserList_t *pParsLst; + parserList_t *pParsLstDel; + + pParsLst = pParsLstRoot; + while(pParsLst != NULL) { + parserDestruct(&pParsLst->pParser); + pParsLstDel = pParsLst; + pParsLst = pParsLst->pNext; + free(pParsLstDel); + } +} + +/* Exit our class. + * rgerhards, 2009-11-04 + */ +BEGINObjClassExit(parser, OBJ_IS_CORE_MODULE) /* class, version */ + DestructParserList(&pDfltParsLst); + destroyMasterParserList(); + objRelease(glbl, CORE_COMPONENT); + objRelease(errmsg, CORE_COMPONENT); + objRelease(datetime, CORE_COMPONENT); + objRelease(ruleset, CORE_COMPONENT); +ENDObjClassExit(parser) + /* Initialize the parser class. Must be called as the very first method * before anything else is called inside this class. -- cgit v1.2.3 From 8c29f2843591376f8a1abca5f09a9740927f1000 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 17 Nov 2009 08:11:31 +0100 Subject: moved DNS resolution code out of imudp and into the backend processing Most importantly, DNS resolution now never happens if the resolved name is not required. Note that this applies to imudp - for the other inputs, DNS resolution almost comes for free, so we do not do it there. However, the new method has been implemented in a generic way and as such may also be used by other modules in the future. --- runtime/parser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 38f72986..0686bd91 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -462,7 +462,8 @@ ParseMsg(msg_t *pMsg) * (and that functionality is too important for debugging to drop it...). */ DBGPRINTF("msg parser: flags %x, from '%s', msg '%.50s'\n", pMsg->msgFlags, - getRcvFrom(pMsg), pMsg->pszRawMsg); + (pMsg->msgFlags & NEEDS_DNSRESOL) ? UCHAR_CONSTANT("~NOTRESOLVED~") : getRcvFrom(pMsg), + pMsg->pszRawMsg); /* we now need to go through our list of parsers and see which one is capable of * parsing the message. Note that the first parser that requires message sanitization -- cgit v1.2.3 From 18399f11cd48c12a1314c59475d6e26889cecf49 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 27 Nov 2009 11:26:09 +0100 Subject: re-implemented $EscapeControlCharacterTab config directive Based on Jonathan Bond-Caron's patch for v4. This now also includes some automatted tests. --- runtime/parser.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 0686bd91..b5245795 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -60,6 +60,7 @@ DEFobjCurrIf(ruleset) /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ +static int bEscapeTab = 1; /* escape tab control character when doing CC escapes: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ /* This is the list of all parsers known to us. @@ -339,6 +340,11 @@ SanitizeMsg(msg_t *pMsg) * needs sanitation than to do the sanitation in any case. So we first do * this and terminate when it is not needed - which is expectedly the case * for the vast majority of messages. -- rgerhards, 2009-06-15 + * Note that we do NOT check here if tab characters are to be escaped or + * not. I expect this functionality to be seldomly used and thus I do not + * like to pay the performance penalty. So the penalty is only with those + * that actually use it, because we may call the sanitizer without actual + * need below (but it then still will work perfectly well!). -- rgerhards, 2009-11-27 */ int bNeedSanitize = 0; for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) { @@ -367,7 +373,7 @@ SanitizeMsg(msg_t *pMsg) CHKmalloc(pDst = MALLOC(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ - if(iscntrl((int) pszMsg[iSrc])) { + if(iscntrl((int) pszMsg[iSrc]) && (pszMsg[iSrc] != '\t' || bEscapeTab)) { /* note: \0 must always be escaped, the rest of the code currently * can not handle it! -- rgerhards, 2009-08-26 */ @@ -619,6 +625,7 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus { cCCEscapeChar = '#'; bEscapeCCOnRcv = 1; /* default is to escape control characters */ + bEscapeTab = 1; /* default is to escape control characters */ bDropTrailingLF = 1; /* default is to drop trailing LF's on reception */ return RS_RET_OK; @@ -670,6 +677,7 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); InitParserList(&pParsLstRoot); -- cgit v1.2.3 From 85045270f69e4dcb25c409c9661e96e3172d7f30 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 27 Jan 2010 15:13:54 +0100 Subject: added $Escape8BitCharactersOnReceive directive Thanks to David Lang for suggesting this functionality. --- runtime/parser.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index b5245795..cbeb01e2 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -60,6 +60,7 @@ DEFobjCurrIf(ruleset) /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ +static int bEscape8BitChars = 0; /* escape characters > 127 on reception: 0 - no, 1 - yes */ static int bEscapeTab = 1; /* escape tab control character when doing CC escapes: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ @@ -353,6 +354,9 @@ SanitizeMsg(msg_t *pMsg) bNeedSanitize = 1; break; } + } else if(pszMsg[iSrc] > 127 && bEscape8BitChars) { + bNeedSanitize = 1; + break; } } @@ -387,6 +391,14 @@ SanitizeMsg(msg_t *pMsg) pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); } + } else if(pszMsg[iSrc] > 127 && bEscape8BitChars) { + /* In this case, we also do the conversion. Note that this most + * probably breaks European languages. -- rgerhards, 2010-01-27 + */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); } else { pDst[iDst++] = pszMsg[iSrc]; } @@ -625,6 +637,7 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus { cCCEscapeChar = '#'; bEscapeCCOnRcv = 1; /* default is to escape control characters */ + bEscape8BitChars = 0; /* default is to escape control characters */ bEscapeTab = 1; /* default is to escape control characters */ bDropTrailingLF = 1; /* default is to drop trailing LF's on reception */ @@ -677,6 +690,7 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); -- cgit v1.2.3 From 38cb3926727c0ad29f3950db43ba12248e867b89 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 2 Feb 2010 15:51:01 +0100 Subject: replaced data type "bool" by "sbool" because this created some portability issues --- runtime/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index cbeb01e2..ca31b35d 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -310,7 +310,7 @@ SanitizeMsg(msg_t *pMsg) size_t iDst; size_t iMaxLine; size_t maxDest; - bool bUpdatedLen = FALSE; + sbool bUpdatedLen = FALSE; uchar szSanBuf[32*1024]; /* buffer used for sanitizing a string */ assert(pMsg != NULL); @@ -464,8 +464,8 @@ ParseMsg(msg_t *pMsg) rsRetVal localRet; parserList_t *pParserList; parser_t *pParser; - bool bIsSanitized; - bool bPRIisParsed; + sbool bIsSanitized; + sbool bPRIisParsed; static int iErrMsgRateLimiter = 0; DEFiRet; -- cgit v1.2.3 From 302ad02e7781892856c1cacaf98a87f90db9571c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 31 Mar 2010 16:21:58 +0200 Subject: temporary bugfix replaced by permanent one for... ...message-induced off-by-one error (potential segfault) (see 4.6.2) The analysis has been completed and a better fix been crafted and integrated. --- runtime/parser.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 466066e7..36e88ebd 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -176,7 +176,10 @@ sanitizeMessage(msg_t *pMsg) pszMsg = pMsg->pszRawMsg; lenMsg = pMsg->iLenRawMsg; - /* remove NUL character at end of message (see comment in function header) */ + /* remove NUL character at end of message (see comment in function header) + * Note that we do not need to add a NUL character in this case, because it + * is already present ;) + */ if(pszMsg[lenMsg-1] == '\0') { DBGPRINTF("dropped NUL at very end of message\n"); bUpdatedLen = TRUE; @@ -190,8 +193,9 @@ sanitizeMessage(msg_t *pMsg) */ if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); - bUpdatedLen = TRUE; lenMsg--; + pszMsg[lenMsg] = '\0'; + bUpdatedLen = TRUE; } /* it is much quicker to sweep over the message and see if it actually @@ -245,6 +249,7 @@ sanitizeMessage(msg_t *pMsg) } ++iSrc; } + pDst[iDst] = '\0'; MsgSetRawMsg(pMsg, (char*)pDst, iDst); /* save sanitized string */ -- cgit v1.2.3 From 2cd132eebb84dbcffcf0c20b9354c14f797c29cd Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 7 Apr 2010 12:42:41 +0200 Subject: enhanced nettester tool so that it re-uses it's callers environment this enables us to work with the "usual" environment tweaks (for debugging and other purposes), without the need for any special handling in nettester itself --- runtime/parser.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 466066e7..36e88ebd 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -176,7 +176,10 @@ sanitizeMessage(msg_t *pMsg) pszMsg = pMsg->pszRawMsg; lenMsg = pMsg->iLenRawMsg; - /* remove NUL character at end of message (see comment in function header) */ + /* remove NUL character at end of message (see comment in function header) + * Note that we do not need to add a NUL character in this case, because it + * is already present ;) + */ if(pszMsg[lenMsg-1] == '\0') { DBGPRINTF("dropped NUL at very end of message\n"); bUpdatedLen = TRUE; @@ -190,8 +193,9 @@ sanitizeMessage(msg_t *pMsg) */ if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); - bUpdatedLen = TRUE; lenMsg--; + pszMsg[lenMsg] = '\0'; + bUpdatedLen = TRUE; } /* it is much quicker to sweep over the message and see if it actually @@ -245,6 +249,7 @@ sanitizeMessage(msg_t *pMsg) } ++iSrc; } + pDst[iDst] = '\0'; MsgSetRawMsg(pMsg, (char*)pDst, iDst); /* save sanitized string */ -- cgit v1.2.3 From 587036bfb03167d86b0a2fbfe998db1a916cabb3 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 19 Apr 2010 17:03:08 +0200 Subject: changed imsolaris to use submitMsg() API This includes a modification to the rsyslog engine so that messages without PRI inside the message can properly be handled. --- runtime/parser.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 36e88ebd..810bf42b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -287,19 +287,24 @@ rsRetVal parseMsg(msg_t *pMsg) msg = pMsg->pszRawMsg; pri = DEFUPRI; iPriText = 0; - if(*msg == '<') { - /* while we process the PRI, we also fill the PRI textual representation - * inside the msg object. This may not be ideal from an OOP point of view, - * but it offers us performance... - */ - pri = 0; - while(--lenMsg > 0 && isdigit((int) *++msg)) { - pri = 10 * pri + (*msg - '0'); + if(pMsg->msgFlags & NO_PRI_IN_RAW) { + /* In this case, simply do so as if the pri would be right at top */ + MsgSetAfterPRIOffs(pMsg, 0); + } else { + if(*msg == '<') { + /* while we process the PRI, we also fill the PRI textual representation + * inside the msg object. This may not be ideal from an OOP point of view, + * but it offers us performance... + */ + pri = 0; + while(--lenMsg > 0 && isdigit((int) *++msg)) { + pri = 10 * pri + (*msg - '0'); + } + if(*msg == '>') + ++msg; + if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) + pri = DEFUPRI; } - if(*msg == '>') - ++msg; - if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) - pri = DEFUPRI; } pMsg->iFacility = LOG_FAC(pri); pMsg->iSeverity = LOG_PRI(pri); -- cgit v1.2.3 From 96895d9e471716f4a449589fefa9f80a0564755a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 26 Apr 2010 11:43:14 +0200 Subject: bugfix(minor): status variable was uninitialized However, this would have caused harm only if NO parser modules at all were loaded, which would lead to a defunctional configuration at all. And, even more important, this is impossible as two parser modules are built-in and thus can not be "not loaded", so we always have a minimum of two. --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index ca20a11e..cacbe065 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -448,7 +448,7 @@ ParsePRI(msg_t *pMsg) static rsRetVal ParseMsg(msg_t *pMsg) { - rsRetVal localRet; + rsRetVal localRet = RS_RET_ERR; parserList_t *pParserList; parser_t *pParser; bool bIsSanitized; -- cgit v1.2.3 From 559cb84a79a9848ce1415569158928478991108c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 10 Jun 2010 10:17:04 +0200 Subject: fixing msg duplication & loss regression, causes slowdown messages could get lost or be duplicated due to non-proper sync of transactions. This is a notable slowdown again, but we know how to get back concurrency, it just takes "some" more programming. It is important now to come back to correct code, so that we can base further improvements on that. --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index bd0bf8e9..40374ae1 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -489,7 +489,7 @@ ParseMsg(msg_t *pMsg) /* we take the risk to print a non-sanitized string, because this is the best we can get * (and that functionality is too important for debugging to drop it...). */ - DBGPRINTF("msg parser: flags %x, from '%s', msg '%.50s'\n", pMsg->msgFlags, + DBGPRINTF("msg parser: flags %x, from '%s', msg '%.60s'\n", pMsg->msgFlags, (pMsg->msgFlags & NEEDS_DNSRESOL) ? UCHAR_CONSTANT("~NOTRESOLVED~") : getRcvFrom(pMsg), pMsg->pszRawMsg); -- cgit v1.2.3 From e64cd212432c2cf76245888499461e9c8bf73243 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 21 Jul 2010 18:08:19 +0200 Subject: moving towards scoping inside rsyslog.conf first step: adding object-type specifier to config statement table --- runtime/parser.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 40374ae1..f1a4bbf4 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -697,12 +697,12 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(objUse(datetime, CORE_COMPONENT)); CHKiRet(objUse(ruleset, CORE_COMPONENT)); - CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL, eConfObjGlobal)); InitParserList(&pParsLstRoot); InitParserList(&pDfltParsLst); -- cgit v1.2.3 From 371a8eec29fa25bbf58f4b1f0d7e3bf4c3ad6329 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 16 Dec 2010 12:57:55 +0100 Subject: some cleanup based on clang static analyzer results --- runtime/parser.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 36e88ebd..d27f3e38 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -272,7 +272,6 @@ rsRetVal parseMsg(msg_t *pMsg) uchar *msg; int pri; int lenMsg; - int iPriText; if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); @@ -286,7 +285,6 @@ rsRetVal parseMsg(msg_t *pMsg) lenMsg = pMsg->iLenRawMsg; msg = pMsg->pszRawMsg; pri = DEFUPRI; - iPriText = 0; if(*msg == '<') { /* while we process the PRI, we also fill the PRI textual representation * inside the msg object. This may not be ideal from an OOP point of view, -- cgit v1.2.3 From d4539663f685910db1e8d7816508298b940c8590 Mon Sep 17 00:00:00 2001 From: Corey Smith Date: Thu, 24 Mar 2011 17:16:59 +0100 Subject: bugfix: PRI was invalid on Solaris for message from local log socket Signed-off-by: root --- runtime/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index fdda9546..be9304d7 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -303,10 +303,10 @@ rsRetVal parseMsg(msg_t *pMsg) if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) pri = DEFUPRI; } + pMsg->iFacility = LOG_FAC(pri); + pMsg->iSeverity = LOG_PRI(pri); + MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); } - pMsg->iFacility = LOG_FAC(pri); - pMsg->iSeverity = LOG_PRI(pri); - MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have * a traditional syslog message or one formatted according to syslog-protocol. -- cgit v1.2.3 From eefc9b6eb69c130015955cfb15dc45e36e431d5a Mon Sep 17 00:00:00 2001 From: Corey Smith Date: Thu, 24 Mar 2011 17:25:03 +0100 Subject: bugfix: PRI was invalid on Solaris for message from local log socket Signed-off-by: root --- runtime/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index d3644976..b385c54b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -453,10 +453,10 @@ ParsePRI(msg_t *pMsg) if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) pri = DEFUPRI; } + pMsg->iFacility = LOG_FAC(pri); + pMsg->iSeverity = LOG_PRI(pri); + MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); } - pMsg->iFacility = LOG_FAC(pri); - pMsg->iSeverity = LOG_PRI(pri); - MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); RETiRet; } -- cgit v1.2.3 From 542fd3300671a2480ff009e060b32f08fed44b4d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 19 Apr 2011 12:37:39 +0200 Subject: some more work was required on the ruleset modification --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 2e991a9d..14ccb49a 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -496,7 +496,7 @@ ParseMsg(msg_t *pMsg) * will cause it to happen. After that, access to the unsanitized message is no * loger possible. */ - pParserList = ruleset.GetParserList(pMsg); + pParserList = ruleset.GetParserList(ourConf, pMsg); if(pParserList == NULL) { pParserList = pDfltParsLst; } -- cgit v1.2.3 From a2e69cffff21ad433e61b45f91a2be4d5ebc5c72 Mon Sep 17 00:00:00 2001 From: Tomas Heinrich Date: Tue, 10 Jan 2012 17:14:16 +0100 Subject: added $SpaceLFOnReceive config directive Signed-off-by: Rainer Gerhards --- runtime/parser.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 466066e7..57b7bf8f 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -202,9 +202,13 @@ sanitizeMessage(msg_t *pMsg) int bNeedSanitize = 0; for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) { if(iscntrl(pszMsg[iSrc])) { + if(bSpaceLFOnRcv && pszMsg[iSrc] == '\n') + pszMsg[iSrc] = ' '; + else if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { bNeedSanitize = 1; - break; + if (!bSpaceLFOnRcv) + break; } } } -- cgit v1.2.3 From d4024e98d0a7cef5830c71992513bbef0e342874 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 10 Jan 2012 18:04:18 +0100 Subject: part of merge, but forgot to add to merge set --- runtime/parser.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index ffaaac25..300db1e0 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -60,6 +60,7 @@ DEFobjCurrIf(ruleset) /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ +static int bSpaceLFOnRcv = 0; /* replace newlines with spaces on reception: 0 - no, 1 - yes */ static int bEscape8BitChars = 0; /* escape characters > 127 on reception: 0 - no, 1 - yes */ static int bEscapeTab = 1; /* escape tab control character when doing CC escapes: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ @@ -649,6 +650,7 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus { cCCEscapeChar = '#'; bEscapeCCOnRcv = 1; /* default is to escape control characters */ + bSpaceLFOnRcv = 0; bEscape8BitChars = 0; /* default is to escape control characters */ bEscapeTab = 1; /* default is to escape control characters */ bDropTrailingLF = 1; /* default is to drop trailing LF's on reception */ @@ -702,6 +704,7 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"spacelfonreceive", 0, eCmdHdlrBinary, NULL, &bSpaceLFOnRcv, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); -- cgit v1.2.3 From 9b24152b6ac6af942a62675c6136316fd487b661 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 18 Jan 2012 18:21:08 +0100 Subject: undoing v6.1 config scoping interface, part I The most common files are now changed, more needs to be done. --- runtime/parser.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 2e991a9d..b385c54b 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -695,12 +695,12 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(objUse(datetime, CORE_COMPONENT)); CHKiRet(objUse(ruleset, CORE_COMPONENT)); - CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL, eConfObjGlobal)); - CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL, eConfObjGlobal)); - CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL, eConfObjGlobal)); - CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL, eConfObjGlobal)); - CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL, eConfObjGlobal)); - CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL, eConfObjGlobal)); + CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscapeCCOnRcv, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escape8bitcharactersonreceive", 0, eCmdHdlrBinary, NULL, &bEscape8BitChars, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"escapecontrolcharactertab", 0, eCmdHdlrBinary, NULL, &bEscapeTab, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); InitParserList(&pParsLstRoot); InitParserList(&pDfltParsLst); -- cgit v1.2.3 From 439a5b70f2216616e9d55072be43658d5a7fe5d7 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 29 May 2012 12:42:46 +0200 Subject: improved support for --enable-debugless Now it's probably time to prove the effect in practice... --- runtime/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index a79f2ce8..f0515484 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -180,7 +180,7 @@ AddDfltParser(uchar *pName) CHKiRet(FindParser(&pParser, pName)); CHKiRet(AddParserToList(&pDfltParsLst, pParser)); - dbgprintf("Parser '%s' added to default parser set.\n", pName); + DBGPRINTF("Parser '%s' added to default parser set.\n", pName); finalize_it: RETiRet; @@ -209,7 +209,7 @@ finalize_it: BEGINobjDestruct(parser) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(parser) - dbgprintf("destructing parser '%s'\n", pThis->pName); + DBGPRINTF("destructing parser '%s'\n", pThis->pName); free(pThis->pName); ENDobjDestruct(parser) @@ -521,7 +521,7 @@ ParseMsg(msg_t *pMsg) bIsSanitized = TRUE; } localRet = pParser->pModule->mod.pm.parse(pMsg); - dbgprintf("Parser '%s' returned %d\n", pParser->pName, localRet); + DBGPRINTF("Parser '%s' returned %d\n", pParser->pName, localRet); if(localRet != RS_RET_COULD_NOT_PARSE) break; pParserList = pParserList->pNext; -- cgit v1.2.3 From 9faf2240c4a8f09f3f6c2c9bbd47e48520524e03 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 22 Aug 2012 15:29:02 +0200 Subject: changed TRUE/FALSE to RSTRUE/RSFALSE This is done to prevent name claches with libraries. --- runtime/parser.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 300db1e0..180814cf 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -311,7 +311,7 @@ SanitizeMsg(msg_t *pMsg) size_t iDst; size_t iMaxLine; size_t maxDest; - sbool bUpdatedLen = FALSE; + sbool bUpdatedLen = RSFALSE; uchar szSanBuf[32*1024]; /* buffer used for sanitizing a string */ assert(pMsg != NULL); @@ -326,7 +326,7 @@ SanitizeMsg(msg_t *pMsg) */ if(pszMsg[lenMsg-1] == '\0') { DBGPRINTF("dropped NUL at very end of message\n"); - bUpdatedLen = TRUE; + bUpdatedLen = RSTRUE; lenMsg--; } @@ -339,7 +339,7 @@ SanitizeMsg(msg_t *pMsg) DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); lenMsg--; pszMsg[lenMsg] = '\0'; - bUpdatedLen = TRUE; + bUpdatedLen = RSTRUE; } /* it is much quicker to sweep over the message and see if it actually @@ -370,7 +370,7 @@ SanitizeMsg(msg_t *pMsg) } if(!bNeedSanitize) { - if(bUpdatedLen == TRUE) + if(bUpdatedLen == RSTRUE) MsgSetRawMsgSize(pMsg, lenMsg); FINALIZE; } @@ -508,17 +508,17 @@ ParseMsg(msg_t *pMsg) DBGPRINTF("parse using parser list %p%s.\n", pParserList, (pParserList == pDfltParsLst) ? " (the default list)" : ""); - bIsSanitized = FALSE; - bPRIisParsed = FALSE; + bIsSanitized = RSFALSE; + bPRIisParsed = RSFALSE; while(pParserList != NULL) { pParser = pParserList->pParser; - if(pParser->bDoSanitazion && bIsSanitized == FALSE) { + if(pParser->bDoSanitazion && bIsSanitized == RSFALSE) { CHKiRet(SanitizeMsg(pMsg)); - if(pParser->bDoPRIParsing && bPRIisParsed == FALSE) { + if(pParser->bDoPRIParsing && bPRIisParsed == RSFALSE) { CHKiRet(ParsePRI(pMsg)); - bPRIisParsed = TRUE; + bPRIisParsed = RSTRUE; } - bIsSanitized = TRUE; + bIsSanitized = RSTRUE; } localRet = pParser->pModule->mod.pm.parse(pMsg); dbgprintf("Parser '%s' returned %d\n", pParser->pName, localRet); -- cgit v1.2.3 From 72930188588e54474298d464b943868f60300d7c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 2 Oct 2012 17:25:32 +0200 Subject: add "parser" parameter to ruleset(), so that parser chain can be configured --- runtime/parser.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 645ea0f4..b40edf4c 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -143,6 +143,14 @@ finalize_it: RETiRet; } +void +printParserList(parserList_t *pList) +{ + while(pList != NULL) { + dbgprintf("parser: %s\n", pList->pParser->pName); + pList = pList->pNext; + } +} /* find a parser based on the provided name */ static rsRetVal -- cgit v1.2.3 From 49d1203b3582f7d637af11a226386a4df186e687 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 14 Jan 2013 15:52:42 +0100 Subject: optimize: iscntrl() seems to be surprisingly slow at least so tells the profiler... --- runtime/parser.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index b40edf4c..777683fb 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -362,11 +362,10 @@ SanitizeMsg(msg_t *pMsg) */ int bNeedSanitize = 0; for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) { - if(iscntrl(pszMsg[iSrc])) { + if(pszMsg[iSrc] < 32) { if(bSpaceLFOnRcv && pszMsg[iSrc] == '\n') pszMsg[iSrc] = ' '; - else - if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { + else if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) { bNeedSanitize = 1; if (!bSpaceLFOnRcv) break; @@ -394,7 +393,7 @@ SanitizeMsg(msg_t *pMsg) CHKmalloc(pDst = MALLOC(sizeof(uchar) * (iMaxLine + 1))); iSrc = iDst = 0; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ - if(iscntrl((int) pszMsg[iSrc]) && (pszMsg[iSrc] != '\t' || bEscapeTab)) { + if((pszMsg[iSrc] < 32) && (pszMsg[iSrc] != '\t' || bEscapeTab)) { /* note: \0 must always be escaped, the rest of the code currently * can not handle it! -- rgerhards, 2009-08-26 */ -- cgit v1.2.3 From beed1bda6969b70b608ff9e606f52ac6d41cc8c1 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 14 Jan 2013 16:35:38 +0100 Subject: optimize: save inspection of already-inspected data this is just a small improvement, but let's get the benefit ;) --- runtime/parser.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 777683fb..74b28f4c 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -382,7 +382,9 @@ SanitizeMsg(msg_t *pMsg) FINALIZE; } - /* now copy over the message and sanitize it */ + /* now copy over the message and sanitize it. Note that up to iSrc-1 there was + * obviously no need to sanitize, so we can go over that quickly... + */ iMaxLine = glbl.GetMaxLine(); maxDest = lenMsg * 4; /* message can grow at most four-fold */ if(maxDest > iMaxLine) @@ -391,7 +393,11 @@ SanitizeMsg(msg_t *pMsg) pDst = szSanBuf; else CHKmalloc(pDst = MALLOC(sizeof(uchar) * (iMaxLine + 1))); - iSrc = iDst = 0; + if(iSrc > 0) { + iSrc--; /* go back to where everything is OK */ + memcpy(pDst, pszMsg, iSrc); /* fast copy known good */ + } + iDst = iSrc; while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */ if((pszMsg[iSrc] < 32) && (pszMsg[iSrc] != '\t' || bEscapeTab)) { /* note: \0 must always be escaped, the rest of the code currently -- cgit v1.2.3