From d798f5b66f4ae699f352b6c40abd07495eff8f94 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 11:22:07 +0200 Subject: some more optimizations - done malloc() instead of calloc() for msg_t, as we have large space which needs not be initialized - shrunk syslogTime structure in the hope to get better cache and write performance (non-aligned data should not hurt much here) --- runtime/msg.c | 51 +++++++++++++++++++++++++++++++++++++++++++++---- runtime/msg.h | 5 +++++ runtime/obj.h | 2 +- runtime/rsyslog.h | 1 + runtime/syslogd-types.h | 30 +++++++++++++++-------------- 5 files changed, 70 insertions(+), 19 deletions(-) diff --git a/runtime/msg.c b/runtime/msg.c index 1a864648..6ad0ee4f 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -425,6 +425,11 @@ rsRetVal MsgEnableThreadSafety(void) * itself but rather uses a user-supplied value. This enables the caller * to do some tricks to save processing time (done, for example, in the * udp input). + * NOTE: this constructor does NOT call calloc(), as we have many bytes + * inside the structure which do not need to be cleared. bzero() will + * heavily thrash the cache, so we do the init manually (which also + * is the right thing to do with pointers, as they are not neccessarily + * a binary 0 on all machines [but today almost always...]). * rgerhards, 2008-10-06 */ static inline rsRetVal msgBaseConstruct(msg_t **ppThis) @@ -433,15 +438,53 @@ static inline rsRetVal msgBaseConstruct(msg_t **ppThis) msg_t *pM; assert(ppThis != NULL); - if((pM = calloc(1, sizeof(msg_t))) == NULL) - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); + CHKmalloc(pM = malloc(sizeof(msg_t))); + objConstructSetObjInfo(pM); /* intialize object helper entities */ - /* initialize members that are non-zero */ + /* initialize members in ORDER they appear in structure (think "cache line"!) */ + pM->flowCtlType = 0; + pM->bDoLock = 0; + pM->bParseHOSTNAME = 0; pM->iRefCount = 1; pM->iSeverity = -1; pM->iFacility = -1; + pM->offAfterPRI = 0; pM->offMSG = -1; - objConstructSetObjInfo(pM); + pM->iLenInputName = 0; + pM->iProtocolVersion = 0; + pM->msgFlags = 0; + pM->iLenRawMsg = 0; + pM->iLenMSG = 0; + pM->iLenTAG = 0; + pM->iLenHOSTNAME = 0; + pM->iLenRcvFrom = 0; + pM->iLenRcvFromIP = 0; + pM->pszRawMsg = NULL; + pM->pszHOSTNAME = NULL; + pM->pszRcvFrom = NULL; + pM->pszRcvFromIP = NULL; + pM->pszInputName = NULL; + pM->pszRcvdAt3164 = NULL; + pM->pszRcvdAt3339 = NULL; + pM->pszRcvdAt_MySQL = NULL; + pM->pszRcvdAt_PgSQL = NULL; + pM->pszTIMESTAMP3164 = NULL; + pM->pszTIMESTAMP3339 = NULL; + pM->pszTIMESTAMP_MySQL = NULL; + pM->pszTIMESTAMP_PgSQL = NULL; + pM->pCSProgName = NULL; + pM->pCSStrucData = NULL; + pM->pCSAPPNAME = NULL; + pM->pCSPROCID = NULL; + pM->pCSMSGID = NULL; + pM->pRuleset = NULL; + memset(&pM->tRcvdAt, 0, sizeof(pM->tRcvdAt)); + memset(&pM->tTIMESTAMP, 0, sizeof(pM->tTIMESTAMP)); + pM->TAG.pszTAG = NULL; + pM->pszTimestamp3164[0] = '\0'; + pM->pszTimestamp3339[0] = '\0'; + pM->pszTIMESTAMP_SecFrac[0] = '\0'; + pM->pszRcvdAt_SecFrac[0] = '\0'; /* DEV debugging only! dbgprintf("msgConstruct\t0x%x, ref 1\n", (int)pM);*/ diff --git a/runtime/msg.h b/runtime/msg.h index 0f9bd95e..9113882a 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -51,6 +51,11 @@ * will be decremented. If it is 1, however, the object is actually * destroyed. To make this work, it is vital that MsgAddRef() is * called each time a "copy" is stored somewhere. + * + * WARNING: this structure is not calloc()ed, so be careful when + * adding new fields. You need to initialize them in + * msgBaseConstruct(). That function header comment also describes + * why this is the case. */ struct msg { BEGINobjInstance; /* Data to implement generic object - MUST be the first data element! */ diff --git a/runtime/obj.h b/runtime/obj.h index 3973a16e..50868cc9 100644 --- a/runtime/obj.h +++ b/runtime/obj.h @@ -77,8 +77,8 @@ /* the next macro MUST be called in Constructors: */ #ifndef NDEBUG /* this means if debug... */ # define objConstructSetObjInfo(pThis) \ - ASSERT(((obj_t*) (pThis))->pObjInfo == NULL); \ ((obj_t*) (pThis))->pObjInfo = pObjInfoOBJ; \ + ((obj_t*) (pThis))->pszName = NULL; \ ((obj_t*) (pThis))->iObjCooCKiE = 0xBADEFEE #else # define objConstructSetObjInfo(pThis) ((obj_t*) (pThis))->pObjInfo = pObjInfoOBJ diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 692b1327..ac068d5e 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -101,6 +101,7 @@ typedef struct strmLstnPortList_s strmLstnPortList_t; // TODO: rename? typedef long long int64; typedef long long unsigned uint64; typedef int64 number_t; /* type to use for numbers - TODO: maybe an autoconf option? */ +typedef char intTiny; /* 0..127! */ #ifdef __hpux typedef unsigned int u_int32_t; /* TODO: is this correct? */ diff --git a/runtime/syslogd-types.h b/runtime/syslogd-types.h index be0dfdd8..83b38f28 100644 --- a/runtime/syslogd-types.h +++ b/runtime/syslogd-types.h @@ -78,23 +78,25 @@ typedef enum _EHostnameCmpMode EHostnameCmpMode; /* rgerhards 2004-11-11: the following structure represents * a time as it is used in syslog. + * rgerhards, 2009-06-23: packed structure for better cache performance + * (but left ultimate decision about packing to compiler) */ struct syslogTime { - int timeType; /* 0 - unitinialized , 1 - RFC 3164, 2 - syslog-protocol */ - int year; - int month; - int day; - int hour; /* 24 hour clock */ - int minute; - int second; - int secfrac; /* fractional seconds (must be 32 bit!) */ - int secfracPrecision; + intTiny timeType; /* 0 - unitinialized , 1 - RFC 3164, 2 - syslog-protocol */ + intTiny month; + intTiny day; + intTiny hour; /* 24 hour clock */ + intTiny minute; + intTiny second; + intTiny secfracPrecision; + intTiny OffsetMinute; /* UTC offset in minutes */ + intTiny OffsetHour; /* UTC offset in hours + * full UTC offset minutes = OffsetHours*60 + OffsetMinute. Then use + * OffsetMode to know the direction. + */ char OffsetMode; /* UTC offset + or - */ - char OffsetHour; /* UTC offset in hours */ - int OffsetMinute; /* UTC offset in minutes */ - /* full UTC offset minutes = OffsetHours*60 + OffsetMinute. Then use - * OffsetMode to know the direction. - */ + short year; + int secfrac; /* fractional seconds (must be 32 bit!) */ }; typedef struct syslogTime syslogTime_t; -- cgit v1.2.3 From 6181156b1c42825bac892d3e1284dcb2d4fcf5d3 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 11:33:11 +0200 Subject: fix: previous patch aborted in release mode --- runtime/obj.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/obj.h b/runtime/obj.h index 50868cc9..419d29cc 100644 --- a/runtime/obj.h +++ b/runtime/obj.h @@ -81,7 +81,9 @@ ((obj_t*) (pThis))->pszName = NULL; \ ((obj_t*) (pThis))->iObjCooCKiE = 0xBADEFEE #else -# define objConstructSetObjInfo(pThis) ((obj_t*) (pThis))->pObjInfo = pObjInfoOBJ +# define objConstructSetObjInfo(pThis) \ + ((obj_t*) (pThis))->pObjInfo = pObjInfoOBJ; \ + ((obj_t*) (pThis))->pszName = NULL #endif #define objDestruct(pThis) (((obj_t*) (pThis))->pObjInfo->objMethods[objMethod_DESTRUCT])(&pThis) #define objSerialize(pThis) (((obj_t*) (pThis))->pObjInfo->objMethods[objMethod_SERIALIZE]) -- cgit v1.2.3 From b50d13a6a97c0b6fa14807775ae0edf52ef015fb Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 14:50:03 +0200 Subject: restored repeated message reduction processing --- action.c | 22 +++++++---------- runtime/msg.c | 70 ++++++++++++++++++++++++++++++++++++++++++------------- runtime/msg.h | 5 +--- runtime/rsyslog.h | 3 +++ runtime/ruleset.c | 7 ++---- tools/syslogd.c | 1 + 6 files changed, 69 insertions(+), 39 deletions(-) diff --git a/action.c b/action.c index d214e808..2040d6bd 100644 --- a/action.c +++ b/action.c @@ -4,7 +4,7 @@ * * File begun on 2007-08-06 by RGerhards (extracted from syslogd.c) * - * Copyright 2007 Rainer Gerhards and Adiscon GmbH. + * Copyright 2007-2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of rsyslog. * @@ -239,10 +239,7 @@ rsRetVal actionConstruct(action_t **ppThis) ASSERT(ppThis != NULL); - if((pThis = (action_t*) calloc(1, sizeof(action_t))) == NULL) { - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - } - + CHKmalloc(pThis = (action_t*) calloc(1, sizeof(action_t))); pThis->iResumeInterval = glbliActionResumeInterval; pThis->iResumeRetryCount = glbliActionResumeRetryCount; pThis->tLastOccur = time(NULL); /* done once per action on startup only */ @@ -658,7 +655,7 @@ actionWriteToAction(action_t *pAction) if(pAction->iNbrNoExec < pAction->iExecEveryNthOccur - 1) { ++pAction->iNbrNoExec; dbgprintf("action %p passed %d times to execution - less than neded - discarding\n", - pAction, pAction->iNbrNoExec); + pAction, pAction->iNbrNoExec); FINALIZE; } else { pAction->iNbrNoExec = 0; /* we execute the action now, so the number of no execs is down to */ @@ -675,6 +672,7 @@ actionWriteToAction(action_t *pAction) */ if(pAction->f_prevcount > 1) { msg_t *pMsg; + size_t lenRepMsg; uchar szRepMsg[1024]; if((pMsg = MsgDup(pAction->f_pMsg)) == NULL) { @@ -684,23 +682,19 @@ actionWriteToAction(action_t *pAction) } if(pAction->bRepMsgHasMsg == 0) { /* old format repeat message? */ - snprintf((char*)szRepMsg, sizeof(szRepMsg), "last message repeated %d times", + lenRepMsg = snprintf((char*)szRepMsg, sizeof(szRepMsg), " last message repeated %d times", pAction->f_prevcount); } else { - snprintf((char*)szRepMsg, sizeof(szRepMsg), "message repeated %d times: [%.800s]", + lenRepMsg = snprintf((char*)szRepMsg, sizeof(szRepMsg), " message repeated %d times: [%.800s]", pAction->f_prevcount, getMSG(pAction->f_pMsg)); } - /* We now need to update the other message properties. - * ... RAWMSG is a problem ... Please note that digital + /* We now need to update the other message properties. Please note that digital * signatures inside the message are also invalidated. */ datetime.getCurrTime(&(pMsg->tRcvdAt), &(pMsg->ttGenTime)); memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); -#pragma warn "need fix msg repeationg handling" - //MsgSetMSG(pMsg, (char*)szRepMsg); - MsgSetRawMsgWOSize(pMsg, (char*)szRepMsg); - + MsgReplaceMSG(pMsg, szRepMsg, lenRepMsg); pMsgSave = pAction->f_pMsg; /* save message pointer for later restoration */ pAction->f_pMsg = pMsg; /* use the new msg (pointer will be restored below) */ } diff --git a/runtime/msg.c b/runtime/msg.c index 6ad0ee4f..8c306aca 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1733,38 +1733,76 @@ void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME) } -/* rgerhards 2004-11-09: set MSG in msg object +/* set the offset of the MSG part into the raw msg buffer */ void MsgSetMSGoffs(msg_t *pMsg, short offs) { - assert(pMsg != NULL); - - pMsg->iLenMSG = ustrlen(pMsg->pszRawMsg + offs); + ISOBJ_TYPE_assert(pMsg, msg); + pMsg->iLenMSG = pMsg->iLenRawMsg - offs; pMsg->offMSG = offs; } +/* replace the MSG part of a message. The update actually takes place inside + * rawmsg. + * There are two cases: either the new message will be larger than the new msg + * or it will be less than or equal. If it is less than or equal, we can utilize + * the previous message buffer. If it is larger, we can utilize the msg_t-included + * message buffer if it fits in there. If this is not the case, we need to alloc + * a new, larger, chunk and copy over the data to it. Note that this function is + * (hopefully) relatively seldom being called, so some performance impact is + * uncritical. In any case, pszMSG is copied, so if it was dynamically allocated, + * the caller is responsible for freeing it. + * rgerhards, 2009-06-23 + */ +rsRetVal MsgReplaceMSG(msg_t *pThis, uchar* pszMSG, int lenMSG) +{ + int lenNew; + uchar *bufNew; + DEFiRet; + ISOBJ_TYPE_assert(pThis, msg); + assert(pszMSG != NULL); + + lenNew = pThis->iLenRawMsg + lenMSG - pThis->iLenMSG; + if(lenMSG > pThis->iLenMSG && lenNew >= CONF_RAWMSG_BUFSIZE) { + /* we have lost and need to alloc a new buffer ;) */ + CHKmalloc(bufNew = malloc(lenNew + 1)); + memcpy(bufNew, pThis->pszRawMsg, pThis->offMSG); + free(pThis->pszRawMsg); + pThis->pszRawMsg = bufNew; + } + + memcpy(pThis->pszRawMsg + pThis->offMSG, pszMSG, lenMSG); + pThis->pszRawMsg[lenNew] = '\0'; /* this also works with truncation! */ + pThis->iLenRawMsg = lenNew; + pThis->iLenMSG = lenMSG; + +finalize_it: + RETiRet; +} + + /* set raw message in message object. Size of message is provided. * rgerhards, 2009-06-16 */ -void MsgSetRawMsg(msg_t *pMsg, char* pszRawMsg, size_t lenMsg) +void MsgSetRawMsg(msg_t *pThis, char* pszRawMsg, size_t lenMsg) { - assert(pMsg != NULL); - if(pMsg->pszRawMsg != pMsg->szRawMsg) - free(pMsg->pszRawMsg); + assert(pThis != NULL); + if(pThis->pszRawMsg != pThis->szRawMsg) + free(pThis->pszRawMsg); - pMsg->iLenRawMsg = lenMsg; - if(pMsg->iLenRawMsg < CONF_RAWMSG_BUFSIZE) { + pThis->iLenRawMsg = lenMsg; + if(pThis->iLenRawMsg < CONF_RAWMSG_BUFSIZE) { /* small enough: use fixed buffer (faster!) */ - pMsg->pszRawMsg = pMsg->szRawMsg; - } else if((pMsg->pszRawMsg = (uchar*) malloc(pMsg->iLenRawMsg + 1)) == NULL) { + pThis->pszRawMsg = pThis->szRawMsg; + } else if((pThis->pszRawMsg = (uchar*) malloc(pThis->iLenRawMsg + 1)) == NULL) { /* truncate message, better than completely loosing it... */ - pMsg->pszRawMsg = pMsg->szRawMsg; - pMsg->iLenRawMsg = CONF_RAWMSG_BUFSIZE - 1; + pThis->pszRawMsg = pThis->szRawMsg; + pThis->iLenRawMsg = CONF_RAWMSG_BUFSIZE - 1; } - memcpy(pMsg->pszRawMsg, pszRawMsg, pMsg->iLenRawMsg); - pMsg->pszRawMsg[pMsg->iLenRawMsg] = '\0'; /* this also works with truncation! */ + memcpy(pThis->pszRawMsg, pszRawMsg, pThis->iLenRawMsg); + pThis->pszRawMsg[pThis->iLenRawMsg] = '\0'; /* this also works with truncation! */ } diff --git a/runtime/msg.h b/runtime/msg.h index 9113882a..c38cb788 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -28,10 +28,6 @@ #ifndef MSG_H_INCLUDED #define MSG_H_INCLUDED 1 -/* some configuration constants */ -#define CONF_RAWMSG_BUFSIZE 101 -#define CONF_TAG_BUFSIZE 33 /* RFC says 32 chars (+ \0), but in practice we see longer ones... */ - #include #include "obj.h" #include "syslogd-types.h" @@ -162,6 +158,7 @@ rsRetVal MsgSetAfterPRIOffs(msg_t *pMsg, short offs); void MsgSetMSGoffs(msg_t *pMsg, short offs); void MsgSetRawMsgWOSize(msg_t *pMsg, char* pszRawMsg); void MsgSetRawMsg(msg_t *pMsg, char* pszRawMsg, size_t lenMsg); +rsRetVal MsgReplaceMSG(msg_t *pThis, uchar* pszMSG, int lenMSG); void moveHOSTNAMEtoTAG(msg_t *pM); char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, cstr_t *pCSPropName, size_t *pPropLen, unsigned short *pbMustBeFreed); diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index ac068d5e..793df782 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -31,6 +31,9 @@ * ############################################################# */ #define RS_STRINGBUF_ALLOC_INCREMENT 128 #define CONF_TAG_MAXSIZE 512 /* a value that is deemed far too large for any valid TAG */ +#define CONF_RAWMSG_BUFSIZE 101 +#define CONF_TAG_BUFSIZE 33 /* RFC says 32 chars (+ \0), but in practice we see longer ones... */ + /* ############################################################# * * # End Config Settings # * diff --git a/runtime/ruleset.c b/runtime/ruleset.c index 93d40e24..d98b4217 100644 --- a/runtime/ruleset.c +++ b/runtime/ruleset.c @@ -84,7 +84,6 @@ DEFFUNC_llExecFunc(doIterateRulesetActions) iRet = rule.IterateAllActions(pRule, pMyParam->pFunc, pMyParam->pParam); RETiRet; } -#if 0 /* iterate over all actions of THIS rule set. */ static rsRetVal @@ -96,7 +95,7 @@ iterateRulesetAllActions(ruleset_t *pThis, rsRetVal (*pFunc)(void*, void*), void params.pFunc = pFunc; params.pParam = pParam; - CHKiRet(llExecFunc(&llRulesets, doIterateRulesetActions, ¶ms)); + CHKiRet(llExecFunc(&(pThis->llRules), doIterateRulesetActions, ¶ms)); finalize_it: RETiRet; @@ -112,7 +111,6 @@ DEFFUNC_llExecFunc(doIterateAllActions) iRet = iterateRulesetAllActions(pThis, pMyParam->pFunc, pMyParam->pParam); RETiRet; } -#endif /* iterate over ALL actions present in the WHOLE system. * this is often needed, for example when HUP processing * must be done or a shutdown is pending. @@ -126,8 +124,7 @@ iterateAllActions(rsRetVal (*pFunc)(void*, void*), void* pParam) params.pFunc = pFunc; params.pParam = pParam; - //CHKiRet(llExecFunc(&llRulesets, doIterateAllActions, ¶ms)); - CHKiRet(llExecFunc(&llRulesets, doIterateRulesetActions, ¶ms)); + CHKiRet(llExecFunc(&llRulesets, doIterateAllActions, ¶ms)); finalize_it: RETiRet; diff --git a/tools/syslogd.c b/tools/syslogd.c index ed826fc1..178ad455 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -1469,6 +1469,7 @@ DEFFUNC_llExecFunc(flushRptdMsgsActions) assert(pAction != NULL); BEGINfunc +RUNLOG_VAR("%p", pAction); LockObj(pAction); /* TODO: time() performance: the call below could be moved to * the beginn of the llExec(). This makes it slightly less correct, but -- cgit v1.2.3 From b1f2e53921b7ab19c363a63de47a0e7a35866052 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 15:17:55 +0200 Subject: prevented unneccessary apc calls --- runtime/apc.c | 7 +++++-- runtime/stream.c | 15 ++++++++++----- runtime/stream.h | 5 +++-- tools/syslogd.c | 3 +-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/runtime/apc.c b/runtime/apc.c index b0b5f298..5919191d 100644 --- a/runtime/apc.c +++ b/runtime/apc.c @@ -257,8 +257,11 @@ execScheduled(void) END_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); CHKiRet(iRet); - DBGPRINTF("running apc scheduler - we have %s to execute\n", - pExecList == NULL ? "nothing" : "something"); + if(pExecList != NULL) { + DBGPRINTF("running apc scheduler - we have %s to execute\n", + pExecList == NULL ? "nothing" : "something"); + } + for(pCurr = pExecList ; pCurr != NULL ; pCurr = pNext) { dbgprintf("executing apc list entry %p, apc %p\n", pCurr, pCurr->pApc); pNext = pCurr->pNext; diff --git a/runtime/stream.c b/runtime/stream.c index f9859617..00c726d9 100644 --- a/runtime/stream.c +++ b/runtime/stream.c @@ -74,6 +74,7 @@ flushApc(void *param1, void __attribute__((unused)) *param2) BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(&pThis->mut); strmFlush(pThis); + pThis->apcRequested = 0; END_MTX_PROTECTED_OPERATIONS_UNCOND(&pThis->mut); } @@ -1001,12 +1002,16 @@ scheduleFlushRequest(strm_t *pThis) apc_t *pApc; DEFiRet; - CHKiRet(apc.CancelApc(pThis->apcID)); + if(!pThis->apcRequested) { + /* we do an request only if none is yet pending */ + pThis->apcRequested = 1; + // TODO: find similar thing later CHKiRet(apc.CancelApc(pThis->apcID)); dbgprintf("XXX: requesting to add apc!\n"); - CHKiRet(apc.Construct(&pApc)); - CHKiRet(apc.SetProcedure(pApc, (void (*)(void*, void*))flushApc)); - CHKiRet(apc.SetParam1(pApc, pThis)); - CHKiRet(apc.ConstructFinalize(pApc, &pThis->apcID)); + CHKiRet(apc.Construct(&pApc)); + CHKiRet(apc.SetProcedure(pApc, (void (*)(void*, void*))flushApc)); + CHKiRet(apc.SetParam1(pApc, pThis)); + CHKiRet(apc.ConstructFinalize(pApc, &pThis->apcID)); + } finalize_it: RETiRet; diff --git a/runtime/stream.h b/runtime/stream.h index e3ad32b1..ac003c7b 100644 --- a/runtime/stream.h +++ b/runtime/stream.h @@ -104,8 +104,8 @@ typedef struct strm_s { int64 iCurrOffs;/* current offset */ int64 *pUsrWCntr; /* NULL or a user-provided counter that receives the nbr of bytes written since the last CntrSet() */ /* dynamic properties, valid only during file open, not to be persistet */ - int bDisabled; /* should file no longer be written to? (currently set only if omfile file size limit fails) */ - int bSync; /* sync this file after every write? */ + bool bDisabled; /* should file no longer be written to? (currently set only if omfile file size limit fails) */ + bool bSync; /* sync this file after every write? */ size_t sIOBufSize;/* size of IO buffer */ uchar *pszDir; /* Directory */ int lenDir; @@ -123,6 +123,7 @@ typedef struct strm_s { int iFlushInterval; /* flush in which interval - 0, no flushing */ apc_id_t apcID; /* id of current Apc request (used for cancelling) */ pthread_mutex_t mut;/* mutex for flush in async mode */ + int apcRequested; /* is an apc Requested? */ /* support for omfile size-limiting commands, special counters, NOT persisted! */ off_t iSizeLimit; /* file size limit, 0 = no limit */ uchar *pszSizeLimitCmd; /* command to carry out when size limit is reached */ diff --git a/tools/syslogd.c b/tools/syslogd.c index 178ad455..385fef1c 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -1469,7 +1469,6 @@ DEFFUNC_llExecFunc(flushRptdMsgsActions) assert(pAction != NULL); BEGINfunc -RUNLOG_VAR("%p", pAction); LockObj(pAction); /* TODO: time() performance: the call below could be moved to * the beginn of the llExec(). This makes it slightly less correct, but @@ -2586,7 +2585,7 @@ mainloop(void) * but a once-a-day wakeup should be quite acceptable. -- rgerhards, 2008-06-09 */ //tvSelectTimeout.tv_sec = (bReduceRepeatMsgs == 1) ? TIMERINTVL : 86400 /*1 day*/; -tvSelectTimeout.tv_sec = 5; // TESTING ONLY!!! TODO: change back!!! + tvSelectTimeout.tv_sec = TIMERINTVL; /* TODO: change this back to the above code when we have a better solution for apc */ tvSelectTimeout.tv_usec = 0; select(1, NULL, NULL, NULL, &tvSelectTimeout); if(bFinished) -- cgit v1.2.3 From 86e37f70fe0e9de0e00362990c73536843c8fef3 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 16:14:19 +0200 Subject: more strict parsing of the hostname in rfc3164 mode ... hopefully removes false positives (but may cause some trouble with hostname parsing). For details, see this bug tracker: http://bugzilla.adiscon.com/show_bug.cgi?id=126 This patch is not optimal for v4 - another one will follow. The spirit of this commit is to enable easier backporting if someone is interested in doing so. --- ChangeLog | 8 +++- runtime/msg.h | 2 +- runtime/rsyslog.h | 12 ++++-- tools/syslogd.c | 114 ++++++++++++++++++++---------------------------------- 4 files changed, 57 insertions(+), 79 deletions(-) diff --git a/ChangeLog b/ChangeLog index 36a819a3..46235ff7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,5 @@ --------------------------------------------------------------------------- -Version 4.3.2 [DEVEL] (rgerhards), 2009-??-?? +Version 4.5.0 [DEVEL] (rgerhards), 2009-??-?? - greatly reduced memory requirements of msg object to around half of the previous demand. This means that more messages can be stored in core! Due to fewer cache misses, this also means some @@ -22,6 +22,10 @@ Version 4.3.2 [DEVEL] (rgerhards), 2009-??-?? - added capability to fsync() queue disk files for enhanced reliability (also add's speed, because you do no longer need to run the whole file system in sync mode) +- more strict parsing of the hostname in rfc3164 mode, hopefully + removes false positives (but may cause some trouble with hostname + parsing). For details, see this bug tracker: + http://bugzilla.adiscon.com/show_bug.cgi?id=126 - added configuration commands (see doc for explanations) * $OMFileZipLevel * $OMFileIOBufferSize @@ -29,6 +33,8 @@ Version 4.3.2 [DEVEL] (rgerhards), 2009-??-?? * $MainMsgQueueSyncQueueFiles * $ActionQueueSyncQueueFiles --------------------------------------------------------------------------- +Version 4.3.2 [beta] (rgerhards), 2009-??-?? +--------------------------------------------------------------------------- Version 4.3.1 [DEVEL] (rgerhards), 2009-05-25 - added capability to run multiple tcp listeners (on different ports) - performance enhancement: imtcp calls parser no longer on input thread diff --git a/runtime/msg.h b/runtime/msg.h index c38cb788..d70b939a 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -152,7 +152,7 @@ rsRetVal MsgSetFlowControlType(msg_t *pMsg, flowControl_t eFlowCtl); rsRetVal MsgSetStructuredData(msg_t *pMsg, char* pszStrucData); void MsgSetRcvFrom(msg_t *pMsg, uchar* pszRcvFrom); rsRetVal MsgSetRcvFromIP(msg_t *pMsg, uchar* pszRcvFromIP); -void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf); +//void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf); void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME); rsRetVal MsgSetAfterPRIOffs(msg_t *pMsg, short offs); void MsgSetMSGoffs(msg_t *pMsg, short offs); diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 793df782..4fa5100e 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -29,10 +29,14 @@ /* ############################################################# * * # Config Settings # * * ############################################################# */ -#define RS_STRINGBUF_ALLOC_INCREMENT 128 -#define CONF_TAG_MAXSIZE 512 /* a value that is deemed far too large for any valid TAG */ -#define CONF_RAWMSG_BUFSIZE 101 -#define CONF_TAG_BUFSIZE 33 /* RFC says 32 chars (+ \0), but in practice we see longer ones... */ +#define RS_STRINGBUF_ALLOC_INCREMENT 128 +/* MAXSIZE are absolute maxima, while BUFSIZE are just values after which + * processing is more time-intense. + */ +#define CONF_TAG_MAXSIZE 512 /* a value that is deemed far too large for any valid TAG */ +#define CONF_TAG_HOSTNAME 512 /* a value that is deemed far too large for any valid HOSTNAME */ +#define CONF_RAWMSG_BUFSIZE 101 +#define CONF_TAG_BUFSIZE 33 /* RFC says 32 chars (+ \0), but in practice we see longer ones... */ /* ############################################################# * diff --git a/tools/syslogd.c b/tools/syslogd.c index 385fef1c..2f28cde0 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -1176,6 +1176,7 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) int bTAGCharDetected; int i; /* general index for parsing */ uchar bufParseTAG[CONF_TAG_MAXSIZE]; + uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; BEGINfunc assert(pMsg != NULL); @@ -1228,50 +1229,27 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) * If I find them, I set a simple flag but continue. After parsing, I check the flag. * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change * the fields. I think this logic shall work with any type of syslog message. + * rgerhards, 2009-06-23: and I now have extended this logic to every character + * that is not a valid hostname. */ bTAGCharDetected = 0; if(flags & PARSE_HOSTNAME) { - /* TODO: quick and dirty memory allocation */ - /* the memory allocated is far too much in most cases. But on the plus side, - * it is quite fast... - rgerhards, 2007-09-20 - */ - if((pBuf = malloc(sizeof(char)* (ustrlen(p2parse) +1))) == NULL) - return 1; - pWork = pBuf; - /* this is the actual parsing loop */ - while(*p2parse && *p2parse != ' ' && *p2parse != ':') { - if(*p2parse == '[' || *p2parse == ']' || *p2parse == '/') - bTAGCharDetected = 1; - *pWork++ = *p2parse++; + i = 0; + while((isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' + || p2parse[i] == '_') && i < CONF_TAG_MAXSIZE) { + bufParseHOSTNAME[i] = p2parse[i]; + ++i; + } + + if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { + /* we got a hostname! */ + p2parse += i + 1; /* "eat" it (including SP delimiter) */ + bufParseHOSTNAME[i] = '\0'; + MsgSetHOSTNAME(pMsg, bufParseHOSTNAME); + //MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); + } else { + MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); } - /* we need to handle ':' seperately, because it terminates the - * TAG - so we also need to terminate the parser here! - * rgerhards, 2007-09-10 *p2parse points to a valid address here in - * any case. We can reach this point only if we are at end of string, - * or we have a ':' or ' '. What the if below does is check if we are - * not at end of string and, if so, advance the parse pointer. If we - * are already at end of string, *p2parse is equal to '\0', neither if - * will be true and the parse pointer remain as is. This is perfectly - * well. - */ - if(*p2parse == ':') { - bTAGCharDetected = 1; - /* We will move hostname to tag, so preserve ':' (otherwise we - * will needlessly change the message format) */ - *pWork++ = *p2parse++; - } else if(*p2parse == ' ') - ++p2parse; - *pWork = '\0'; - MsgAssignHOSTNAME(pMsg, pBuf); - } - /* check if we seem to have a TAG */ - if(bTAGCharDetected) { - /* indeed, this smells like a TAG, so lets use it for this. We take - * the HOSTNAME from the sender system instead. - */ - DBGPRINTF("HOSTNAME contains invalid characters, assuming it to be a TAG.\n"); - moveHOSTNAMEtoTAG(pMsg); - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); } /* now parse TAG - that should be present in message from all sources. @@ -1287,40 +1265,30 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) * in RFC3164...). We now receive the full size, but will modify the * outputs so that only 32 characters max are used by default. */ - /* The following code in general is quick & dirty - I need to get - * it going for a test, rgerhards 2004-11-16 */ - /* lol.. we tried to solve it, just to remind ourselfs that 32 octets - * is the max size ;) we need to shuffle the code again... Just for - * the records: the code is currently clean, but we could optimize it! */ - if(!bTAGCharDetected) { - i = 0; - while(*p2parse && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { - bufParseTAG[i++] = *p2parse++; - } - if(*p2parse == ':') { - ++p2parse; - bufParseTAG[i++] = ':'; - } + i = 0; + while(*p2parse && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { + bufParseTAG[i++] = *p2parse++; + } + if(*p2parse == ':') { + ++p2parse; + bufParseTAG[i++] = ':'; + } - if(i == 0) - { /* rger, 2005-11-10: no TAG found - this implies that what - * we have considered to be the HOSTNAME is most probably the - * TAG. We consider it so probable, that we now adjust it - * that way. So we pick up the previously set hostname, assign - * it to tag and use the sender system (from IP stack) as - * the hostname. This situation is the standard case with - * stock BSD syslogd. - */ - DBGPRINTF("No TAG in message, assuming that HOSTNAME is missing.\n"); - moveHOSTNAMEtoTAG(pMsg); - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); - } else { /* we have a TAG, so we can happily set it ;) */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } - } else { - /* we have no TAG, so we ... */ - /*DO NOTHING*/; + if(i == 0) + { /* rger, 2005-11-10: no TAG found - this implies that what + * we have considered to be the HOSTNAME is most probably the + * TAG. We consider it so probable, that we now adjust it + * that way. So we pick up the previously set hostname, assign + * it to tag and use the sender system (from IP stack) as + * the hostname. This situation is the standard case with + * stock BSD syslogd. + */ + DBGPRINTF("No TAG in message, assuming that HOSTNAME is missing.\n"); + moveHOSTNAMEtoTAG(pMsg); + MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); + } else { /* we have a TAG, so we can happily set it ;) */ + bufParseTAG[i] = '\0'; /* terminate string */ + MsgSetTAG(pMsg, bufParseTAG, i); } } else { /* we enter this code area when the user has instructed rsyslog NOT -- cgit v1.2.3 From 662ad3e4bf8dbd317d18aa1afcbf3e8b9e424506 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 16:32:29 +0200 Subject: optimized hostname processing --- plugins/imfile/imfile.c | 2 +- plugins/imklog/imklog.c | 2 +- runtime/msg.c | 10 +++++----- runtime/msg.h | 5 ++--- runtime/parser.c | 3 --- tools/syslogd.c | 47 +++++++++++------------------------------------ 6 files changed, 20 insertions(+), 49 deletions(-) diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c index 0f5f49dc..631d02ff 100644 --- a/plugins/imfile/imfile.c +++ b/plugins/imfile/imfile.c @@ -100,7 +100,7 @@ static rsRetVal enqLine(fileInfo_t *pInfo, cstr_t *cstrLine) MsgSetInputName(pMsg, UCHAR_CONSTANT("imfile"), sizeof("imfile")-1); MsgSetRawMsg(pMsg, (char*)rsCStrGetSzStr(cstrLine), cstrLen(cstrLine)); MsgSetMSGoffs(pMsg, 0); /* we do not have a header... */ - MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName()); + MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName(), ustrlen(glbl.GetLocalHostName())); MsgSetTAG(pMsg, pInfo->pszTag, pInfo->lenTag); pMsg->iFacility = LOG_FAC(pInfo->iFacility); pMsg->iSeverity = LOG_PRI(pInfo->iSeverity); diff --git a/plugins/imklog/imklog.c b/plugins/imklog/imklog.c index 67fdc8a3..f63d60ac 100644 --- a/plugins/imklog/imklog.c +++ b/plugins/imklog/imklog.c @@ -100,7 +100,7 @@ enqMsg(uchar *msg, uchar* pszTag, int iFacility, int iSeverity) MsgSetMSGoffs(pMsg, 0); /* we do not have a header... */ MsgSetRcvFrom(pMsg, glbl.GetLocalHostName()); MsgSetRcvFromIP(pMsg, (uchar*)"127.0.0.1"); - MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName()); + MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName(), ustrlen(glbl.GetLocalHostName())); MsgSetTAG(pMsg, pszTag, ustrlen(pszTag)); pMsg->iFacility = LOG_FAC(iFacility); pMsg->iSeverity = LOG_PRI(iSeverity); diff --git a/runtime/msg.c b/runtime/msg.c index 8c306aca..6cf41322 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1439,7 +1439,7 @@ int getHOSTNAMELen(msg_t *pM) return 0; else if(pM->pszHOSTNAME == NULL) - return 0; + return pM->iLenRcvFrom; else return pM->iLenHOSTNAME; } @@ -1451,7 +1451,7 @@ char *getHOSTNAME(msg_t *pM) return ""; else if(pM->pszHOSTNAME == NULL) - return ""; + return (char*) pM->pszRcvFrom; else return (char*) pM->pszHOSTNAME; } @@ -1720,12 +1720,12 @@ void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf) * we need it. The rest of the code already knows how to handle an * unset HOSTNAME. */ -void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME) +void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME, int lenHOSTNAME) { assert(pMsg != NULL); free(pMsg->pszHOSTNAME); - pMsg->iLenHOSTNAME = ustrlen(pszHOSTNAME); + pMsg->iLenHOSTNAME = lenHOSTNAME; if((pMsg->pszHOSTNAME = malloc(pMsg->iLenHOSTNAME + 1)) != NULL) memcpy(pMsg->pszHOSTNAME, pszHOSTNAME, pMsg->iLenHOSTNAME + 1); else @@ -2719,7 +2719,7 @@ rsRetVal MsgSetProperty(msg_t *pThis, var_t *pProp) } else if(isProp("pszRcvFrom")) { MsgSetRcvFrom(pThis, rsCStrGetSzStrNoNULL(pProp->val.pStr)); } else if(isProp("pszHOSTNAME")) { - MsgSetHOSTNAME(pThis, rsCStrGetSzStrNoNULL(pProp->val.pStr)); + MsgSetHOSTNAME(pThis, rsCStrGetSzStrNoNULL(pProp->val.pStr), rsCStrLen(pProp->val.pStr)); } else if(isProp("pCSStrucData")) { MsgSetStructuredData(pThis, (char*) rsCStrGetSzStrNoNULL(pProp->val.pStr)); } else if(isProp("pCSAPPNAME")) { diff --git a/runtime/msg.h b/runtime/msg.h index d70b939a..fd4d650b 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -3,7 +3,7 @@ * * File begun on 2007-07-13 by RGerhards (extracted from syslogd.c) * - * Copyright 2007 Rainer Gerhards and Adiscon GmbH. + * Copyright 2007-2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. * @@ -152,8 +152,7 @@ rsRetVal MsgSetFlowControlType(msg_t *pMsg, flowControl_t eFlowCtl); rsRetVal MsgSetStructuredData(msg_t *pMsg, char* pszStrucData); void MsgSetRcvFrom(msg_t *pMsg, uchar* pszRcvFrom); rsRetVal MsgSetRcvFromIP(msg_t *pMsg, uchar* pszRcvFromIP); -//void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf); -void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME); +void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME, int lenHOSTNAME); rsRetVal MsgSetAfterPRIOffs(msg_t *pMsg, short offs); void MsgSetMSGoffs(msg_t *pMsg, short offs); void MsgSetRawMsgWOSize(msg_t *pMsg, char* pszRawMsg); diff --git a/runtime/parser.c b/runtime/parser.c index 75cde343..0a27c982 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -284,9 +284,6 @@ rsRetVal parseMsg(msg_t *pMsg) pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); - if(pMsg->bParseHOSTNAME == 0) - MsgSetHOSTNAME(pMsg, pMsg->pszRcvFrom); - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have * a traditional syslog message or one formatted according to syslog-protocol. * We need to apply different parsers depending on that. We use the diff --git a/tools/syslogd.c b/tools/syslogd.c index 2f28cde0..faa793fb 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -609,7 +609,7 @@ static inline rsRetVal printline(uchar *hname, uchar *hnameIP, uchar *msg, int f * being the local host). rgerhards 2004-11-16 */ if((pMsg->msgFlags & PARSE_HOSTNAME) == 0) - MsgSetHOSTNAME(pMsg, hname); + MsgSetHOSTNAME(pMsg, hname, strlen(hname)); MsgSetRcvFrom(pMsg, hname); MsgSetAfterPRIOffs(pMsg, p - msg); CHKiRet(MsgSetRcvFromIP(pMsg, hnameIP)); @@ -883,7 +883,7 @@ logmsgInternal(int iErr, int pri, uchar *msg, int flags) CHKiRet(msgConstruct(&pMsg)); MsgSetInputName(pMsg, UCHAR_CONSTANT("rsyslogd"), sizeof("rsyslogd")-1); MsgSetRawMsgWOSize(pMsg, (char*)msg); - MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName()); + MsgSetHOSTNAME(pMsg, glbl.GetLocalHostName(), ustrlen(glbl.GetLocalHostName())); MsgSetRcvFrom(pMsg, glbl.GetLocalHostName()); MsgSetRcvFromIP(pMsg, UCHAR_CONSTANT("127.0.0.1")); /* check if we have an error code associated and, if so, @@ -1113,12 +1113,7 @@ int parseRFCSyslogMsg(msg_t *pMsg, int flags) /* HOSTNAME */ if(bContParse) { parseRFCField(&p2parse, pBuf); - MsgSetHOSTNAME(pMsg, pBuf); - } else { - /* we can not parse, so we get the system we - * received the data from. - */ - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); + MsgSetHOSTNAME(pMsg, pBuf, strlen(pBuf)); } /* APP-NAME */ @@ -1147,7 +1142,6 @@ int parseRFCSyslogMsg(msg_t *pMsg, int flags) /* MSG */ MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - //MsgSetMSG(pMsg, (char*)p2parse); free(pBuf); ENDfunc @@ -1171,8 +1165,6 @@ int parseRFCSyslogMsg(msg_t *pMsg, int flags) int parseLegacySyslogMsg(msg_t *pMsg, int flags) { uchar *p2parse; - char *pBuf; - char *pWork; int bTAGCharDetected; int i; /* general index for parsing */ uchar bufParseTAG[CONF_TAG_MAXSIZE]; @@ -1198,8 +1190,7 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse) == RS_RET_OK) { /* indeed, we got it! */ /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else { - /* parse pointer needs to be restored, as we moved it off-by-one + } else {/* parse pointer needs to be restored, as we moved it off-by-one * for this try. */ --p2parse; @@ -1245,10 +1236,7 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) /* we got a hostname! */ p2parse += i + 1; /* "eat" it (including SP delimiter) */ bufParseHOSTNAME[i] = '\0'; - MsgSetHOSTNAME(pMsg, bufParseHOSTNAME); - //MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); - } else { - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); + MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); } } @@ -1274,30 +1262,17 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) bufParseTAG[i++] = ':'; } - if(i == 0) - { /* rger, 2005-11-10: no TAG found - this implies that what - * we have considered to be the HOSTNAME is most probably the - * TAG. We consider it so probable, that we now adjust it - * that way. So we pick up the previously set hostname, assign - * it to tag and use the sender system (from IP stack) as - * the hostname. This situation is the standard case with - * stock BSD syslogd. - */ - DBGPRINTF("No TAG in message, assuming that HOSTNAME is missing.\n"); - moveHOSTNAMEtoTAG(pMsg); - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); - } else { /* we have a TAG, so we can happily set it ;) */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } + /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG + * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 + */ + bufParseTAG[i] = '\0'; /* terminate string */ + MsgSetTAG(pMsg, bufParseTAG, i); } else { /* we enter this code area when the user has instructed rsyslog NOT * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 */ - if(!(flags & INTERNAL_MSG)) - { + if(!(flags & INTERNAL_MSG)) { DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); - MsgSetHOSTNAME(pMsg, getRcvFrom(pMsg)); } } -- cgit v1.2.3 From b2fa740b9ab5fb9e85309b3307f3fca21f625ab1 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Jun 2009 17:14:42 +0200 Subject: optimized TAG handling --- runtime/msg.c | 87 ++++++++++++++++++++++-------------------------------- runtime/msg.h | 2 +- runtime/rsyslog.h | 6 ++-- tests/diskqueue.sh | 2 +- tools/syslogd.c | 7 ++--- 5 files changed, 44 insertions(+), 60 deletions(-) diff --git a/runtime/msg.c b/runtime/msg.c index 6cf41322..1a882597 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -551,6 +551,11 @@ static inline void freeTAG(msg_t *pThis) if(pThis->iLenTAG >= CONF_TAG_BUFSIZE) free(pThis->TAG.pszTAG); } +static inline void freeHOSTNAME(msg_t *pThis) +{ + if(pThis->iLenHOSTNAME >= CONF_HOSTNAME_BUFSIZE) + free(pThis->pszHOSTNAME); +} BEGINobjDestruct(msg) /* be sure to specify the object type also in END and CODESTART macros! */ @@ -569,7 +574,7 @@ CODESTARTobjDestruct(msg) if(pThis->pszRawMsg != pThis->szRawMsg) free(pThis->pszRawMsg); freeTAG(pThis); - free(pThis->pszHOSTNAME); + freeHOSTNAME(pThis); free(pThis->pszInputName); free(pThis->pszRcvFrom); free(pThis->pszRcvFromIP); @@ -669,7 +674,7 @@ msg_t* MsgDup(msg_t* pOld) pNew->iProtocolVersion = pOld->iProtocolVersion; pNew->ttGenTime = pOld->ttGenTime; pNew->offMSG = pOld->offMSG; - /* enable this, if someone actually uses UxTradMsg, delete after some time has + /* enable this, if someone actually uses UxTradMsg, delete after some time has * passed and nobody complained -- rgerhards, 2009-06-16 pNew->offAfterPRI = pOld->offAfterPRI; */ @@ -684,8 +689,18 @@ msg_t* MsgDup(msg_t* pOld) pNew->iLenTAG = pOld->iLenTAG; } } - tmpCOPYSZ(RawMsg); - tmpCOPYSZ(HOSTNAME); + if(pOld->iLenRawMsg < CONF_RAWMSG_BUFSIZE) { + memcpy(pNew->szRawMsg, pOld->szRawMsg, pOld->iLenRawMsg + 1); + pNew->pszRawMsg = pNew->szRawMsg; + } else { + tmpCOPYSZ(RawMsg); + } + if(pOld->iLenHOSTNAME < CONF_HOSTNAME_BUFSIZE) { + memcpy(pNew->szHOSTNAME, pOld->szHOSTNAME, pOld->iLenHOSTNAME + 1); + pNew->pszHOSTNAME = pNew->szHOSTNAME; + } else { + tmpCOPYSZ(HOSTNAME); + } tmpCOPYSZ(RcvFrom); tmpCOPYCSTR(ProgName); @@ -886,30 +901,6 @@ finalize_it: } -/* This function moves the HOSTNAME inside the message object to the - * TAG. It is a specialised function used to handle the condition when - * a message without HOSTNAME is being processed. The missing HOSTNAME - * is only detected at a later stage, during TAG processing, so that - * we already had set the HOSTNAME property and now need to move it to - * the TAG. Of course, we could do this via a couple of get/set methods, - * but it is far more efficient to do it via this specialised method. - * This is especially important as this can be a very common case, e.g. - * when BSD syslog is acting as a sender. - * rgerhards, 2005-11-10. - * NOTE ********* 2009-06-18 / rgerhards ************* - * This function is being obsoleted by the new handling. I keep it for - * a while, and for oversize tags it is somewhat less optimal than in previous - * versions. This should only happen very seldom. - */ -void moveHOSTNAMEtoTAG(msg_t *pM) -{ - assert(pM != NULL); - MsgSetTAG(pM, pM->pszHOSTNAME, pM->iLenHOSTNAME); - free(pM->pszHOSTNAME); - pM->pszHOSTNAME = NULL; - pM->iLenHOSTNAME = 0; -} - /* Access methods - dumb & easy, not a comment for each ;) */ void setProtocolVersion(msg_t *pM, int iNewVersion) @@ -1694,22 +1685,6 @@ finalize_it: } -/* Set the HOSTNAME to a caller-provided string. This is thought - * to be a heap buffer that the caller will no longer use. This - * function is a performance optimization over MsgSetHOSTNAME(). - * rgerhards 2004-11-19 - */ -void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf) -{ - assert(pMsg != NULL); - assert(pBuf != NULL); - if(pMsg->pszHOSTNAME != NULL) - free(pMsg->pszHOSTNAME); - pMsg->iLenHOSTNAME = strlen(pBuf); - pMsg->pszHOSTNAME = (uchar*) pBuf; -} - - /* rgerhards 2004-11-09: set HOSTNAME in msg object * rgerhards, 2007-06-21: * Does not return anything. If an error occurs, the hostname is @@ -1720,16 +1695,24 @@ void MsgAssignHOSTNAME(msg_t *pMsg, char *pBuf) * we need it. The rest of the code already knows how to handle an * unset HOSTNAME. */ -void MsgSetHOSTNAME(msg_t *pMsg, uchar* pszHOSTNAME, int lenHOSTNAME) +void MsgSetHOSTNAME(msg_t *pThis, uchar* pszHOSTNAME, int lenHOSTNAME) { - assert(pMsg != NULL); - free(pMsg->pszHOSTNAME); + assert(pThis != NULL); - pMsg->iLenHOSTNAME = lenHOSTNAME; - if((pMsg->pszHOSTNAME = malloc(pMsg->iLenHOSTNAME + 1)) != NULL) - memcpy(pMsg->pszHOSTNAME, pszHOSTNAME, pMsg->iLenHOSTNAME + 1); - else - DBGPRINTF("Could not allocate memory in MsgSetHOSTNAME()\n"); + freeHOSTNAME(pThis); + + pThis->iLenHOSTNAME = lenHOSTNAME; + if(pThis->iLenHOSTNAME < CONF_HOSTNAME_BUFSIZE) { + /* small enough: use fixed buffer (faster!) */ + pThis->pszHOSTNAME = pThis->szHOSTNAME; + } else if((pThis->pszHOSTNAME = (uchar*) malloc(pThis->iLenHOSTNAME + 1)) == NULL) { + /* truncate message, better than completely loosing it... */ + pThis->pszHOSTNAME = pThis->szHOSTNAME; + pThis->iLenHOSTNAME = CONF_HOSTNAME_BUFSIZE - 1; + } + + memcpy(pThis->pszHOSTNAME, pszHOSTNAME, pThis->iLenHOSTNAME); + pThis->pszHOSTNAME[pThis->iLenHOSTNAME] = '\0'; /* this also works with truncation! */ } diff --git a/runtime/msg.h b/runtime/msg.h index fd4d650b..4bfc1e3f 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -111,6 +111,7 @@ struct msg { struct syslogTime tTIMESTAMP;/* (parsed) value of the timestamp */ /* some fixed-size buffers to save malloc()/free() for frequently used fields (from the default templates) */ uchar szRawMsg[CONF_RAWMSG_BUFSIZE]; /* most messages are small, and these are stored here (without malloc/free!) */ + uchar szHOSTNAME[CONF_HOSTNAME_BUFSIZE]; union { uchar *pszTAG; /* pointer to tag value */ uchar szBuf[CONF_TAG_BUFSIZE]; @@ -158,7 +159,6 @@ void MsgSetMSGoffs(msg_t *pMsg, short offs); void MsgSetRawMsgWOSize(msg_t *pMsg, char* pszRawMsg); void MsgSetRawMsg(msg_t *pMsg, char* pszRawMsg, size_t lenMsg); rsRetVal MsgReplaceMSG(msg_t *pThis, uchar* pszMSG, int lenMSG); -void moveHOSTNAMEtoTAG(msg_t *pM); char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, cstr_t *pCSPropName, size_t *pPropLen, unsigned short *pbMustBeFreed); char *textpri(char *pRes, size_t pResLen, int pri); diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 4fa5100e..913ab300 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -31,12 +31,14 @@ * ############################################################# */ #define RS_STRINGBUF_ALLOC_INCREMENT 128 /* MAXSIZE are absolute maxima, while BUFSIZE are just values after which - * processing is more time-intense. + * processing is more time-intense. The BUFSIZE params currently add their + * value to the fixed size of the message object. */ #define CONF_TAG_MAXSIZE 512 /* a value that is deemed far too large for any valid TAG */ #define CONF_TAG_HOSTNAME 512 /* a value that is deemed far too large for any valid HOSTNAME */ #define CONF_RAWMSG_BUFSIZE 101 -#define CONF_TAG_BUFSIZE 33 /* RFC says 32 chars (+ \0), but in practice we see longer ones... */ +#define CONF_TAG_BUFSIZE 32 +#define CONF_HOSTNAME_BUFSIZE 32 /* ############################################################# * diff --git a/tests/diskqueue.sh b/tests/diskqueue.sh index 668a1224..d2e75cbd 100755 --- a/tests/diskqueue.sh +++ b/tests/diskqueue.sh @@ -5,7 +5,7 @@ # added 2009-04-17 by Rgerhards # This file is part of the rsyslog project, released under GPLv3 # uncomment for debugging support: -echo testing queue disk-only mode +echo diskqueue.sh: testing queue disk-only mode source $srcdir/diag.sh init source $srcdir/diag.sh startup diskqueue.conf # 20000 messages should be enough - the disk test is slow enough ;) diff --git a/tools/syslogd.c b/tools/syslogd.c index faa793fb..6f264e5e 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -609,7 +609,7 @@ static inline rsRetVal printline(uchar *hname, uchar *hnameIP, uchar *msg, int f * being the local host). rgerhards 2004-11-16 */ if((pMsg->msgFlags & PARSE_HOSTNAME) == 0) - MsgSetHOSTNAME(pMsg, hname, strlen(hname)); + MsgSetHOSTNAME(pMsg, hname, ustrlen(hname)); MsgSetRcvFrom(pMsg, hname); MsgSetAfterPRIOffs(pMsg, p - msg); CHKiRet(MsgSetRcvFromIP(pMsg, hnameIP)); @@ -1113,7 +1113,7 @@ int parseRFCSyslogMsg(msg_t *pMsg, int flags) /* HOSTNAME */ if(bContParse) { parseRFCField(&p2parse, pBuf); - MsgSetHOSTNAME(pMsg, pBuf, strlen(pBuf)); + MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); } /* APP-NAME */ @@ -1267,8 +1267,7 @@ int parseLegacySyslogMsg(msg_t *pMsg, int flags) */ bufParseTAG[i] = '\0'; /* terminate string */ MsgSetTAG(pMsg, bufParseTAG, i); - } else { - /* we enter this code area when the user has instructed rsyslog NOT + } else {/* we enter this code area when the user has instructed rsyslog NOT * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 */ if(!(flags & INTERNAL_MSG)) { -- cgit v1.2.3