From d579eacd44d5513db9d89e3a45ae00ee9af753bc Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 24 Oct 2012 12:38:34 +0200 Subject: ommongodb rate-limits error messages to prevent spamming the syslog closes (for v7.2): http://bugzilla.adiscon.com/show_bug.cgi?id=366 --- ChangeLog | 4 ++++ plugins/ommongodb/ommongodb.c | 31 ++++++++++++++++++------------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4b549842..b6726326 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,8 @@ ---------------------------------------------------------------------------- +Version 7.2.1 [v7-stable] 2012-10-?? +- ommongodb rate-limits error messages to prevent spamming the syslog + closes (for v7.2): http://bugzilla.adiscon.com/show_bug.cgi?id=366 +---------------------------------------------------------------------------- Version 7.2.0 [v7-stable] 2012-10-22 This starts a new stable branch based on 7.1.12 plus the following changes: - bugfix: imuxsock did not properly honor $LocalHostIPIF diff --git a/plugins/ommongodb/ommongodb.c b/plugins/ommongodb/ommongodb.c index 2c65f275..0da6dc48 100644 --- a/plugins/ommongodb/ommongodb.c +++ b/plugins/ommongodb/ommongodb.c @@ -68,6 +68,7 @@ typedef struct _instanceData { uchar *pwd; uchar *dbNcoll; uchar *tplName; + int bErrMsgPermitted; /* only one errmsg permitted per connection */ } instanceData; @@ -139,19 +140,21 @@ static void reportMongoError(instanceData *pData) { char errStr[1024]; - errmsg.LogError(0, RS_RET_ERR, "ommongodb: error: %s", - rs_strerror_r(errno, errStr, sizeof(errStr))); -#if 0 gchar *err; - if(mongo_sync_cmd_get_last_error(pData->conn, (gchar*)pData->db, &err) == TRUE) { - errmsg.LogError(0, RS_RET_ERR, "ommongodb: error: %s", err); - } else { - errmsg.LogError(0, RS_RET_ERR, "ommongodb: we had an error, but can " - "not obtain specifics"); + int eno; + + if(pData->bErrMsgPermitted) { + eno = errno; + if(mongo_sync_cmd_get_last_error(pData->conn, (gchar*)pData->db, 
&err) == TRUE) { + errmsg.LogError(0, RS_RET_ERR, "ommongodb: error: %s", err); + } else { + DBGPRINTF("ommongodb: we had an error, but can not obtain specifics, " + "using plain old errno error message generator\n"); + errmsg.LogError(0, RS_RET_ERR, "ommongodb: error: %s", + rs_strerror_r(eno, errStr, sizeof(errStr))); + } + pData->bErrMsgPermitted = 0; } -#else - (void)pData; -#endif } @@ -433,9 +436,11 @@ CODESTARTdoAction /* FIXME: is this a correct return code? */ ABORT_FINALIZE(RS_RET_ERR); } - if(!mongo_sync_cmd_insert(pData->conn, (char*)pData->dbNcoll, doc, NULL)) { - reportMongoError(pData); + if(mongo_sync_cmd_insert(pData->conn, (char*)pData->dbNcoll, doc, NULL)) { + pData->bErrMsgPermitted = 1; + } else { dbgprintf("ommongodb: insert error\n"); + reportMongoError(pData); ABORT_FINALIZE(RS_RET_SUSPENDED); } -- cgit v1.2.3 From c72a43d280744b2b1fec5e981651fc9bc8035f94 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 24 Oct 2012 13:00:38 +0200 Subject: make rsyslog core suspend actions after 10 failures in a row This was formerly the case after 1,000 failures and could cause rsyslog to be spammed/ressou --- ChangeLog | 4 ++++ action.c | 2 +- doc/v7compatibility.html | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b6726326..dcb6993e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ ---------------------------------------------------------------------------- Version 7.2.1 [v7-stable] 2012-10-?? +- the rsyslog core now suspends actions after 10 failures in a row + This was formerly the case after 1,000 failures and could cause rsyslog + to be spammed/resources to be misused. See the v7 compatibility doc for more + details. 
- ommongodb rate-limits error messages to prevent spamming the syslog closes (for v7.2): http://bugzilla.adiscon.com/show_bug.cgi?id=366 ---------------------------------------------------------------------------- diff --git a/action.c b/action.c index 9c06f61e..ca260f92 100644 --- a/action.c +++ b/action.c @@ -659,7 +659,7 @@ actionDoRetry(action_t *pThis, time_t ttNow, int *pbShutdownImmediate) iRetries = 0; while((*pbShutdownImmediate == 0) && pThis->eState == ACT_STATE_RTRY) { iRet = pThis->pMod->tryResume(pThis->pModData); - if((pThis->iResumeOKinRow > 999) && (pThis->iResumeOKinRow % 1000 == 0)) { + if((pThis->iResumeOKinRow > 9) && (pThis->iResumeOKinRow % 10 == 0)) { bTreatOKasSusp = 1; pThis->iResumeOKinRow = 0; } else { diff --git a/doc/v7compatibility.html b/doc/v7compatibility.html index 692a4fe1..8834cd54 100644 --- a/doc/v7compatibility.html +++ b/doc/v7compatibility.html @@ -42,6 +42,16 @@ They tell that the construct is deprecated and which statement is to be used as replacement. This does not affect operations: both modules are still fully operational and will not be removed in the v7 timeframe. +

Retries of output plugins that do not do proper replies

+

Some output plugins may not be able to detect if their target is capable of +accepting data again after an error (technically, they always return OK when +TryResume is called). Previously, the rsyslog core engine suspended such an action +after 1000 successive failures. This led to a potentially large number of +errors and error messages. Starting with 7.2.1, this has been reduced to 10 +successive failures. This still gives the plugin a chance to recover. In extreme +cases, a plugin may now enter suspend mode where it previously did not do so. +In practice, we do NOT expect that. +

This documentation is part of the rsyslog project.
Copyright © 2011-2012 by Rainer Gerhards and -- cgit v1.2.3 From bf1eef67ddfe16682d2733713ec0b2a3301840b9 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 24 Oct 2012 14:44:45 +0200 Subject: cleanup --- grammar/rainerscript.c | 2 +- runtime/ruleset.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/grammar/rainerscript.c b/grammar/rainerscript.c index 9483e116..733ebef4 100644 --- a/grammar/rainerscript.c +++ b/grammar/rainerscript.c @@ -2458,7 +2458,7 @@ cnfstmtOptimizeAct(struct cnfstmt *stmt) action_t *pAct; pAct = stmt->d.act; - if(!strcmp((char*)modGetName(stmt->d.act->pMod), "builtin:omdiscard")) { + if(!strcmp((char*)modGetName(pAct->pMod), "builtin:omdiscard")) { DBGPRINTF("optimizer: replacing omdiscard by STOP\n"); actionDestruct(stmt->d.act); stmt->nodetype = S_STOP; diff --git a/runtime/ruleset.c b/runtime/ruleset.c index bdeb61b7..8d2bb924 100644 --- a/runtime/ruleset.c +++ b/runtime/ruleset.c @@ -458,14 +458,12 @@ static void execPROPFILT(struct cnfstmt *stmt, batch_t *pBatch, sbool *active) { sbool *thenAct; - msg_t *pMsg; sbool bRet; int i; thenAct = newActive(pBatch); for(i = 0 ; i < batchNumMsgs(pBatch) && !*(pBatch->pbShutdownImmediate) ; ++i) { if(pBatch->pElem[i].state == BATCH_STATE_DISC) continue; /* will be ignored in any case */ - pMsg = (msg_t*)(pBatch->pElem[i].pUsrp); if(active == NULL || active[i]) { bRet = evalPROPFILT(stmt, (msg_t*)(pBatch->pElem[i].pUsrp)); } else -- cgit v1.2.3