summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--action.c30
2 files changed, 28 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 854a14d3..c4546db6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,14 @@ Version 7.2.7 [v7-stable] 2013-03-??
The rsyslog debug log file is now continued to be written across the
fork.
- updated systemd files to match current systemd source
+- bugfix: failover/action suspend did not work correctly
+ This was experienced if the retry action took more than one second
+ to complete. For suspending, a cached timestamp was used, and if the
+ retry took longer, that timestamp was already in the past. As a
+ result, the action never was kept in suspended state, and as such
+ no failover happened. The suspend functionalit now does no longer use
+ the cached timestamp (should not have any performance implication, as
+ action suspend occurs very infrequently).
- bugfix: nested if/prifilt conditions did not work properly
closes: http://bugzilla.adiscon.com/show_bug.cgi?id=415
- bugfix: script == comparison did not work properly on JSON objects
diff --git a/action.c b/action.c
index fd8727ee..8fc92e56 100644
--- a/action.c
+++ b/action.c
@@ -613,13 +613,17 @@ static void actionDisable(action_t *pThis)
* CPU time. TODO: maybe a config option for that?
* rgerhards, 2007-08-02
*/
-static inline void actionSuspend(action_t *pThis, time_t ttNow)
+static inline void actionSuspend(action_t *pThis)
{
- if(ttNow == NO_TIME_PROVIDED)
- datetime.GetTime(&ttNow);
+ time_t ttNow;
+
+ /* note: we can NOT use a cached timestamp, as time may have evolved
+ * since caching, and this would break logic (and it actually did so!)
+ */
+ datetime.GetTime(&ttNow);
pThis->ttResumeRtry = ttNow + pThis->iResumeInterval * (pThis->iNbrResRtry / 10 + 1);
actionSetState(pThis, ACT_STATE_SUSP);
- DBGPRINTF("earliest retry=%d\n", (int) pThis->ttResumeRtry);
+ DBGPRINTF("action suspended, earliest retry=%d\n", (int) pThis->ttResumeRtry);
}
@@ -639,7 +643,7 @@ static inline void actionSuspend(action_t *pThis, time_t ttNow)
* of its inability to recover. -- rgerhards, 2010-04-26.
*/
static inline rsRetVal
-actionDoRetry(action_t *pThis, time_t ttNow, int *pbShutdownImmediate)
+actionDoRetry(action_t *pThis, int *pbShutdownImmediate)
{
int iRetries;
int iSleepPeriod;
@@ -650,7 +654,9 @@ actionDoRetry(action_t *pThis, time_t ttNow, int *pbShutdownImmediate)
iRetries = 0;
while((*pbShutdownImmediate == 0) && pThis->eState == ACT_STATE_RTRY) {
+ DBGPRINTF("actionDoRetry: enter loop, iRetries=%d\n", iRetries);
iRet = pThis->pMod->tryResume(pThis->pModData);
+ DBGPRINTF("actionDoRetry: action->tryResume returned %d\n", iRet);
if((pThis->iResumeOKinRow > 9) && (pThis->iResumeOKinRow % 10 == 0)) {
bTreatOKasSusp = 1;
pThis->iResumeOKinRow = 0;
@@ -658,16 +664,18 @@ actionDoRetry(action_t *pThis, time_t ttNow, int *pbShutdownImmediate)
bTreatOKasSusp = 0;
}
if((iRet == RS_RET_OK) && (!bTreatOKasSusp)) {
+ DBGPRINTF("actionDoRetry: had success RDY again (iRet=%d)\n", iRet);
actionSetState(pThis, ACT_STATE_RDY);
} else if(iRet == RS_RET_SUSPENDED || bTreatOKasSusp) {
/* max retries reached? */
+ DBGPRINTF("actionDoRetry: check for max retries, iResumeRetryCount %d, iRetries %d\n",
+ pThis->iResumeRetryCount, iRetries);
if((pThis->iResumeRetryCount != -1 && iRetries >= pThis->iResumeRetryCount)) {
- actionSuspend(pThis, ttNow);
+ actionSuspend(pThis);
} else {
++pThis->iNbrResRtry;
++iRetries;
iSleepPeriod = pThis->iResumeInterval;
- ttNow += iSleepPeriod; /* not truly exact, but sufficiently... */
srSleep(iSleepPeriod, 0);
if(*pbShutdownImmediate) {
ABORT_FINALIZE(RS_RET_FORCE_TERM);
@@ -714,7 +722,7 @@ static rsRetVal actionTryResume(action_t *pThis, int *pbShutdownImmediate)
if(pThis->eState == ACT_STATE_RTRY) {
if(ttNow == NO_TIME_PROVIDED) /* use cached result if we have it */
datetime.GetTime(&ttNow);
- CHKiRet(actionDoRetry(pThis, ttNow, pbShutdownImmediate));
+ CHKiRet(actionDoRetry(pThis, pbShutdownImmediate));
}
if(Debug && (pThis->eState == ACT_STATE_RTRY ||pThis->eState == ACT_STATE_SUSP)) {
@@ -1125,6 +1133,7 @@ submitBatch(action_t *pAction, batch_t *pBatch, int nElem)
assert(pBatch != NULL);
+ DBGPRINTF("submitBatch: enter, nElem %d\n", nElem);
wasDoneTo = pBatch->iDoneUpTo;
bDone = 0;
do {
@@ -1146,7 +1155,8 @@ submitBatch(action_t *pAction, batch_t *pBatch, int nElem)
|| localRet == RS_RET_DEFER_COMMIT) {
bDone = 1;
} else if(localRet == RS_RET_SUSPENDED) {
- ; /* do nothing, this will retry the full batch */
+ DBGPRINTF("action ret RS_RET_SUSPENDED - retry full batch\n");
+ /* do nothing, this will retry the full batch */
} else if(localRet == RS_RET_ACTION_FAILED) {
/* in this case, everything not yet committed is BAD */
for(i = pBatch->iDoneUpTo ; i < wasDoneTo + nElem ; ++i) {
@@ -1974,7 +1984,7 @@ addAction(action_t **ppAction, modInfo_t *pMod, void *pModData,
pAction->eState = ACT_STATE_RDY; /* action is enabled */
if(bSuspended)
- actionSuspend(pAction, datetime.GetTime(NULL)); /* "good" time call, only during init and unavoidable */
+ actionSuspend(pAction);
CHKiRet(actionConstructFinalize(pAction, queueParams));