From 420189ba2f860fe29b75a2c8e426d7ca9b1ffaf7 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 27 Apr 2012 12:32:57 +0200 Subject: bugfix: active input in "light delay state" could block rsyslog termination at least for prolonged period of time... --- runtime/queue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'runtime/queue.c') diff --git a/runtime/queue.c b/runtime/queue.c index 9f318523..5e000cd2 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2247,6 +2247,10 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) DEFiRet; struct timespec t; + if(glbl.GetGlobalInputTermState()) { + ABORT_FINALIZE(RS_RET_FORCE_TERM); + } + STATSCOUNTER_INC(pThis->ctrEnqueued, pThis->mutCtrEnqueued); /* first check if we need to discard this message (which will cause CHKiRet() to exit) */ -- cgit v1.2.3 From a4d9f6e70f4bbe24708067653516655474f46493 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 3 May 2012 09:25:06 +0200 Subject: bugfix: inside queue.c, some thread cancel states were not correctly reset. While this is a bug, we assume it did have no practical effect because the reset as it was done was set to the state the code actually had at this point. But better fix this... --- runtime/queue.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'runtime/queue.c') diff --git a/runtime/queue.c b/runtime/queue.c index 5e000cd2..9961ed4d 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -1715,7 +1715,7 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti) } /* but now cancellation is no longer permitted */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); + pthread_setcancelstate(iCancelStateSave, NULL); finalize_it: DBGPRINTF("regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet, @@ -1768,7 +1768,7 @@ ConsumerDA(qqueue_t *pThis, wti_t *pWti) } /* but now cancellation is no longer permitted */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); + pthread_setcancelstate(iCancelStateSave, NULL); /* now we are done, but need to re-aquire the mutex */ d_pthread_mutex_lock(pThis->mut); @@ -1847,7 +1847,6 @@ qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ int wrk; uchar *qName; size_t lenBuf; - int iQueueSizeSave; ASSERT(pThis != NULL); @@ -2244,8 +2243,8 @@ finalize_it: static inline rsRetVal doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) { - DEFiRet; struct timespec t; + DEFiRet; if(glbl.GetGlobalInputTermState()) { ABORT_FINALIZE(RS_RET_FORCE_TERM); @@ -2279,7 +2278,7 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) if(flowCtlType == eFLOWCTL_FULL_DELAY) { while(pThis->iQueueSize >= pThis->iFullDlyMrk) { DBGOPRINT((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); - pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); /* TODO error check? But what do then? */ + pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); } } else if(flowCtlType == eFLOWCTL_LIGHT_DELAY) { if(pThis->iQueueSize >= pThis->iLightDlyMrk) { -- cgit v1.2.3 From 6f529cc2daafb791ca1bbef8a3ee128833db19c1 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 3 May 2012 09:53:48 +0200 Subject: bugfix: rsyslog did not terminate when delayable inputs were blocked ...due to unvailable sources. Fixes: http://bugzilla.adiscon.com/show_bug.cgi?id=299 Thanks to Marcin M for bringing up this problem and Andre Lorbach for helping to reproduce and fix it. --- runtime/queue.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'runtime/queue.c') diff --git a/runtime/queue.c b/runtime/queue.c index 9961ed4d..e968806c 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2243,8 +2243,9 @@ finalize_it: static inline rsRetVal doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) { - struct timespec t; DEFiRet; + int err; + struct timespec t; if(glbl.GetGlobalInputTermState()) { ABORT_FINALIZE(RS_RET_FORCE_TERM); @@ -2276,15 +2277,48 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) * It's a side effect, but a good one ;) -- rgerhards, 2008-03-14 */ if(flowCtlType == eFLOWCTL_FULL_DELAY) { + DBGOPRINT((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message " + "- blocking.\n"); while(pThis->iQueueSize >= pThis->iFullDlyMrk) { - DBGOPRINT((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); - pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); + /* We have a problem during shutdown if we block eternally. In that + * case, the the input thread cannot be terminated. So we wake up + * from time to time to check for termination. + * TODO/v6(at earliest): check if we could signal the condition during + * shutdown. However, this requires new queue registries and thus is + * far to much change for a stable version (and I am still not sure it + * is worth the effort, given how seldom this situation occurs and how + * few resources the wakeups need). -- rgerhards, 2012-05-03 + * In any case, this was the old code (if we do the TODO): + * pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); + */ + timeoutComp(&t, 1000); + err = pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); + if(err != 0 && err != ETIMEDOUT) { + /* Something is really wrong now. Report to debug log and abort the + * wait. That keeps us running, even though we may lose messages. + */ + DBGOPRINT((obj_t*) pThis, "potential program bug: pthread_cond_timedwait()" + "/fulldelay returned %d\n", err); + break; + + } + DBGPRINTF("wti worker in full delay timed out, checking termination...\n"); + if(glbl.GetGlobalInputTermState()) { + ABORT_FINALIZE(RS_RET_FORCE_TERM); + } } } else if(flowCtlType == eFLOWCTL_LIGHT_DELAY) { if(pThis->iQueueSize >= pThis->iLightDlyMrk) { - DBGOPRINT((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light delayable message - blocking a bit.\n"); + DBGOPRINT((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light " + "delayable message - blocking a bit.\n"); timeoutComp(&t, 1000); /* 1000 millisconds = 1 second TODO: make configurable */ - pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); /* TODO error check? But what do then? */ + err = pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); + if(err != 0 && err != ETIMEDOUT) { + /* Something is really wrong now. Report to debug log */ + DBGOPRINT((obj_t*) pThis, "potential program bug: pthread_cond_timedwait()" + "/lightdelay returned %d\n", err); + + } } } @@ -2297,6 +2331,9 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) || (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0 && pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) { DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n"); + if(glbl.GetGlobalInputTermState()) { + ABORT_FINALIZE(RS_RET_FORCE_TERM); + } timeoutComp(&t, pThis->toEnq); STATSCOUNTER_INC(pThis->ctrFull, pThis->mutCtrFull); // TODO : handle enqOnly => discard! -- cgit v1.2.3