diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | action.c | 4 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | doc/v6compatibility.html | 9 | ||||
-rw-r--r-- | plugins/imrelp/imrelp.c | 39 | ||||
-rw-r--r-- | runtime/queue.c | 26 | ||||
-rw-r--r-- | runtime/rsconf.c | 3 | ||||
-rw-r--r-- | tests/Makefile.am | 4 | ||||
-rwxr-xr-x | tests/imuxsock_ccmiddle_root.sh | 3 | ||||
-rwxr-xr-x | tests/imuxsock_logger_root.sh | 3 | ||||
-rwxr-xr-x | tests/imuxsock_traillf_root.sh | 3 | ||||
-rwxr-xr-x | tests/sndrcv_udp.sh | 3 | ||||
-rw-r--r-- | tools/syslogd.c | 4 |
14 files changed, 99 insertions, 21 deletions
@@ -26,6 +26,7 @@ missing compile rsyslogd rsyslog.service +ylwrap *.orig rg.conf* *.swp @@ -1,5 +1,7 @@ --------------------------------------------------------------------------- Version 6.5.0 [devel] 2012-0?-?? +- imrelp now supports non-cancel thread termination + (but now requires at least librelp 1.0.1) - added --enable-debugless configure option for very high demanding envs This actually at compile time disables a lot of debug code, resulting in some speedup (but serious loss of debugging capabilities) @@ -459,6 +461,13 @@ expected that interfaces, even new ones, break during the initial [ported from v4] --------------------------------------------------------------------------- Version 5.9.8 [V5-BETA], 2012-05-?? +- bugfix: delayable source could block action queue, even if there was + a disk queue associated with it. The root cause of this problem was + that it makes no sense to delay messages once they arrive in the + action queue - the "input" that is being held in that case is the main + queue worker, which makes no sense. + Thanks to Marcin for alerting us to this problem and providing + instructions to reproduce it. - bugfix: disk queue was not persisted on shutdown, regression of fix to http://bugzilla.adiscon.com/show_bug.cgi?id=299 The new code also handles the case of shutdown of blocking light and @@ -616,6 +625,13 @@ Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08 closes: http://bugzilla.adiscon.com/show_bug.cgi?id=236 --------------------------------------------------------------------------- Version 5.8.12 [V5-stable] 2012-05-?? +- bugfix: delayable source could block action queue, even if there was + a disk queue associated with it. The root cause of this problem was + that it makes no sense to delay messages once they arrive in the + action queue - the "input" that is being held in that case is the main + queue worker, which makes no sense. + Thanks to Marcin for alerting us to this problem and providing + instructions to reproduce it. 
- bugfix: disk queue was not persisted on shutdown, regression of fix to http://bugzilla.adiscon.com/show_bug.cgi?id=299 The new code also handles the case of shutdown of blocking light and @@ -262,7 +262,7 @@ actionResetQueueParams(void) cs.bActionQSyncQeueFiles = 0; cs.iActionQtoQShutdown = 0; /* queue shutdown */ cs.iActionQtoActShutdown = 1000; /* action shutdown (in phase 2) */ - cs.iActionQtoEnq = 2000; /* timeout for queue enque */ + cs.iActionQtoEnq = 50; /* timeout for queue enque */ cs.iActionQtoWrkShutdown = 60000; /* timeout for worker thread shutdown */ cs.iActionQWrkMinMsgs = 100; /* minimum messages per worker needed to start a new one */ cs.bActionQSaveOnShutdown = 1; /* save queue on shutdown (when DA enabled)? */ @@ -1350,7 +1350,7 @@ doSubmitToActionQ(action_t *pAction, msg_t *pMsg) if(pAction->pQueue->qType == QUEUETYPE_DIRECT) iRet = qqueueEnqObjDirect(pAction->pQueue, (void*) MsgAddRef(pMsg)); else - iRet = qqueueEnqObj(pAction->pQueue, pMsg->flowCtlType, (void*) MsgAddRef(pMsg)); + iRet = qqueueEnqObj(pAction->pQueue, eFLOWCTL_NO_DELAY, (void*) MsgAddRef(pMsg)); RETiRet; } diff --git a/configure.ac b/configure.ac index cee453db..1f995eb7 100644 --- a/configure.ac +++ b/configure.ac @@ -916,7 +916,7 @@ AC_ARG_ENABLE(relp, [enable_relp=no] ) if test "x$enable_relp" = "xyes"; then - PKG_CHECK_MODULES(RELP, relp >= 0.1.1) + PKG_CHECK_MODULES(RELP, relp >= 1.0.1) fi AM_CONDITIONAL(ENABLE_RELP, test x$enable_relp = xyes) AC_SUBST(RELP_CFLAGS) diff --git a/doc/v6compatibility.html b/doc/v6compatibility.html index 1f830854..058ab4f1 100644 --- a/doc/v6compatibility.html +++ b/doc/v6compatibility.html @@ -112,6 +112,15 @@ to spot why things went wrong (and if at all). <p>Due to their positive effect on performance and comparatively low overhead, default batch sizes have been increased. Starting with 6.3.4, the action queues have a default batch size of 128 messages. 
+<h2>Default action queue enqueue timeout</h2> +<p>This timeout previously was 2 seconds, and has been reduced to 50ms (starting with 6.5.0). This change +was made as a long timeout will cause delays in the associated main queue, something +that was quite unexpected to users. Now, this can still happen, but the effect is much +less harsh (but still considerable on a busy system). Also, 50ms should be sufficient +for most output sources, except when they are really broken (like network disconnect). If +they are really broken, even a 2-second timeout does not help, so we hopefully get the best +of both worlds with the new timeout. A specific timeout can of course still be configured, +it is just the default that changed. <h2>outchannels</h2> <p>Outchannels are a to-be-removed feature of rsyslog, at least as far as the config syntax is concerned. Nevertheless, v6 still supports it, but a new syntax is required diff --git a/plugins/imrelp/imrelp.c b/plugins/imrelp/imrelp.c index 99fabd18..f6040b21 100644 --- a/plugins/imrelp/imrelp.c +++ b/plugins/imrelp/imrelp.c @@ -22,7 +22,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "config.h" #include <stdlib.h> #include <assert.h> @@ -35,6 +34,7 @@ #include <netdb.h> #include <sys/types.h> #include <sys/socket.h> +#include <signal.h> #include <librelp.h> #include "rsyslog.h" #include "dirty.h" @@ -236,13 +236,39 @@ BEGINfreeCnf CODESTARTfreeCnf ENDfreeCnf +/* This is used to terminate the plugin. Note that the signal handler blocks + * other activity on the thread. As such, it is safe to request the stop. When + * we terminate, relpEngine is called, and its select() loop interrupted. But + * only *after this function is done*. So we do not have a race! 
+ */ +static void +doSIGTTIN(int __attribute__((unused)) sig) +{ + DBGPRINTF("imrelp: termination requested via SIGTTIN - telling RELP engine\n"); + relpEngineSetStop(pRelpEngine); +} + + /* This function is called to gather input. */ BEGINrunInput + sigset_t sigSet; + struct sigaction sigAct; CODESTARTrunInput - /* TODO: we must be careful to start the listener here. Currently, tcpsrv.c seems to - * do that in ConstructFinalize + /* we want to support non-cancel input termination. To do so, we must signal librelp + * when to stop. As we run on the same thread, we need to register as SIGTTIN handler, + * which will be used to put the terminating condition into librelp. */ + sigfillset(&sigSet); + pthread_sigmask(SIG_BLOCK, &sigSet, NULL); + sigemptyset(&sigSet); + sigaddset(&sigSet, SIGTTIN); + pthread_sigmask(SIG_UNBLOCK, &sigSet, NULL); + memset(&sigAct, 0, sizeof (sigAct)); + sigemptyset(&sigAct.sa_mask); + sigAct.sa_handler = doSIGTTIN; + sigaction(SIGTTIN, &sigAct, NULL); + iRet = relpEngineRun(pRelpEngine); ENDrunInput @@ -284,12 +310,19 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus } +BEGINisCompatibleWithFeature +CODESTARTisCompatibleWithFeature + if(eFeat == sFEATURENonCancelInputTermination) + iRet = RS_RET_OK; +ENDisCompatibleWithFeature + BEGINqueryEtryPt CODESTARTqueryEtryPt CODEqueryEtryPt_STD_IMOD_QUERIES CODEqueryEtryPt_STD_CONF2_QUERIES CODEqueryEtryPt_STD_CONF2_PREPRIVDROP_QUERIES +CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES ENDqueryEtryPt diff --git a/runtime/queue.c b/runtime/queue.c index 05399278..34935403 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2462,21 +2462,27 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) while( (pThis->iMaxQueueSize > 0 && pThis->iQueueSize >= pThis->iMaxQueueSize) || (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0 && pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) { - DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue 
FULL - waiting to drain.\n"); - if(glbl.GetGlobalInputTermState()) { - DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL, discard due to FORCE_TERM.\n"); - ABORT_FINALIZE(RS_RET_FORCE_TERM); - } - timeoutComp(&t, pThis->toEnq); STATSCOUNTER_INC(pThis->ctrFull, pThis->mutCtrFull); -// TODO : handle enqOnly => discard! - if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) { - DBGOPRINT((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); + if(pThis->toEnq == 0 || pThis->bEnqOnly) { + DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - configured for immediate discarding.\n"); STATSCOUNTER_INC(pThis->ctrFDscrd, pThis->mutCtrFDscrd); objDestruct(pUsr); ABORT_FINALIZE(RS_RET_QUEUE_FULL); - } + } else { + DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - waiting %dms to drain.\n", pThis->toEnq); + if(glbl.GetGlobalInputTermState()) { + DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL, discard due to FORCE_TERM.\n"); + ABORT_FINALIZE(RS_RET_FORCE_TERM); + } + timeoutComp(&t, pThis->toEnq); + if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) { + DBGOPRINT((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); + STATSCOUNTER_INC(pThis->ctrFDscrd, pThis->mutCtrFDscrd); + objDestruct(pUsr); + ABORT_FINALIZE(RS_RET_QUEUE_FULL); + } dbgoprint((obj_t*) pThis, "enqueueMsg: wait solved queue full condition, enqueing\n"); + } } /* and finally enqueue the message */ diff --git a/runtime/rsconf.c b/runtime/rsconf.c index 460e69d6..fca4f9b8 100644 --- a/runtime/rsconf.c +++ b/runtime/rsconf.c @@ -685,9 +685,10 @@ runInputModules(void) node = module.GetNxtCnfType(runConf, NULL, eMOD_IN); while(node != NULL) { if(node->canRun) { - DBGPRINTF("running module %s with config %p\n", node->pMod->pszName, node); bNeedsCancel = (node->pMod->isCompatibleWithFeature(sFEATURENonCancelInputTermination) == RS_RET_OK) ? 0 : 1; + DBGPRINTF("running module %s with config %p, term mode: %s\n", node->pMod->pszName, node, + bNeedsCancel ? 
"cancel" : "cooperative/SIGTTIN"); thrdCreate(node->pMod->mod.im.runInput, node->pMod->mod.im.afterRun, bNeedsCancel, (node->pMod->cnfName == NULL) ? node->pMod->pszName : node->pMod->cnfName); } diff --git a/tests/Makefile.am b/tests/Makefile.am index 50ce2e0b..aff44eef 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -65,7 +65,6 @@ TESTS += \ failover-no-basic.sh \ rcvr_fail_restore.sh \ linkedlistqueue.sh -endif if HAVE_VALGRIND TESTS += \ @@ -76,7 +75,8 @@ TESTS += \ failover-no-basic-vg.sh \ failover-no-rptd-vg.sh \ tcp-msgreduc-vg.sh -endif +endif # HAVE_VALGRIND +endif # ENABLE_IMDIAG if ENABLE_MYSQL_TESTS diff --git a/tests/imuxsock_ccmiddle_root.sh b/tests/imuxsock_ccmiddle_root.sh index b487611a..7f255bd0 100755 --- a/tests/imuxsock_ccmiddle_root.sh +++ b/tests/imuxsock_ccmiddle_root.sh @@ -2,6 +2,9 @@ # carry out this test echo \[imuxsock_ccmiddle_root.sh\]: test trailing LF handling in imuxsock echo This test must be run as root with no other active syslogd +if [ "$EUID" -ne 0 ]; then + exit 77 # Not root, skip this test +fi source $srcdir/diag.sh init source $srcdir/diag.sh startup imuxsock_ccmiddle_root.conf # send a message with trailing LF diff --git a/tests/imuxsock_logger_root.sh b/tests/imuxsock_logger_root.sh index 377999f7..0902d797 100755 --- a/tests/imuxsock_logger_root.sh +++ b/tests/imuxsock_logger_root.sh @@ -2,6 +2,9 @@ # carry out this test. 
echo \[imuxsock_logger_root.sh\]: test trailing LF handling in imuxsock echo This test must be run as root with no other active syslogd +if [ "$EUID" -ne 0 ]; then + exit 77 # Not root, skip this test +fi source $srcdir/diag.sh init source $srcdir/diag.sh startup imuxsock_logger_root.conf # send a message with trailing LF diff --git a/tests/imuxsock_traillf_root.sh b/tests/imuxsock_traillf_root.sh index 1b821ee7..0141a626 100755 --- a/tests/imuxsock_traillf_root.sh +++ b/tests/imuxsock_traillf_root.sh @@ -2,6 +2,9 @@ # carry out this test echo \[imuxsock_traillf_root.sh\]: test trailing LF handling in imuxsock echo This test must be run as root with no other active syslogd +if [ "$EUID" -ne 0 ]; then + exit 77 # Not root, skip this test +fi source $srcdir/diag.sh init source $srcdir/diag.sh startup imuxsock_traillf_root.conf # send a message with trailing LF diff --git a/tests/sndrcv_udp.sh b/tests/sndrcv_udp.sh index 274a414a..df37782c 100755 --- a/tests/sndrcv_udp.sh +++ b/tests/sndrcv_udp.sh @@ -7,4 +7,7 @@ # This file is part of the rsyslog project, released under GPLv3 echo =============================================================================== echo \[sndrcv_udp.sh\]: testing sending and receiving via udp +if [ "$EUID" -ne 0 ]; then + exit 77 # Not root, skip this test +fi source $srcdir/sndrcv_drvr.sh sndrcv_udp 50 diff --git a/tools/syslogd.c b/tools/syslogd.c index 930920b7..8a04fb6b 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -1662,7 +1662,7 @@ doGlblProcessInit(void) if( !(Debug == DEBUG_FULL || NoFork) ) { - DBGPRINTF("Checking pidfile.\n"); + DBGPRINTF("Checking pidfile '%s'.\n", PidFile); if (!check_pid(PidFile)) { memset(&sigAct, 0, sizeof (sigAct)); @@ -1735,7 +1735,7 @@ doGlblProcessInit(void) } /* tuck my process id away */ - DBGPRINTF("Writing pidfile %s.\n", PidFile); + DBGPRINTF("Writing pidfile '%s'.\n", PidFile); if (!check_pid(PidFile)) { if (!write_pid(PidFile)) |