summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--ChangeLog16
-rw-r--r--action.c4
-rw-r--r--configure.ac2
-rw-r--r--doc/v6compatibility.html9
-rw-r--r--plugins/imrelp/imrelp.c39
-rw-r--r--runtime/queue.c26
-rw-r--r--runtime/rsconf.c3
-rw-r--r--tests/Makefile.am4
-rwxr-xr-xtests/imuxsock_ccmiddle_root.sh3
-rwxr-xr-xtests/imuxsock_logger_root.sh3
-rwxr-xr-xtests/imuxsock_traillf_root.sh3
-rwxr-xr-xtests/sndrcv_udp.sh3
-rw-r--r--tools/syslogd.c4
14 files changed, 99 insertions, 21 deletions
diff --git a/.gitignore b/.gitignore
index 55f069ee..b24a0666 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@ missing
compile
rsyslogd
rsyslog.service
+ylwrap
*.orig
rg.conf*
*.swp
diff --git a/ChangeLog b/ChangeLog
index 01268200..659e9ca3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
---------------------------------------------------------------------------
Version 6.5.0 [devel] 2012-0?-??
+- imrelp now supports non-cancel thread termination
+ (but now requires at least librelp 1.0.1)
- added --enable-debugless configure option for very high demanding envs
This actually at compile time disables a lot of debug code, resulting
in some speedup (but serious loss of debugging capabilities)
@@ -459,6 +461,13 @@ expected that interfaces, even new ones, break during the initial
[ported from v4]
---------------------------------------------------------------------------
Version 5.9.8 [V5-BETA], 2012-05-??
+- bugfix: delayble source could block action queue, even if there was
+ a disk queue associated with it. The root cause of this problem was
+ that it makes no sense to delay messages once they arrive in the
+ action queue - the "input" that is being held in that case is the main
+ queue worker, what makes no sense.
+ Thanks to Marcin for alerting us on this problem and providing
+ instructions to reproduce it.
- bugfix: disk queue was not persisted on shutdown, regression of fix to
http://bugzilla.adiscon.com/show_bug.cgi?id=299
The new code also handles the case of shutdown of blocking light and
@@ -616,6 +625,13 @@ Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08
closes: http://bugzilla.adiscon.com/show_bug.cgi?id=236
---------------------------------------------------------------------------
Version 5.8.12 [V5-stable] 2012-05-??
+- bugfix: delayble source could block action queue, even if there was
+ a disk queue associated with it. The root cause of this problem was
+ that it makes no sense to delay messages once they arrive in the
+ action queue - the "input" that is being held in that case is the main
+ queue worker, what makes no sense.
+ Thanks to Marcin for alerting us on this problem and providing
+ instructions to reproduce it.
- bugfix: disk queue was not persisted on shutdown, regression of fix to
http://bugzilla.adiscon.com/show_bug.cgi?id=299
The new code also handles the case of shutdown of blocking light and
diff --git a/action.c b/action.c
index 91399f49..652c0259 100644
--- a/action.c
+++ b/action.c
@@ -262,7 +262,7 @@ actionResetQueueParams(void)
cs.bActionQSyncQeueFiles = 0;
cs.iActionQtoQShutdown = 0; /* queue shutdown */
cs.iActionQtoActShutdown = 1000; /* action shutdown (in phase 2) */
- cs.iActionQtoEnq = 2000; /* timeout for queue enque */
+ cs.iActionQtoEnq = 50; /* timeout for queue enque */
cs.iActionQtoWrkShutdown = 60000; /* timeout for worker thread shutdown */
cs.iActionQWrkMinMsgs = 100; /* minimum messages per worker needed to start a new one */
cs.bActionQSaveOnShutdown = 1; /* save queue on shutdown (when DA enabled)? */
@@ -1350,7 +1350,7 @@ doSubmitToActionQ(action_t *pAction, msg_t *pMsg)
if(pAction->pQueue->qType == QUEUETYPE_DIRECT)
iRet = qqueueEnqObjDirect(pAction->pQueue, (void*) MsgAddRef(pMsg));
else
- iRet = qqueueEnqObj(pAction->pQueue, pMsg->flowCtlType, (void*) MsgAddRef(pMsg));
+ iRet = qqueueEnqObj(pAction->pQueue, eFLOWCTL_NO_DELAY, (void*) MsgAddRef(pMsg));
RETiRet;
}
diff --git a/configure.ac b/configure.ac
index cee453db..1f995eb7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -916,7 +916,7 @@ AC_ARG_ENABLE(relp,
[enable_relp=no]
)
if test "x$enable_relp" = "xyes"; then
- PKG_CHECK_MODULES(RELP, relp >= 0.1.1)
+ PKG_CHECK_MODULES(RELP, relp >= 1.0.1)
fi
AM_CONDITIONAL(ENABLE_RELP, test x$enable_relp = xyes)
AC_SUBST(RELP_CFLAGS)
diff --git a/doc/v6compatibility.html b/doc/v6compatibility.html
index 1f830854..058ab4f1 100644
--- a/doc/v6compatibility.html
+++ b/doc/v6compatibility.html
@@ -112,6 +112,15 @@ to spot why things went wrong (and if at all).
<p>Due to their positive effect on performance and comparatively low overhead,
default batch sizes have been increased. Starting with 6.3.4, the action queues
have a default batch size of 128 messages.
+<h2>Default action queue enqueue timeout</h2>
+<p>This timeout previously was 2seconds, and has been reduced to 50ms (starting with 6.5.0). This change
+was made as a long timeout will caused delays in the associated main queue, something
+that was quite unexpected to users. Now, this can still happen, but the effect is much
+less harsh (but still considerable on a busy system). Also, 50ms should be fairly enough
+for most output sources, except when they are really broken (like network disconnect). If
+they are really broken, even a 2second timeout does not help, so we hopefully get the best
+of both worlds with the new timeout. A specific timeout can of course still be configured,
+it is just the timeout that changed.
<h2>outchannels</h2>
<p>Outchannels are a to-be-removed feature of rsyslog, at least as far as the config
syntax is concerned. Nevertheless, v6 still supports it, but a new syntax is required
diff --git a/plugins/imrelp/imrelp.c b/plugins/imrelp/imrelp.c
index 99fabd18..f6040b21 100644
--- a/plugins/imrelp/imrelp.c
+++ b/plugins/imrelp/imrelp.c
@@ -22,7 +22,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include "config.h"
#include <stdlib.h>
#include <assert.h>
@@ -35,6 +34,7 @@
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <signal.h>
#include <librelp.h>
#include "rsyslog.h"
#include "dirty.h"
@@ -236,13 +236,39 @@ BEGINfreeCnf
CODESTARTfreeCnf
ENDfreeCnf
+/* This is used to terminate the plugin. Note that the signal handler blocks
+ * other activity on the thread. As such, it is safe to request the stop. When
+ * we terminate, relpEngine is called, and it's select() loop interrupted. But
+ * only *after this function is done*. So we do not have a race!
+ */
+static void
+doSIGTTIN(int __attribute__((unused)) sig)
+{
+ DBGPRINTF("imrelp: termination requested via SIGTTIN - telling RELP engine\n");
+ relpEngineSetStop(pRelpEngine);
+}
+
+
/* This function is called to gather input.
*/
BEGINrunInput
+ sigset_t sigSet;
+ struct sigaction sigAct;
CODESTARTrunInput
- /* TODO: we must be careful to start the listener here. Currently, tcpsrv.c seems to
- * do that in ConstructFinalize
+ /* we want to support non-cancel input termination. To do so, we must signal librelp
+ * when to stop. As we run on the same thread, we need to register as SIGTTIN handler,
+ * which will be used to put the terminating condition into librelp.
*/
+ sigfillset(&sigSet);
+ pthread_sigmask(SIG_BLOCK, &sigSet, NULL);
+ sigemptyset(&sigSet);
+ sigaddset(&sigSet, SIGTTIN);
+ pthread_sigmask(SIG_UNBLOCK, &sigSet, NULL);
+ memset(&sigAct, 0, sizeof (sigAct));
+ sigemptyset(&sigAct.sa_mask);
+ sigAct.sa_handler = doSIGTTIN;
+ sigaction(SIGTTIN, &sigAct, NULL);
+
iRet = relpEngineRun(pRelpEngine);
ENDrunInput
@@ -284,12 +310,19 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus
}
+BEGINisCompatibleWithFeature
+CODESTARTisCompatibleWithFeature
+ if(eFeat == sFEATURENonCancelInputTermination)
+ iRet = RS_RET_OK;
+ENDisCompatibleWithFeature
+
BEGINqueryEtryPt
CODESTARTqueryEtryPt
CODEqueryEtryPt_STD_IMOD_QUERIES
CODEqueryEtryPt_STD_CONF2_QUERIES
CODEqueryEtryPt_STD_CONF2_PREPRIVDROP_QUERIES
+CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES
ENDqueryEtryPt
diff --git a/runtime/queue.c b/runtime/queue.c
index 05399278..34935403 100644
--- a/runtime/queue.c
+++ b/runtime/queue.c
@@ -2462,21 +2462,27 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr)
while( (pThis->iMaxQueueSize > 0 && pThis->iQueueSize >= pThis->iMaxQueueSize)
|| (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0
&& pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) {
- DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n");
- if(glbl.GetGlobalInputTermState()) {
- DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL, discard due to FORCE_TERM.\n");
- ABORT_FINALIZE(RS_RET_FORCE_TERM);
- }
- timeoutComp(&t, pThis->toEnq);
STATSCOUNTER_INC(pThis->ctrFull, pThis->mutCtrFull);
-// TODO : handle enqOnly => discard!
- if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) {
- DBGOPRINT((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n");
+ if(pThis->toEnq == 0 || pThis->bEnqOnly) {
+ DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - configured for immediate discarding.\n");
STATSCOUNTER_INC(pThis->ctrFDscrd, pThis->mutCtrFDscrd);
objDestruct(pUsr);
ABORT_FINALIZE(RS_RET_QUEUE_FULL);
- }
+ } else {
+ DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - waiting %dms to drain.\n", pThis->toEnq);
+ if(glbl.GetGlobalInputTermState()) {
+ DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL, discard due to FORCE_TERM.\n");
+ ABORT_FINALIZE(RS_RET_FORCE_TERM);
+ }
+ timeoutComp(&t, pThis->toEnq);
+ if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) {
+ DBGOPRINT((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n");
+ STATSCOUNTER_INC(pThis->ctrFDscrd, pThis->mutCtrFDscrd);
+ objDestruct(pUsr);
+ ABORT_FINALIZE(RS_RET_QUEUE_FULL);
+ }
dbgoprint((obj_t*) pThis, "enqueueMsg: wait solved queue full condition, enqueing\n");
+ }
}
/* and finally enqueue the message */
diff --git a/runtime/rsconf.c b/runtime/rsconf.c
index 460e69d6..fca4f9b8 100644
--- a/runtime/rsconf.c
+++ b/runtime/rsconf.c
@@ -685,9 +685,10 @@ runInputModules(void)
node = module.GetNxtCnfType(runConf, NULL, eMOD_IN);
while(node != NULL) {
if(node->canRun) {
- DBGPRINTF("running module %s with config %p\n", node->pMod->pszName, node);
bNeedsCancel = (node->pMod->isCompatibleWithFeature(sFEATURENonCancelInputTermination) == RS_RET_OK) ?
0 : 1;
+ DBGPRINTF("running module %s with config %p, term mode: %s\n", node->pMod->pszName, node,
+ bNeedsCancel ? "cancel" : "cooperative/SIGTTIN");
thrdCreate(node->pMod->mod.im.runInput, node->pMod->mod.im.afterRun, bNeedsCancel,
(node->pMod->cnfName == NULL) ? node->pMod->pszName : node->pMod->cnfName);
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 50ce2e0b..aff44eef 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -65,7 +65,6 @@ TESTS += \
failover-no-basic.sh \
rcvr_fail_restore.sh \
linkedlistqueue.sh
-endif
if HAVE_VALGRIND
TESTS += \
@@ -76,7 +75,8 @@ TESTS += \
failover-no-basic-vg.sh \
failover-no-rptd-vg.sh \
tcp-msgreduc-vg.sh
-endif
+endif # HAVE_VALGRIND
+endif # ENABLE_IMDIAG
if ENABLE_MYSQL_TESTS
diff --git a/tests/imuxsock_ccmiddle_root.sh b/tests/imuxsock_ccmiddle_root.sh
index b487611a..7f255bd0 100755
--- a/tests/imuxsock_ccmiddle_root.sh
+++ b/tests/imuxsock_ccmiddle_root.sh
@@ -2,6 +2,9 @@
# carry out this test
echo \[imuxsock_ccmiddle_root.sh\]: test trailing LF handling in imuxsock
echo This test must be run as root with no other active syslogd
+if [ "$EUID" -ne 0 ]; then
+ exit 77 # Not root, skip this test
+fi
source $srcdir/diag.sh init
source $srcdir/diag.sh startup imuxsock_ccmiddle_root.conf
# send a message with trailing LF
diff --git a/tests/imuxsock_logger_root.sh b/tests/imuxsock_logger_root.sh
index 377999f7..0902d797 100755
--- a/tests/imuxsock_logger_root.sh
+++ b/tests/imuxsock_logger_root.sh
@@ -2,6 +2,9 @@
# carry out this test.
echo \[imuxsock_logger_root.sh\]: test trailing LF handling in imuxsock
echo This test must be run as root with no other active syslogd
+if [ "$EUID" -ne 0 ]; then
+ exit 77 # Not root, skip this test
+fi
source $srcdir/diag.sh init
source $srcdir/diag.sh startup imuxsock_logger_root.conf
# send a message with trailing LF
diff --git a/tests/imuxsock_traillf_root.sh b/tests/imuxsock_traillf_root.sh
index 1b821ee7..0141a626 100755
--- a/tests/imuxsock_traillf_root.sh
+++ b/tests/imuxsock_traillf_root.sh
@@ -2,6 +2,9 @@
# carry out this test
echo \[imuxsock_traillf_root.sh\]: test trailing LF handling in imuxsock
echo This test must be run as root with no other active syslogd
+if [ "$EUID" -ne 0 ]; then
+ exit 77 # Not root, skip this test
+fi
source $srcdir/diag.sh init
source $srcdir/diag.sh startup imuxsock_traillf_root.conf
# send a message with trailing LF
diff --git a/tests/sndrcv_udp.sh b/tests/sndrcv_udp.sh
index 274a414a..df37782c 100755
--- a/tests/sndrcv_udp.sh
+++ b/tests/sndrcv_udp.sh
@@ -7,4 +7,7 @@
# This file is part of the rsyslog project, released under GPLv3
echo ===============================================================================
echo \[sndrcv_udp.sh\]: testing sending and receiving via udp
+if [ "$EUID" -ne 0 ]; then
+ exit 77 # Not root, skip this test
+fi
source $srcdir/sndrcv_drvr.sh sndrcv_udp 50
diff --git a/tools/syslogd.c b/tools/syslogd.c
index 930920b7..8a04fb6b 100644
--- a/tools/syslogd.c
+++ b/tools/syslogd.c
@@ -1662,7 +1662,7 @@ doGlblProcessInit(void)
if( !(Debug == DEBUG_FULL || NoFork) )
{
- DBGPRINTF("Checking pidfile.\n");
+ DBGPRINTF("Checking pidfile '%s'.\n", PidFile);
if (!check_pid(PidFile))
{
memset(&sigAct, 0, sizeof (sigAct));
@@ -1735,7 +1735,7 @@ doGlblProcessInit(void)
}
/* tuck my process id away */
- DBGPRINTF("Writing pidfile %s.\n", PidFile);
+ DBGPRINTF("Writing pidfile '%s'.\n", PidFile);
if (!check_pid(PidFile))
{
if (!write_pid(PidFile))