From cf8b0717e12330695ce2e4b16966fda2a64b053e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 10:48:19 +0200 Subject: ignoring an (acceptable) race in debug system --- runtime/debug.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/runtime/debug.c b/runtime/debug.c index 1450d029..7ed1442b 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -480,7 +480,23 @@ static inline void dbgMutexUnlockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB pthread_mutex_lock(&mutMutLog); pLog = dbgMutLogFindSpecific(pmut, MUTOP_LOCK, NULL); +#if 0 /* toggle for testing */ assert(pLog != NULL); +#else +/* the change below seems not to work - the problem seems to be a real race... I keep this code in just in case + * I need to re-use it. It should be removed once we are finished analyzing this problem. -- rgerhards, 2008-09-17 + */ +if(pLog == NULL) { + /* this may happen due to some races. We do not try to avoid + * this, as it would complicate the "real" code. This is not justified + * just to keep the debug info system up. -- rgerhards, 2008-09-17 + */ + pthread_mutex_unlock(&mutMutLog); + dbgprintf("%s:%d:%s: mutex %p UNlocked [but we did not yet know this mutex!]\n", + pFuncDB->file, unlockLn, pFuncDB->func, (void*)pmut); + return; /* if we don't know it yet, we can not clean up... */ +} +#endif /* we found the last lock entry. We now need to see from which FuncDB we need to * remove it. This is recorded inside the mutex log entry. -- cgit v1.2.3 From 988989e49ef8639123c83383ba256c4e67679c8d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 10:49:16 +0200 Subject: re-enabled gcc builtin atomic operations and added a proper ./configure check --- ChangeLog | 2 ++ configure.ac | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ runtime/atomic.h | 18 +++++++++--------- 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1995ca10..125b0ada 100644 --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,8 @@ Version 3.21.5 [DEVEL] (rgerhards), 2008-09-?? each input module (else it is blank). - added system property "$myhostname", which contains the name of the local host as it knows itself. +- re-enabled gcc builtin atomic operations and added a proper + ./configure check --------------------------------------------------------------------------- Version 3.21.4 [DEVEL] (rgerhards), 2008-09-04 - removed compile time fixed message size limit (was 2K), limit can now diff --git a/configure.ac b/configure.ac index 8e9f7807..fea7c063 100644 --- a/configure.ac +++ b/configure.ac @@ -107,6 +107,56 @@ AC_TRY_COMPILE([ AC_MSG_RESULT(no; defined as 64) ) +# check for availability of atomic operations +# rgerhards, 2008-09-18, added based on +# http://svn.apache.org/repos/asf/apr/apr/trunk/configure.in + +AC_CACHE_CHECK([whether the compiler provides atomic builtins], [ap_cv_atomic_builtins], +[AC_TRY_RUN([ +int main() +{ + unsigned long val = 1010, tmp, *mem = &val; + + if (__sync_fetch_and_add(&val, 1010) != 1010 || val != 2020) + return 1; + + tmp = val; + + if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010) + return 1; + + if (__sync_sub_and_fetch(&val, 1010) != 0 || val != 0) + return 1; + + tmp = 3030; + + if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp) + return 1; + + if (__sync_lock_test_and_set(&val, 4040) != 3030) + return 1; + + mem = &tmp; + + if (__sync_val_compare_and_swap(&mem, &tmp, &val) != &tmp) + return 1; + + __sync_synchronize(); + + if (mem != &val) + return 1; + + return 0; +}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])]) + +if test "$ap_cv_atomic_builtins" = "yes"; then + AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [Define if compiler provides atomic builtins]) +fi + + + + + # Large file support AC_ARG_ENABLE(largefile, [AS_HELP_STRING([--enable-largefile],[Enable large file support @<:@default=yes@:>@])], diff --git a/runtime/atomic.h b/runtime/atomic.h index 430ae7f0..d6811628 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -1,6 +1,6 @@ /* This header supplies atomic operations. So far, we rely on GCC's - * atomic builtins. I have no idea if we can check them via autotools, - * but I am making the necessary provisioning to live without them if + * atomic builtins. During configure, we check if atomic operatons are + * available. If they are not, I am making the necessary provisioning to live without them if * they are not available. Please note that you should only use the macros * here if you think you can actually live WITHOUT an explicit atomic operation, * because in the non-presence of them, we simply do it without atomicitiy. @@ -36,16 +36,16 @@ #ifndef INCLUDED_ATOMIC_H #define INCLUDED_ATOMIC_H -/* set the following to 1 if we have atomic operations (and #undef it otherwise) */ -/* #define DO_HAVE_ATOMICS 1 */ /* for this release, we disable atomic calls because there seem to be some * portability problems and we can not fix that without destabilizing the build. * They simply came in too late. -- rgerhards, 2008-04-02 */ -/* make sure they are not used! -#define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) -#define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) -*/ -#define ATOMIC_INC(data) (++(data)) +#ifdef HAVE_ATOMIC_BUILTINS +# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) +# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) +#else +# warning "atomic builtins not available, using nul operations" +# define ATOMIC_INC(data) (++(data)) +#endif #endif /* #ifndef INCLUDED_ATOMIC_H */ -- cgit v1.2.3 From 4c96ebdcfe075e80810b01257cf21ea1c9b3ec0e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 12:19:33 +0200 Subject: bugfix: potential race condition when adding messages to queue There was a wrong order of mutex lock operations. It is hard to believe that really caused problems, but in theory it could and with threading we often see that theory becomes practice if something is only used long enough on a fast enough machine with enough CPUs ;) --- ChangeLog | 5 +++++ runtime/atomic.h | 8 ++++++-- runtime/debug.h | 3 ++- runtime/queue.c | 12 ++++++------ tools/syslogd.c | 7 +++---- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 125b0ada..23ac95fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,11 @@ Version 3.21.5 [DEVEL] (rgerhards), 2008-09-?? local host as it knows itself. - re-enabled gcc builtin atomic operations and added a proper ./configure check +- bugfix: potential race condition when adding messages to queue + There was a wrong order of mutex lock operations. It is hard to + believe that really caused problems, but in theory it could and with + threading we often see that theory becomes practice if something is only + used long enough on a fast enough machine with enough CPUs ;) --------------------------------------------------------------------------- Version 3.21.4 [DEVEL] (rgerhards), 2008-09-04 - removed compile time fixed message size limit (was 2K), limit can now diff --git a/runtime/atomic.h b/runtime/atomic.h index d6811628..d15f78ee 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -41,11 +41,15 @@ * They simply came in too late. -- rgerhards, 2008-04-02 */ #ifdef HAVE_ATOMIC_BUILTINS -# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) -# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) +# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&(data), 1)) +# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&(data), 1) +# define ATOMIC_FETCH_32BIT(data) ((unsigned) __sync_fetch_and_and(&(data), 0xffffffff)) +# define ATOMIC_STORE_1_TO_32BIT(data) __sync_lock_test_and_set(&(data), 1) #else # warning "atomic builtins not available, using nul operations" # define ATOMIC_INC(data) (++(data)) +# define ATOMIC_FETCH_32BIT(data) (data) +# define ATOMIC_STORE_1_TO_32BIT(data) (data) = 1 #endif #endif /* #ifndef INCLUDED_ATOMIC_H */ diff --git a/runtime/debug.h b/runtime/debug.h index 214b7c05..d9d576b5 100644 --- a/runtime/debug.h +++ b/runtime/debug.h @@ -130,7 +130,8 @@ void dbgPrintAllDebugInfo(void); /* debug aides */ -#ifdef RTINST +//#ifdef RTINST +#if 0 // temporarily removed for helgrind #define d_pthread_mutex_lock(x) dbgMutexLock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_mutex_unlock(x) dbgMutexUnlock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_cond_wait(cond, mut) dbgCondWait(cond, mut, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) diff --git a/runtime/queue.c b/runtime/queue.c index 7e7d4152..c0a37019 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2171,17 +2171,17 @@ queueEnqObj(queue_t *pThis, flowControl_t flowCtlType, void *pUsr) finalize_it: if(pThis->qType != QUEUETYPE_DIRECT) { - d_pthread_mutex_unlock(pThis->mut); + /* make sure at least one worker is running. */ + if(pThis->qType != QUEUETYPE_DIRECT) { + queueAdviseMaxWorkers(pThis); + } + /* and release the mutex */ i = pthread_cond_signal(&pThis->notEmpty); + d_pthread_mutex_unlock(pThis->mut); dbgoprint((obj_t*) pThis, "EnqueueMsg signaled condition (%d)\n", i); pthread_setcancelstate(iCancelStateSave, NULL); } - /* make sure at least one worker is running. */ - if(pThis->qType != QUEUETYPE_DIRECT) { - queueAdviseMaxWorkers(pThis); - } - RETiRet; } diff --git a/tools/syslogd.c b/tools/syslogd.c index b6e1d826..3a637dd8 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -2832,9 +2832,8 @@ static rsRetVal mainThread() CHKiRet(init()); - if(Debug) { + if(Debug && debugging_on) { dbgprintf("Debugging enabled, SIGUSR1 to turn off debugging.\n"); - debugging_on = 1; } /* Send a signal to the parent so it can terminate. */ @@ -3082,9 +3081,9 @@ doGlblProcessInit(void) fputs(" Already running.\n", stderr); exit(1); /* "good" exit, done if syslogd is already running */ } - } - else + } else { debugging_on = 1; + } /* tuck my process id away */ dbgprintf("Writing pidfile %s.\n", PidFile); -- cgit v1.2.3 From bc70a730194759e85f9c3641573c46b4a8476198 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 19 Sep 2008 17:41:11 +0200 Subject: bugfix: proper synchronization on message destruction The code was potentially race, at least on systems where a memory barrier was needed. Fix not fully tested yet. --- runtime/atomic.h | 1 + runtime/msg.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/runtime/atomic.h b/runtime/atomic.h index d15f78ee..2dbe7f52 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -48,6 +48,7 @@ #else # warning "atomic builtins not available, using nul operations" # define ATOMIC_INC(data) (++(data)) +# define ATOMIC_DEC_AND_FETCH(data) (--(data)) # define ATOMIC_FETCH_32BIT(data) (data) # define ATOMIC_STORE_1_TO_32BIT(data) (data) = 1 #endif diff --git a/runtime/msg.c b/runtime/msg.c index f4eb9414..346bbc5f 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -276,8 +276,10 @@ CODESTARTobjDestruct(msg) # ifdef DO_HAVE_ATOMICS currRefCount = ATOMIC_DEC_AND_FETCH(pThis->iRefCount); # else + MsgLock(pThis); currRefCount = --pThis->iRefCount; # endif +// we need a mutex, because we may be suspended after getting the refcount but before if(currRefCount == 0) { /* DEV Debugging Only! dbgprintf("msgDestruct\t0x%lx, RefCount now 0, doing DESTROY\n", (unsigned long)pThis); */ @@ -337,9 +339,11 @@ CODESTARTobjDestruct(msg) rsCStrDestruct(&pThis->pCSPROCID); if(pThis->pCSMSGID != NULL) rsCStrDestruct(&pThis->pCSMSGID); + MsgUnlock(pThis); funcDeleteMutex(pThis); } else { pThis = NULL; /* tell framework not to destructing the object! */ + MsgUnlock(pThis); } ENDobjDestruct(msg) -- cgit v1.2.3 From 47a2593ae4e2d4077b4a9e8c77a2b686abfa8d2c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 23 Sep 2008 14:41:02 +0200 Subject: atomic memory access calls re-enabled in msg.c destructor not yet permitted because verification is missing that a atomic opration is sufficient for the job required --- runtime/msg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/runtime/msg.c b/runtime/msg.c index 346bbc5f..d5e1fde9 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -273,12 +273,12 @@ BEGINobjDestruct(msg) /* be sure to specify the object type also in END and CODE int currRefCount; CODESTARTobjDestruct(msg) /* DEV Debugging only ! dbgprintf("msgDestruct\t0x%lx, Ref now: %d\n", (unsigned long)pM, pM->iRefCount - 1); */ -# ifdef DO_HAVE_ATOMICS - currRefCount = ATOMIC_DEC_AND_FETCH(pThis->iRefCount); -# else +//# ifdef DO_HAVE_ATOMICS +// currRefCount = ATOMIC_DEC_AND_FETCH(pThis->iRefCount); +//# else MsgLock(pThis); currRefCount = --pThis->iRefCount; -# endif +//# endif // we need a mutex, because we may be suspended after getting the refcount but before if(currRefCount == 0) { @@ -487,7 +487,7 @@ finalize_it: msg_t *MsgAddRef(msg_t *pM) { assert(pM != NULL); -# ifdef DO_HAVE_ATOMICS +# ifdef HAVE_ATOMIC_BUILTINS ATOMIC_INC(pM->iRefCount); # else MsgLock(pM); -- cgit v1.2.3 From dc44f5a475101aa18568a0eaceacc2a4eb718391 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 26 Sep 2008 13:40:41 +0200 Subject: cleaned up internal debug system code and made it behave better in regard to multi-threading --- ChangeLog | 2 + runtime/debug.c | 208 +++++++++++++++++++++----------------------------------- tools/syslogd.c | 10 ++- 3 files changed, 89 insertions(+), 131 deletions(-) diff --git a/ChangeLog b/ChangeLog index e2c7d5d4..7b35d660 100644 --- a/ChangeLog +++ b/ChangeLog @@ -18,6 +18,8 @@ Version 3.21.5 [DEVEL] (rgerhards), 2008-09-?? believe that really caused problems, but in theory it could and with threading we often see that theory becomes practice if something is only used long enough on a fast enough machine with enough CPUs ;) +- cleaned up internal debug system code and made it behave better + in regard to multi-threading --------------------------------------------------------------------------- Version 3.21.4 [DEVEL] (rgerhards), 2008-09-04 - removed compile time fixed message size limit (was 2K), limit can now diff --git a/runtime/debug.c b/runtime/debug.c index 7ed1442b..15e9201b 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -43,6 +43,9 @@ #include #include #include +#include +#include +#include #include "rsyslog.h" #include "debug.h" @@ -62,8 +65,8 @@ static int bPrintTime = 1; /* print a timestamp together with debug message */ static int bPrintAllDebugOnExit = 0; static int bAbortTrace = 1; /* print a trace after SIGABRT or SIGSEGV */ static char *pszAltDbgFileName = NULL; /* if set, debug output is *also* sent to here */ -static FILE *altdbg = NULL; /* and the handle for alternate debug output */ -static FILE *stddbg; +static int altdbg = -1; /* and the handle for alternate debug output */ +static int stddbg; /* list of files/objects that should be printed */ typedef struct dbgPrintName_s { @@ -113,8 +116,7 @@ static dbgThrdInfo_t *dbgCallStackListRoot = NULL; static dbgThrdInfo_t *dbgCallStackListLast = NULL; static pthread_mutex_t mutCallStack; -static pthread_mutex_t mutdbgprintf; -static pthread_mutex_t mutdbgoprint; +static pthread_mutex_t mutdbgprint; static pthread_key_t keyCallStack; @@ -748,8 +750,6 @@ sigsegvHdlr(int signum) } dbgprintf("\n\nTo submit bug reports, visit http://www.rsyslog.com/bugs\n\n"); - if(stddbg != NULL) fflush(stddbg); - if(altdbg != NULL) fflush(altdbg); /* and finally abort... */ /* TODO: think about restarting rsyslog in this case: may be a good idea, @@ -758,56 +758,36 @@ sigsegvHdlr(int signum) abort(); } - +#if 1 #pragma GCC diagnostic ignored "-Wempty-body" -/* print some debug output when an object is given - * This is mostly a copy of dbgprintf, but I do not know how to combine it - * into a single function as we have variable arguments and I don't know how to call - * from one vararg function into another. I don't dig in this, it is OK for the - * time being. -- rgerhards, 2008-01-29 +/* write the debug message. This is a helper to dbgprintf and dbgoprint which + * contains common code. added 2008-09-26 rgerhards */ -void -dbgoprint(obj_t *pObj, char *fmt, ...) +static void +dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) { static pthread_t ptLastThrdID = 0; static int bWasNL = 0; - va_list ap; - static char pszThrdName[64]; /* 64 is to be on the safe side, anything over 20 is bad... */ - static char pszWriteBuf[1024]; + char pszThrdName[64]; /* 64 is to be on the safe side, anything over 20 is bad... */ + char pszWriteBuf[1024]; size_t lenWriteBuf; struct timespec t; - if(!(Debug && debugging_on)) - return; - - /* a quick and very dirty hack to enable us to display just from those objects - * that we are interested in. So far, this must be changed at compile time (and - * chances are great it is commented out while you read it. Later, this shall - * be selectable via the environment. -- rgerhards, 2008-02-20 - */ -#if 0 - if(objGetObjID(pObj) != OBJexpr) - return; -#endif - - - pthread_mutex_lock(&mutdbgoprint); - pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgoprint); + pthread_mutex_lock(&mutdbgprint); + pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); /* The bWasNL handler does not really work. It works if no thread * switching occurs during non-NL messages. Else, things are messed * up. Anyhow, it works well enough to provide useful help during * getting this up and running. It is questionable if the extra effort - * is worth fixing it, giving the limited appliability. - * rgerhards, 2005-10-25 + * is worth fixing it, giving the limited appliability. -- rgerhards, 2005-10-25 * I have decided that it is not worth fixing it - especially as it works - * pretty well. - * rgerhards, 2007-06-15 + * pretty well. -- rgerhards, 2007-06-15 */ if(ptLastThrdID != pthread_self()) { if(!bWasNL) { - if(stddbg != NULL) fprintf(stddbg, "\n"); - if(altdbg != NULL) fprintf(altdbg, "\n"); + if(stddbg != -1) write(stddbg, "\n", 1); + if(altdbg != -1) write(altdbg, "\n", 1); bWasNL = 1; } ptLastThrdID = pthread_self(); @@ -821,113 +801,82 @@ dbgoprint(obj_t *pObj, char *fmt, ...) if(bWasNL) { if(bPrintTime) { clock_gettime(CLOCK_REALTIME, &t); - if(stddbg != NULL) fprintf(stddbg, "%4.4ld.%9.9ld:", (long) (t.tv_sec % 10000), t.tv_nsec); - if(altdbg != NULL) fprintf(altdbg, "%4.4ld.%9.9ld:", (long) (t.tv_sec % 10000), t.tv_nsec); + lenWriteBuf = snprintf(pszWriteBuf, sizeof(pszWriteBuf), + "%4.4ld.%9.9ld:", (long) (t.tv_sec % 10000), t.tv_nsec); + if(stddbg != -1) write(stddbg, pszWriteBuf, lenWriteBuf); + if(altdbg != -1) write(altdbg, pszWriteBuf, lenWriteBuf); } - if(stddbg != NULL) fprintf(stddbg, "%s: ", pszThrdName); - if(altdbg != NULL) fprintf(altdbg, "%s: ", pszThrdName); + lenWriteBuf = snprintf(pszWriteBuf, sizeof(pszWriteBuf), "%s: ", pszThrdName); + if(stddbg != -1) write(stddbg, pszWriteBuf, lenWriteBuf); + if(altdbg != -1) write(altdbg, pszWriteBuf, lenWriteBuf); /* print object name header if we have an object */ if(pObj != NULL) { - if(stddbg != NULL) fprintf(stddbg, "%s: ", obj.GetName(pObj)); - if(altdbg != NULL) fprintf(altdbg, "%s: ", obj.GetName(pObj)); + lenWriteBuf = snprintf(pszWriteBuf, sizeof(pszWriteBuf), "%s: ", obj.GetName(pObj)); + if(stddbg != -1) write(stddbg, pszWriteBuf, lenWriteBuf); + if(altdbg != -1) write(altdbg, pszWriteBuf, lenWriteBuf); } } - bWasNL = (*(fmt + strlen(fmt) - 1) == '\n') ? 1 : 0; + if(stddbg != -1) write(stddbg, pszMsg, lenMsg); + if(altdbg != -1) write(altdbg, pszMsg, lenMsg); + + bWasNL = (pszMsg[lenMsg - 1] == '\n') ? 1 : 0; + + pthread_cleanup_pop(1); +} +#pragma GCC diagnostic warning "-Wempty-body" +#endif + +/* print some debug output when an object is given + * This is mostly a copy of dbgprintf, but I do not know how to combine it + * into a single function as we have variable arguments and I don't know how to call + * from one vararg function into another. I don't dig in this, it is OK for the + * time being. -- rgerhards, 2008-01-29 + */ +void +dbgoprint(obj_t *pObj, char *fmt, ...) +{ + va_list ap; + char pszWriteBuf[1024]; + size_t lenWriteBuf; + + if(!(Debug && debugging_on)) + return; + + /* a quick and very dirty hack to enable us to display just from those objects + * that we are interested in. So far, this must be changed at compile time (and + * chances are great it is commented out while you read it. Later, this shall + * be selectable via the environment. -- rgerhards, 2008-02-20 + */ +#if 0 + if(objGetObjID(pObj) != OBJexpr) + return; +#endif + va_start(ap, fmt); lenWriteBuf = vsnprintf(pszWriteBuf, sizeof(pszWriteBuf), fmt, ap); - if(lenWriteBuf >= sizeof(pszWriteBuf)) { - /* if our buffer was too small, we simply truncate. TODO: maybe something better? */ - lenWriteBuf = sizeof(pszWriteBuf) - 1; - } va_end(ap); - /* - if(stddbg != NULL) fprintf(stddbg, "%s", pszWriteBuf); - if(altdbg != NULL) fprintf(altdbg, "%s", pszWriteBuf); - */ - if(stddbg != NULL) fwrite(pszWriteBuf, lenWriteBuf, 1, stddbg); - if(altdbg != NULL) fwrite(pszWriteBuf, lenWriteBuf, 1, altdbg); - - if(stddbg != NULL) fflush(stddbg); - if(altdbg != NULL) fflush(altdbg); - pthread_cleanup_pop(1); + dbgprint(pObj, pszWriteBuf, lenWriteBuf); } -#pragma GCC diagnostic warning "-Wempty-body" -#pragma GCC diagnostic ignored "-Wempty-body" /* print some debug output when no object is given * WARNING: duplicate code, see dbgoprin above! */ void dbgprintf(char *fmt, ...) { - static pthread_t ptLastThrdID = 0; - static int bWasNL = 0; va_list ap; - static char pszThrdName[64]; /* 64 is to be on the safe side, anything over 20 is bad... */ - static char pszWriteBuf[1024]; + char pszWriteBuf[1024]; size_t lenWriteBuf; - struct timespec t; if(!(Debug && debugging_on)) return; - pthread_mutex_lock(&mutdbgprintf); - pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprintf); - - /* The bWasNL handler does not really work. It works if no thread - * switching occurs during non-NL messages. Else, things are messed - * up. Anyhow, it works well enough to provide useful help during - * getting this up and running. It is questionable if the extra effort - * is worth fixing it, giving the limited appliability. - * rgerhards, 2005-10-25 - * I have decided that it is not worth fixing it - especially as it works - * pretty well. - * rgerhards, 2007-06-15 - */ - if(ptLastThrdID != pthread_self()) { - if(!bWasNL) { - if(stddbg != NULL) fprintf(stddbg, "\n"); - if(altdbg != NULL) fprintf(altdbg, "\n"); - bWasNL = 1; - } - ptLastThrdID = pthread_self(); - } - - /* do not cache the thread name, as the caller might have changed it - * TODO: optimized, invalidate cache when new name is set - */ - dbgGetThrdName(pszThrdName, sizeof(pszThrdName), ptLastThrdID, 0); - - if(bWasNL) { - if(bPrintTime) { - clock_gettime(CLOCK_REALTIME, &t); - if(stddbg != NULL) fprintf(stddbg, "%4.4ld.%9.9ld:", (long) (t.tv_sec % 10000), t.tv_nsec); - if(altdbg != NULL) fprintf(altdbg, "%4.4ld.%9.9ld:", (long) (t.tv_sec % 10000), t.tv_nsec); - } - if(stddbg != NULL) fprintf(stddbg, "%s: ", pszThrdName); - if(altdbg != NULL) fprintf(altdbg, "%s: ", pszThrdName); - } - bWasNL = (*(fmt + strlen(fmt) - 1) == '\n') ? 1 : 0; va_start(ap, fmt); lenWriteBuf = vsnprintf(pszWriteBuf, sizeof(pszWriteBuf), fmt, ap); - if(lenWriteBuf >= sizeof(pszWriteBuf)) { - /* if our buffer was too small, we simply truncate. TODO: maybe something better? */ - lenWriteBuf = sizeof(pszWriteBuf) - 1; - } va_end(ap); - /* - if(stddbg != NULL) fprintf(stddbg, "%s", pszWriteBuf); - if(altdbg != NULL) fprintf(altdbg, "%s", pszWriteBuf); - */ - if(stddbg != NULL) fwrite(pszWriteBuf, lenWriteBuf, 1, stddbg); - if(altdbg != NULL) fwrite(pszWriteBuf, lenWriteBuf, 1, altdbg); - - if(stddbg != NULL) fflush(stddbg); - if(altdbg != NULL) fflush(altdbg); - pthread_cleanup_pop(1); + dbgprint(NULL, pszWriteBuf, lenWriteBuf); } -#pragma GCC diagnostic warning "-Wempty-body" void tester(void) { @@ -941,7 +890,7 @@ ENDfunc int dbgEntrFunc(dbgFuncDB_t **ppFuncDB, const char *file, const char *func, int line) { int iStackPtr = 0; /* TODO: find some better default, this one hurts the least, but it is not clean */ - dbgThrdInfo_t *pThrd = dbgGetThrdInfo(); + dbgThrdInfo_t *pThrd; dbgFuncDBListEntry_t *pFuncDBListEntry; unsigned int i; dbgFuncDB_t *pFuncDB; @@ -952,6 +901,8 @@ int dbgEntrFunc(dbgFuncDB_t **ppFuncDB, const char *file, const char *func, int pFuncDB = *ppFuncDB; assert((pFuncDB == NULL) || (pFuncDB->magic == dbgFUNCDB_MAGIC)); + pThrd = dbgGetThrdInfo(); /* we must do this AFTER the mutexes are initialized! */ + if(pFuncDB == NULL) { /* we do not yet have a funcDB and need to create a new one. We also add it * to the linked list of funcDBs. Please note that when a module is unloaded and @@ -1206,7 +1157,7 @@ dbgGetRuntimeOptions(void) uchar *optname; /* set some defaults */ - stddbg = stdout; + stddbg = 1; if((pszOpts = (uchar*) getenv("RSYSLOG_DEBUG")) != NULL) { /* we have options set, so let's process them */ @@ -1248,7 +1199,7 @@ dbgGetRuntimeOptions(void) } else if(!strcasecmp((char*)optname, "nologtimestamp")) { bPrintTime = 0; } else if(!strcasecmp((char*)optname, "nostdout")) { - stddbg = NULL; + stddbg = -1; } else if(!strcasecmp((char*)optname, "noaborttrace")) { bAbortTrace = 0; } else if(!strcasecmp((char*)optname, "filetrace")) { @@ -1273,7 +1224,7 @@ dbgGetRuntimeOptions(void) rsRetVal dbgClassInit(void) { - DEFiRet; + rsRetVal iRet; /* do not use DEFiRet, as this makes calls into the debug system! */ struct sigaction sigAct; sigset_t sigSet; @@ -1287,8 +1238,7 @@ rsRetVal dbgClassInit(void) pthread_mutex_init(&mutFuncDBList, NULL); pthread_mutex_init(&mutMutLog, NULL); pthread_mutex_init(&mutCallStack, NULL); - pthread_mutex_init(&mutdbgprintf, NULL); - pthread_mutex_init(&mutdbgoprint, NULL); + pthread_mutex_init(&mutdbgprint, NULL); /* while we try not to use any of the real rsyslog code (to avoid infinite loops), we * need to have the ability to query object names. Thus, we need to obtain a pointer to @@ -1310,7 +1260,7 @@ rsRetVal dbgClassInit(void) if(pszAltDbgFileName != NULL) { /* we have a secondary file, so let's open it) */ - if((altdbg = fopen(pszAltDbgFileName, "w")) == NULL) { + if((altdbg = open(pszAltDbgFileName, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, S_IRUSR|S_IWUSR)) == -1) { fprintf(stderr, "alternate debug file could not be opened, ignoring. Error: %s\n", strerror(errno)); } } @@ -1318,7 +1268,7 @@ rsRetVal dbgClassInit(void) dbgSetThrdName((uchar*)"main thread"); finalize_it: - RETiRet; + return(iRet); } @@ -1330,8 +1280,8 @@ rsRetVal dbgClassExit(void) if(bPrintAllDebugOnExit) dbgPrintAllDebugInfo(); - if(altdbg != NULL) - fclose(altdbg); + if(altdbg != -1) + close(altdbg); /* now free all of our memory to make the memory debugger happy... */ pFuncDBListEtry = pFuncDBListRoot; diff --git a/tools/syslogd.c b/tools/syslogd.c index 3a637dd8..a39f5a5c 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -1871,16 +1871,22 @@ void legacyOptsParseTCP(char ch, char *arg) * a minimal delay, but it is much cleaner than the approach of doing everything * inside the signal handler. * rgerhards, 2005-10-26 + * Note: we do not call dbgprintf() as this may cause us to block in case something + * with the threading is wrong. */ static void doDie(int sig) { +# define MSG1 "DoDie called.\n" +# define MSG2 "DoDie called 5 times - unconditional exit\n" static int iRetries = 0; /* debug aid */ - printf("DoDie called.\n"); + write(1, MSG1, sizeof(MSG1)); if(iRetries++ == 4) { - printf("DoDie called 5 times - unconditional exit\n"); + write(1, MSG2, sizeof(MSG2)); abort(); } bFinished = sig; +# undef MSG1 +# undef MSG2 } -- cgit v1.2.3 From b8e82d59b7aba7da65e4244a29b92274aa50d08d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 26 Sep 2008 16:10:05 +0200 Subject: fixed potential race condition on HUP and termination --- runtime/wtp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/wtp.c b/runtime/wtp.c index 8b041ea2..54f12b2b 100644 --- a/runtime/wtp.c +++ b/runtime/wtp.c @@ -169,7 +169,9 @@ wtpWakeupAllWrkr(wtp_t *pThis) DEFiRet; ISOBJ_TYPE_assert(pThis, wtp); + d_pthread_mutex_lock(pThis->pmutUsr); pthread_cond_broadcast(pThis->pcondBusy); + d_pthread_mutex_unlock(pThis->pmutUsr); RETiRet; } -- cgit v1.2.3 From 92c2e09d19bef9dd10d2e85a663925124d6e00e4 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 26 Sep 2008 18:40:28 +0200 Subject: some more threading cleanup - removed newly-introduced potential deadlock in debug system - removed unnecessary pthread_cond_signal - a bit general cleanup --- runtime/debug.c | 15 +++++++++++++-- runtime/queue.c | 8 ++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/runtime/debug.c b/runtime/debug.c index 15e9201b..b0bf76ea 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -772,6 +772,17 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) char pszWriteBuf[1024]; size_t lenWriteBuf; struct timespec t; + uchar *pszObjName = NULL; + + /* we must get the object name before we lock the mutex, because the object + * potentially calls back into us. If we locked the mutex, we would deadlock + * ourselfs. On the other hand, the GetName call needs not to be protected, as + * this thread has a valid reference. If such an object is deleted by another + * thread, we are in much more trouble than just for dbgprint(). -- rgerhards, 2008-09-26 + */ + if(pObj != NULL) { + pszObjName = obj.GetName(pObj); + } pthread_mutex_lock(&mutdbgprint); pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); @@ -810,8 +821,8 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) if(stddbg != -1) write(stddbg, pszWriteBuf, lenWriteBuf); if(altdbg != -1) write(altdbg, pszWriteBuf, lenWriteBuf); /* print object name header if we have an object */ - if(pObj != NULL) { - lenWriteBuf = snprintf(pszWriteBuf, sizeof(pszWriteBuf), "%s: ", obj.GetName(pObj)); + if(pszObjName != NULL) { + lenWriteBuf = snprintf(pszWriteBuf, sizeof(pszWriteBuf), "%s: ", pszObjName); if(stddbg != -1) write(stddbg, pszWriteBuf, lenWriteBuf); if(altdbg != -1) write(altdbg, pszWriteBuf, lenWriteBuf); } diff --git a/runtime/queue.c b/runtime/queue.c index c0a37019..3fae4aa7 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2092,7 +2092,6 @@ queueEnqObj(queue_t *pThis, flowControl_t flowCtlType, void *pUsr) { DEFiRet; int iCancelStateSave; - int i; struct timespec t; ISOBJ_TYPE_assert(pThis, queue); @@ -2172,13 +2171,10 @@ queueEnqObj(queue_t *pThis, flowControl_t flowCtlType, void *pUsr) finalize_it: if(pThis->qType != QUEUETYPE_DIRECT) { /* make sure at least one worker is running. */ - if(pThis->qType != QUEUETYPE_DIRECT) { - queueAdviseMaxWorkers(pThis); - } + queueAdviseMaxWorkers(pThis); + dbgoprint((obj_t*) pThis, "EnqueueMsg advised worker start\n"); /* and release the mutex */ - i = pthread_cond_signal(&pThis->notEmpty); d_pthread_mutex_unlock(pThis->mut); - dbgoprint((obj_t*) pThis, "EnqueueMsg signaled condition (%d)\n", i); pthread_setcancelstate(iCancelStateSave, NULL); } -- cgit v1.2.3