diff options
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/datetime.c | 77 | ||||
-rw-r--r-- | runtime/datetime.h | 5 | ||||
-rw-r--r-- | runtime/queue.c | 65 | ||||
-rw-r--r-- | runtime/rsyslog.h | 1 | ||||
-rw-r--r-- | runtime/strmsrv.c | 2 | ||||
-rw-r--r-- | runtime/wti.c | 5 |
6 files changed, 112 insertions, 43 deletions
diff --git a/runtime/datetime.c b/runtime/datetime.c index f81180ea..89452f1c 100644 --- a/runtime/datetime.c +++ b/runtime/datetime.c @@ -55,6 +55,47 @@ static const int tenPowers[6] = { 1, 10, 100, 1000, 10000, 100000 }; /* ------------------------------ methods ------------------------------ */ +/** + * Convert struct timeval to syslog_time + */ +void +timeval2syslogTime(struct timeval *tp, struct syslogTime *t) +{ + struct tm *tm; + struct tm tmBuf; + long lBias; + + tm = localtime_r((time_t*) &(tp->tv_sec), &tmBuf); + + t->year = tm->tm_year + 1900; + t->month = tm->tm_mon + 1; + t->day = tm->tm_mday; + t->hour = tm->tm_hour; + t->minute = tm->tm_min; + t->second = tm->tm_sec; + t->secfrac = tp->tv_usec; + t->secfracPrecision = 6; + +# if __sun + /* Solaris uses a different method of exporting the time zone. + * It is UTC - localtime, which is the opposite sign of mins east of GMT. + */ + lBias = -(daylight ? altzone : timezone); +# elif defined(__hpux) + lBias = tz.tz_dsttime ? - tz.tz_minuteswest : 0; +# else + lBias = tm->tm_gmtoff; +# endif + if(lBias < 0) { + t->OffsetMode = '-'; + lBias *= -1; + } else + t->OffsetMode = '+'; + t->OffsetHour = lBias / 3600; + t->OffsetMinute = (lBias % 3600) / 60; + t->timeType = TIME_TYPE_RFC5424; /* we have a high precision timestamp */ +} + /** * Get the current date/time in the best resolution the operating * system has to offer (well, actually at most down to the milli- @@ -74,9 +115,6 @@ static const int tenPowers[6] = { 1, 10, 100, 1000, 10000, 100000 }; static void getCurrTime(struct syslogTime *t, time_t *ttSeconds) { struct timeval tp; - struct tm *tm; - struct tm tmBuf; - long lBias; # if defined(__hpux) struct timezone tz; # endif @@ -93,37 +131,7 @@ static void getCurrTime(struct syslogTime *t, time_t *ttSeconds) if(ttSeconds != NULL) *ttSeconds = tp.tv_sec; - tm = localtime_r((time_t*) &(tp.tv_sec), &tmBuf); - - t->year = tm->tm_year + 1900; - t->month = tm->tm_mon + 1; - t->day = tm->tm_mday; - t->hour = tm->tm_hour; - t->minute = tm->tm_min; - t->second = tm->tm_sec; - t->secfrac = tp.tv_usec; - t->secfracPrecision = 6; - -# if __sun - /* Solaris uses a different method of exporting the time zone. - * It is UTC - localtime, which is the opposite sign of mins east of GMT. - */ - lBias = -(daylight ? altzone : timezone); -# elif defined(__hpux) - lBias = tz.tz_dsttime ? - tz.tz_minuteswest : 0; -# else - lBias = tm->tm_gmtoff; -# endif - if(lBias < 0) - { - t->OffsetMode = '-'; - lBias *= -1; - } - else - t->OffsetMode = '+'; - t->OffsetHour = lBias / 3600; - t->OffsetMinute = (lBias % 3600) / 60; - t->timeType = TIME_TYPE_RFC5424; /* we have a high precision timestamp */ + timeval2syslogTime(&tp, t); } @@ -861,6 +869,7 @@ CODESTARTobjQueryInterface(datetime) */ pIf->getCurrTime = getCurrTime; pIf->GetTime = getTime; + pIf->timeval2syslogTime = timeval2syslogTime; pIf->ParseTIMESTAMP3339 = ParseTIMESTAMP3339; pIf->ParseTIMESTAMP3164 = ParseTIMESTAMP3164; pIf->formatTimestampToMySQL = formatTimestampToMySQL; diff --git a/runtime/datetime.h b/runtime/datetime.h index 70bbf416..2175d6a1 100644 --- a/runtime/datetime.h +++ b/runtime/datetime.h @@ -44,8 +44,10 @@ BEGINinterface(datetime) /* name must also be changed in ENDinterface macro! */ int (*formatTimestampSecFrac)(struct syslogTime *ts, char* pBuf); /* v3, 2009-11-12 */ time_t (*GetTime)(time_t *ttSeconds); + /* v6, 2011-06-20 */ + void (*timeval2syslogTime)(struct timeval *tp, struct syslogTime *t); ENDinterface(datetime) -#define datetimeCURR_IF_VERSION 5 /* increment whenever you change the interface structure! */ +#define datetimeCURR_IF_VERSION 6 /* increment whenever you change the interface structure! */ /* interface changes: * 1 - initial version * 2 - not compatible to 1 - bugfix required ParseTIMESTAMP3164 to accept char ** as @@ -54,6 +56,7 @@ ENDinterface(datetime) * 3 - taken by v5 branch! * 4 - formatTimestamp3164 takes a third int parameter * 5 - merge of versions 3 + 4 (2010-03-09) + * 6 - see above */ /* prototypes */ diff --git a/runtime/queue.c b/runtime/queue.c index 00eb76c7..c831836d 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -83,6 +83,11 @@ static rsRetVal ConsumerDA(qqueue_t *pThis, wti_t *pWti); static rsRetVal batchProcessed(qqueue_t *pThis, wti_t *pWti); static rsRetVal qqueueMultiEnqObjNonDirect(qqueue_t *pThis, multi_submit_t *pMultiSub); static rsRetVal qqueueMultiEnqObjDirect(qqueue_t *pThis, multi_submit_t *pMultiSub); +static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr); +static rsRetVal qDestructDirect(qqueue_t __attribute__((unused)) *pThis); +static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) *pThis); +static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis); +static rsRetVal qDestructDisk(qqueue_t *pThis); /* some constants for queuePersist () */ #define QUEUE_CHECKPOINT 1 @@ -583,6 +588,47 @@ static rsRetVal qDelLinkedList(qqueue_t *pThis) /* -------------------- disk -------------------- */ +/* The following function is used to "save" ourself from being killed by + * a fatally failed disk queue. A fatal failure is, for example, if no + * data can be read or written. In that case, the disk support is disabled, + * with all on-disk structures kept as-is as much as possible. Instead, the + * queue is switched to direct mode, so that at least + * some processing can happen. Of course, this may still have lots of + * undesired side-effects, but is probably better than aborting the + * syslogd. Note that this function *must* succeed in one way or another, as + * we can not recover from failure here. But it may emit different return + * states, which can trigger different processing in the higher layers. + * rgerhards, 2011-05-03 + */ +static inline rsRetVal +queueSwitchToEmergencyMode(qqueue_t *pThis, rsRetVal initiatingError) +{ + pThis->iQueueSize = 0; + pThis->nLogDeq = 0; + qDestructDisk(pThis); /* free disk structures */ + + pThis->qType = QUEUETYPE_DIRECT; + pThis->qConstruct = qConstructDirect; + pThis->qDestruct = qDestructDirect; + pThis->qAdd = qAddDirect; + pThis->qDel = qDelDirect; + pThis->MultiEnq = qqueueMultiEnqObjDirect; + if(pThis->pqParent != NULL) { + DBGOPRINT((obj_t*) pThis, "DA queue is in emergency mode, disabling DA in parent\n"); + pThis->pqParent->bIsDA = 0; + pThis->pqParent->pqDA = NULL; + /* This may have undesired side effects, not sure if I really evaluated + * all. So you know where to look at if you come to this point during + * troubleshooting ;) -- rgerhards, 2011-05-03 + */ + } + + errmsg.LogError(0, initiatingError, "fatal error on disk queue '%s', emergency switch to direct mode", + obj.GetName((obj_t*) pThis)); + return RS_RET_ERR_QUEUE_EMERGENCY; +} + + static rsRetVal qqueueLoadPersStrmInfoFixup(strm_t *pStrm, qqueue_t __attribute__((unused)) *pThis) { @@ -785,10 +831,7 @@ finalize_it: static rsRetVal qDeqDisk(qqueue_t *pThis, void **ppUsr) { DEFiRet; - - CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL)); - -finalize_it: + iRet = obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL); RETiRet; } @@ -1684,7 +1727,18 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti) ISOBJ_TYPE_assert(pThis, qqueue); ISOBJ_TYPE_assert(pWti, wti); - CHKiRet(DequeueForConsumer(pThis, pWti)); + iRet = DequeueForConsumer(pThis, pWti); + if(iRet == RS_RET_FILE_NOT_FOUND) { + /* This is a fatal condition and means the queue is almost unusable */ + d_pthread_mutex_unlock(pThis->mut); + DBGOPRINT((obj_t*) pThis, "got 'file not found' error %d, queue defunct\n", iRet); + iRet = queueSwitchToEmergencyMode(pThis, iRet); + // TODO: think about what to return as iRet -- keep RS_RET_FILE_NOT_FOUND? + d_pthread_mutex_lock(pThis->mut); + } + if (iRet != RS_RET_OK) { + FINALIZE; + } /* we now have a non-idle batch of work, so we can release the queue mutex and process it */ d_pthread_mutex_unlock(pThis->mut); @@ -1778,7 +1832,6 @@ qqueueChkStopWrkrDA(qqueue_t *pThis) { DEFiRet; -//DBGPRINTF("XXXX: chkStopWrkrDA called, low watermark %d, phys Size %d\n", pThis->iLowWtrMrk, getPhysicalQueueSize(pThis)); if(pThis->bEnqOnly) { iRet = RS_RET_TERMINATE_WHEN_IDLE; } diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 52b29ac4..613a3e7a 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -343,6 +343,7 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth RS_RET_FILE_NOT_SPECIFIED = -2180, /**< file name not configured where this was required */ RS_RET_ERR_WRKDIR = -2181, /**< problems with the rsyslog working directory */ RS_RET_WRN_WRKDIR = -2182, /**< correctable problems with the rsyslog working directory */ + RS_RET_ERR_QUEUE_EMERGENCY = -2183, /**< some fatal error caused queue to switch to emergency mode */ /* RainerScript error messages (range 1000.. 1999) */ RS_RET_SYSVAR_NOT_FOUND = 1001, /**< system variable could not be found (maybe misspelled) */ diff --git a/runtime/strmsrv.c b/runtime/strmsrv.c index f448cd4f..03691de6 100644 --- a/runtime/strmsrv.c +++ b/runtime/strmsrv.c @@ -767,7 +767,7 @@ static rsRetVal SetKeepAlive(strmsrv_t *pThis, int iVal) { DEFiRet; - dbgprintf("keep-alive set to %d\n", iVal); + dbgprintf("strmsrv: keep-alive set to %d\n", iVal); pThis->bUseKeepAlive = iVal; RETiRet; } diff --git a/runtime/wti.c b/runtime/wti.c index 9343f5c5..ec56c2d5 100644 --- a/runtime/wti.c +++ b/runtime/wti.c @@ -314,7 +314,10 @@ wtiWorker(wti_t *pThis) */ localRet = pWtp->pfDoWork(pWtp->pUsr, pThis); - if(localRet == RS_RET_IDLE) { + if(localRet == RS_RET_ERR_QUEUE_EMERGENCY) { + d_pthread_mutex_unlock(pWtp->pmutUsr); + break; /* end of loop */ + } else if(localRet == RS_RET_IDLE) { if(terminateRet == RS_RET_TERMINATE_WHEN_IDLE || bInactivityTOOccured) { d_pthread_mutex_unlock(pWtp->pmutUsr); break; /* end of loop */ |