summaryrefslogtreecommitdiffstats
path: root/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'runtime')
-rw-r--r--runtime/datetime.c77
-rw-r--r--runtime/datetime.h5
-rw-r--r--runtime/queue.c65
-rw-r--r--runtime/rsyslog.h1
-rw-r--r--runtime/strmsrv.c2
-rw-r--r--runtime/wti.c5
6 files changed, 112 insertions, 43 deletions
diff --git a/runtime/datetime.c b/runtime/datetime.c
index f81180ea..89452f1c 100644
--- a/runtime/datetime.c
+++ b/runtime/datetime.c
@@ -55,6 +55,47 @@ static const int tenPowers[6] = { 1, 10, 100, 1000, 10000, 100000 };
/* ------------------------------ methods ------------------------------ */
+/**
+ * Convert struct timeval to syslog_time
+ */
+void
+timeval2syslogTime(struct timeval *tp, struct syslogTime *t)
+{
+ struct tm *tm;
+ struct tm tmBuf;
+ long lBias;
+
+ tm = localtime_r((time_t*) &(tp->tv_sec), &tmBuf);
+
+ t->year = tm->tm_year + 1900;
+ t->month = tm->tm_mon + 1;
+ t->day = tm->tm_mday;
+ t->hour = tm->tm_hour;
+ t->minute = tm->tm_min;
+ t->second = tm->tm_sec;
+ t->secfrac = tp->tv_usec;
+ t->secfracPrecision = 6;
+
+# if __sun
+ /* Solaris uses a different method of exporting the time zone.
+ * It is UTC - localtime, which is the opposite sign of mins east of GMT.
+ */
+ lBias = -(daylight ? altzone : timezone);
+# elif defined(__hpux)
+ lBias = tz.tz_dsttime ? - tz.tz_minuteswest : 0;
+# else
+ lBias = tm->tm_gmtoff;
+# endif
+ if(lBias < 0) {
+ t->OffsetMode = '-';
+ lBias *= -1;
+ } else
+ t->OffsetMode = '+';
+ t->OffsetHour = lBias / 3600;
+ t->OffsetMinute = (lBias % 3600) / 60;
+ t->timeType = TIME_TYPE_RFC5424; /* we have a high precision timestamp */
+}
+
/**
* Get the current date/time in the best resolution the operating
* system has to offer (well, actually at most down to the milli-
@@ -74,9 +115,6 @@ static const int tenPowers[6] = { 1, 10, 100, 1000, 10000, 100000 };
static void getCurrTime(struct syslogTime *t, time_t *ttSeconds)
{
struct timeval tp;
- struct tm *tm;
- struct tm tmBuf;
- long lBias;
# if defined(__hpux)
struct timezone tz;
# endif
@@ -93,37 +131,7 @@ static void getCurrTime(struct syslogTime *t, time_t *ttSeconds)
if(ttSeconds != NULL)
*ttSeconds = tp.tv_sec;
- tm = localtime_r((time_t*) &(tp.tv_sec), &tmBuf);
-
- t->year = tm->tm_year + 1900;
- t->month = tm->tm_mon + 1;
- t->day = tm->tm_mday;
- t->hour = tm->tm_hour;
- t->minute = tm->tm_min;
- t->second = tm->tm_sec;
- t->secfrac = tp.tv_usec;
- t->secfracPrecision = 6;
-
-# if __sun
- /* Solaris uses a different method of exporting the time zone.
- * It is UTC - localtime, which is the opposite sign of mins east of GMT.
- */
- lBias = -(daylight ? altzone : timezone);
-# elif defined(__hpux)
- lBias = tz.tz_dsttime ? - tz.tz_minuteswest : 0;
-# else
- lBias = tm->tm_gmtoff;
-# endif
- if(lBias < 0)
- {
- t->OffsetMode = '-';
- lBias *= -1;
- }
- else
- t->OffsetMode = '+';
- t->OffsetHour = lBias / 3600;
- t->OffsetMinute = (lBias % 3600) / 60;
- t->timeType = TIME_TYPE_RFC5424; /* we have a high precision timestamp */
+ timeval2syslogTime(&tp, t);
}
@@ -861,6 +869,7 @@ CODESTARTobjQueryInterface(datetime)
*/
pIf->getCurrTime = getCurrTime;
pIf->GetTime = getTime;
+ pIf->timeval2syslogTime = timeval2syslogTime;
pIf->ParseTIMESTAMP3339 = ParseTIMESTAMP3339;
pIf->ParseTIMESTAMP3164 = ParseTIMESTAMP3164;
pIf->formatTimestampToMySQL = formatTimestampToMySQL;
diff --git a/runtime/datetime.h b/runtime/datetime.h
index 70bbf416..2175d6a1 100644
--- a/runtime/datetime.h
+++ b/runtime/datetime.h
@@ -44,8 +44,10 @@ BEGINinterface(datetime) /* name must also be changed in ENDinterface macro! */
int (*formatTimestampSecFrac)(struct syslogTime *ts, char* pBuf);
/* v3, 2009-11-12 */
time_t (*GetTime)(time_t *ttSeconds);
+ /* v6, 2011-06-20 */
+ void (*timeval2syslogTime)(struct timeval *tp, struct syslogTime *t);
ENDinterface(datetime)
-#define datetimeCURR_IF_VERSION 5 /* increment whenever you change the interface structure! */
+#define datetimeCURR_IF_VERSION 6 /* increment whenever you change the interface structure! */
/* interface changes:
* 1 - initial version
* 2 - not compatible to 1 - bugfix required ParseTIMESTAMP3164 to accept char ** as
@@ -54,6 +56,7 @@ ENDinterface(datetime)
* 3 - taken by v5 branch!
* 4 - formatTimestamp3164 takes a third int parameter
* 5 - merge of versions 3 + 4 (2010-03-09)
+ * 6 - see above
*/
/* prototypes */
diff --git a/runtime/queue.c b/runtime/queue.c
index 00eb76c7..c831836d 100644
--- a/runtime/queue.c
+++ b/runtime/queue.c
@@ -83,6 +83,11 @@ static rsRetVal ConsumerDA(qqueue_t *pThis, wti_t *pWti);
static rsRetVal batchProcessed(qqueue_t *pThis, wti_t *pWti);
static rsRetVal qqueueMultiEnqObjNonDirect(qqueue_t *pThis, multi_submit_t *pMultiSub);
static rsRetVal qqueueMultiEnqObjDirect(qqueue_t *pThis, multi_submit_t *pMultiSub);
+static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr);
+static rsRetVal qDestructDirect(qqueue_t __attribute__((unused)) *pThis);
+static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) *pThis);
+static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis);
+static rsRetVal qDestructDisk(qqueue_t *pThis);
/* some constants for queuePersist () */
#define QUEUE_CHECKPOINT 1
@@ -583,6 +588,47 @@ static rsRetVal qDelLinkedList(qqueue_t *pThis)
/* -------------------- disk -------------------- */
+/* The following function is used to "save" ourself from being killed by
+ * a fatally failed disk queue. A fatal failure is, for example, if no
+ * data can be read or written. In that case, the disk support is disabled,
+ * with all on-disk structures kept as-is as much as possible. Instead, the
+ * queue is switched to direct mode, so that at least
+ * some processing can happen. Of course, this may still have lots of
+ * undesired side-effects, but is probably better than aborting the
+ * syslogd. Note that this function *must* succeed in one way or another, as
+ * we can not recover from failure here. But it may emit different return
+ * states, which can trigger different processing in the higher layers.
+ * rgerhards, 2011-05-03
+ */
+static inline rsRetVal
+queueSwitchToEmergencyMode(qqueue_t *pThis, rsRetVal initiatingError)
+{
+ pThis->iQueueSize = 0;
+ pThis->nLogDeq = 0;
+ qDestructDisk(pThis); /* free disk structures */
+
+ pThis->qType = QUEUETYPE_DIRECT;
+ pThis->qConstruct = qConstructDirect;
+ pThis->qDestruct = qDestructDirect;
+ pThis->qAdd = qAddDirect;
+ pThis->qDel = qDelDirect;
+ pThis->MultiEnq = qqueueMultiEnqObjDirect;
+ if(pThis->pqParent != NULL) {
+ DBGOPRINT((obj_t*) pThis, "DA queue is in emergency mode, disabling DA in parent\n");
+ pThis->pqParent->bIsDA = 0;
+ pThis->pqParent->pqDA = NULL;
+ /* This may have undesired side effects, not sure if I really evaluated
+ * all. So you know where to look at if you come to this point during
+ * troubleshooting ;) -- rgerhards, 2011-05-03
+ */
+ }
+
+ errmsg.LogError(0, initiatingError, "fatal error on disk queue '%s', emergency switch to direct mode",
+ obj.GetName((obj_t*) pThis));
+ return RS_RET_ERR_QUEUE_EMERGENCY;
+}
+
+
static rsRetVal
qqueueLoadPersStrmInfoFixup(strm_t *pStrm, qqueue_t __attribute__((unused)) *pThis)
{
@@ -785,10 +831,7 @@ finalize_it:
static rsRetVal qDeqDisk(qqueue_t *pThis, void **ppUsr)
{
DEFiRet;
-
- CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL));
-
-finalize_it:
+ iRet = obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL);
RETiRet;
}
@@ -1684,7 +1727,18 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti)
ISOBJ_TYPE_assert(pThis, qqueue);
ISOBJ_TYPE_assert(pWti, wti);
- CHKiRet(DequeueForConsumer(pThis, pWti));
+ iRet = DequeueForConsumer(pThis, pWti);
+ if(iRet == RS_RET_FILE_NOT_FOUND) {
+ /* This is a fatal condition and means the queue is almost unusable */
+ d_pthread_mutex_unlock(pThis->mut);
+ DBGOPRINT((obj_t*) pThis, "got 'file not found' error %d, queue defunct\n", iRet);
+ iRet = queueSwitchToEmergencyMode(pThis, iRet);
+ // TODO: think about what to return as iRet -- keep RS_RET_FILE_NOT_FOUND?
+ d_pthread_mutex_lock(pThis->mut);
+ }
+ if (iRet != RS_RET_OK) {
+ FINALIZE;
+ }
/* we now have a non-idle batch of work, so we can release the queue mutex and process it */
d_pthread_mutex_unlock(pThis->mut);
@@ -1778,7 +1832,6 @@ qqueueChkStopWrkrDA(qqueue_t *pThis)
{
DEFiRet;
-//DBGPRINTF("XXXX: chkStopWrkrDA called, low watermark %d, phys Size %d\n", pThis->iLowWtrMrk, getPhysicalQueueSize(pThis));
if(pThis->bEnqOnly) {
iRet = RS_RET_TERMINATE_WHEN_IDLE;
}
diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h
index 52b29ac4..613a3e7a 100644
--- a/runtime/rsyslog.h
+++ b/runtime/rsyslog.h
@@ -343,6 +343,7 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth
RS_RET_FILE_NOT_SPECIFIED = -2180, /**< file name not configured where this was required */
RS_RET_ERR_WRKDIR = -2181, /**< problems with the rsyslog working directory */
RS_RET_WRN_WRKDIR = -2182, /**< correctable problems with the rsyslog working directory */
+ RS_RET_ERR_QUEUE_EMERGENCY = -2183, /**< some fatal error caused queue to switch to emergency mode */
/* RainerScript error messages (range 1000.. 1999) */
RS_RET_SYSVAR_NOT_FOUND = 1001, /**< system variable could not be found (maybe misspelled) */
diff --git a/runtime/strmsrv.c b/runtime/strmsrv.c
index f448cd4f..03691de6 100644
--- a/runtime/strmsrv.c
+++ b/runtime/strmsrv.c
@@ -767,7 +767,7 @@ static rsRetVal
SetKeepAlive(strmsrv_t *pThis, int iVal)
{
DEFiRet;
- dbgprintf("keep-alive set to %d\n", iVal);
+ dbgprintf("strmsrv: keep-alive set to %d\n", iVal);
pThis->bUseKeepAlive = iVal;
RETiRet;
}
diff --git a/runtime/wti.c b/runtime/wti.c
index 9343f5c5..ec56c2d5 100644
--- a/runtime/wti.c
+++ b/runtime/wti.c
@@ -314,7 +314,10 @@ wtiWorker(wti_t *pThis)
*/
localRet = pWtp->pfDoWork(pWtp->pUsr, pThis);
- if(localRet == RS_RET_IDLE) {
+ if(localRet == RS_RET_ERR_QUEUE_EMERGENCY) {
+ d_pthread_mutex_unlock(pWtp->pmutUsr);
+ break; /* end of loop */
+ } else if(localRet == RS_RET_IDLE) {
if(terminateRet == RS_RET_TERMINATE_WHEN_IDLE || bInactivityTOOccured) {
d_pthread_mutex_unlock(pWtp->pmutUsr);
break; /* end of loop */