summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog1
-rw-r--r--Makefile.am4
-rw-r--r--configure.ac15
-rw-r--r--doc/mmutf8fix.html87
-rw-r--r--doc/rsyslog_conf_modules.html1
-rw-r--r--plugins/mmutf8fix/Makefile.am8
-rw-r--r--plugins/mmutf8fix/mmutf8fix.c213
7 files changed, 329 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 58051e00..93e9d1eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,6 @@
---------------------------------------------------------------------------
Version 7.5.4 [devel] 2013-09-??
+- mmutf8fix: new module to fix invalid UTF-8 sequences
- imuxsock: handle unlimited number of additional listen sockets
At the same time, (very) slightly remove memory footprint when
few listeners are monitored.
diff --git a/Makefile.am b/Makefile.am
index f3578917..3923ca90 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -241,6 +241,10 @@ if ENABLE_MMANON
SUBDIRS += plugins/mmanon
endif
+if ENABLE_MMUTF8FIX
+SUBDIRS += plugins/mmutf8fix
+endif
+
if ENABLE_MMCOUNT
SUBDIRS += plugins/mmcount
endif
diff --git a/configure.ac b/configure.ac
index 2d9a20ac..779d2987 100644
--- a/configure.ac
+++ b/configure.ac
@@ -964,6 +964,19 @@ AC_ARG_ENABLE(mmanon,
AM_CONDITIONAL(ENABLE_MMANON, test x$enable_mmanon = xyes)
+# mmutf8fix
+AC_ARG_ENABLE(mmutf8fix,
+ [AS_HELP_STRING([--enable-mmutf8fix],[Enable building mmutf8fix support @<:@default=no@:>@])],
+ [case "${enableval}" in
+ yes) enable_mmutf8fix="yes" ;;
+ no) enable_mmutf8fix="no" ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-mmutf8fix) ;;
+ esac],
+ [enable_mmutf8fix=no]
+)
+AM_CONDITIONAL(ENABLE_MMUTF8FIX, test x$enable_mmutf8fix = xyes)
+
+
# mmcount
AC_ARG_ENABLE(mmcount,
[AS_HELP_STRING([--enable-mmcount],[Enable message counting @<:@default=no@:>@])],
@@ -1518,6 +1531,7 @@ AC_CONFIG_FILES([Makefile \
plugins/mmjsonparse/Makefile \
plugins/mmaudit/Makefile \
plugins/mmanon/Makefile \
+ plugins/mmutf8fix/Makefile \
plugins/mmcount/Makefile \
plugins/mmfields/Makefile \
plugins/mmrfc5424addhmac/Makefile \
@@ -1584,6 +1598,7 @@ echo " mmnormalize module will be compiled: $enable_mmnormalize"
echo " mmjsonparse module will be compiled: $enable_mmjsonparse"
echo " mmjaduit module will be compiled: $enable_mmaudit"
echo " mmsnmptrapd module will be compiled: $enable_mmsnmptrapd"
+echo " mmutf8fix enabled: $enable_mmutf8fix"
echo " mmrfc5424addhmac enabled: $enable_mmrfc5424addhmac"
echo
echo "---{ strgen modules }---"
diff --git a/doc/mmutf8fix.html b/doc/mmutf8fix.html
new file mode 100644
index 00000000..c75e71bc
--- /dev/null
+++ b/doc/mmutf8fix.html
@@ -0,0 +1,87 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Language" content="en">
+<title>Fix invalid UTF-8 Sequences (mmutf8fix)</title></head>
+
+<body>
+<a href="rsyslog_conf_modules.html">back</a>
+
+<h1>Fix invalid UTF-8 Sequences (mmutf8fix)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; mmutf8fix</b></p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Available since</b>: 7.5.4</p>
+<p><b>Description</b>:</p>
+<p>The mmutf8fix module permits to fix invalid UTF-8 sequences.
+Most often, such invalid sequences result from syslog sources sending
+in non-UTF character sets, e.g. ISO 8859. As syslog does not have a way
+to convey the character set information, these sequences are not properly
+handled. While they are typically uncritical with plain text files, they can
+cause big headache with database sources as well as systems like ElasticSearch.
+<p>The module is an experiement at "fixing" such encoding problems. It
+begun as a very simple replacer of non-control characters, and actually breaks
+some UTF-8 encoding right now. If the module turns out to be useful, it
+should be enhanced to support modes that really detect invalid UTF8. In the longer term
+it could also be evolved into an any-charset-to-UTF8 converter. But
+first let's see if it really gets into widespread enough use.
+<p>What it currently does is simply replace all US-ASCII control characters
+(characters ouside the range of 32 to 126) by a configured replacement
+character. For forward compatibility, this will remain the default mode
+in the future. However, as said above, more useful modes will be added
+based on user feedback and demand.
+
+<p><b>Proper Usage</b>:</p>
+<p>Some notes are due for proper use of this module. This is a message modification
+module utilizing the action interface, which means you call it like an action.
+This gives great flexibility on the question on when and how to call this module.
+Note that once it has been called, it actually modifies the message. The original
+messsage is then no longer available. However, this does <b>not</b> change any
+properties set, used or extracted before the modification is done.
+<p>One potential use case is to normalize all messages. This is done by simply calling
+mmutf8fix right in front of all other actions.
+<p>If only a specific source (or set of sources) is known to cause problems,
+mmutf8fix can be conditionally called only on messages from them. This also offers
+performance benefits. If such multiple sources exists, it probably is a good idea
+to define different listeners for their incoming traffic, bind them to specific
+<a href="multi_ruleset.html">ruleset</a> and call mmutf8fix as first action in this
+ruleset.
+
+<p><b>Module Configuration Parameters</b>:</p>
+<p>Currently none.
+<p>&nbsp;</p>
+<p><b>Action Confguration Parameters</b>:</p>
+<ul>
+<li><b>replacementChar</b> - default " " (space), a single character<br>
+This is the character that invalid sequences are replaced by.
+</ul>
+
+<p><b>Caveats/Known Bugs:</b>
+<ul>
+<li><b>only IPv4</b> is supported
+</ul>
+
+<p><b>Samples:</b></p>
+<p>In this snippet, we write one file without fixing UTF-8 and another one
+with the message fixed. Note that once mmutf8fix has run, access to the
+original message is no longer possible.
+<p><textarea rows="5" cols="60">module(load="mmutf8fix")
+action(type="omfile" file="/path/to/non-fixed.log")
+action(type="mmutf8fix")
+action(type="omfile" file="/path/to/fixed.log")
+</textarea>
+
+<p>In this sample, we fix only message originating from host 10.0.0.1.
+<p><textarea rows="5" cols="60">module(load="mmutf8fix")
+if $fromhost-ip == "10.0.0.1" then
+ action(type="mmutf8fix")
+# all other actions here...
+</textarea>
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2013 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
+version 3 or higher.</font></p>
+
+</body></html>
diff --git a/doc/rsyslog_conf_modules.html b/doc/rsyslog_conf_modules.html
index b6b0748b..4d060af0 100644
--- a/doc/rsyslog_conf_modules.html
+++ b/doc/rsyslog_conf_modules.html
@@ -121,6 +121,7 @@ enabled structured log messages.
<li><a href="mmsnmptrapd.html">mmsnmptrapd</a> - uses information provided by snmptrapd inside
the tag to correct the original sender system and priority of messages. Implemented via
the output module interface.
+<li><a href="mmutf8fix.html">mmutf8fix</a> - used to fix invalid UTF-8 character sequences
</ul>
<a name="lm"></a><h2>String Generator Modules</h2>
diff --git a/plugins/mmutf8fix/Makefile.am b/plugins/mmutf8fix/Makefile.am
new file mode 100644
index 00000000..2c0f283a
--- /dev/null
+++ b/plugins/mmutf8fix/Makefile.am
@@ -0,0 +1,8 @@
+pkglib_LTLIBRARIES = mmutf8fix.la
+
+mmutf8fix_la_SOURCES = mmutf8fix.c
+mmutf8fix_la_CPPFLAGS = $(RSRT_CFLAGS) $(PTHREADS_CFLAGS)
+mmutf8fix_la_LDFLAGS = -module -avoid-version
+mmutf8fix_la_LIBADD =
+
+EXTRA_DIST =
diff --git a/plugins/mmutf8fix/mmutf8fix.c b/plugins/mmutf8fix/mmutf8fix.c
new file mode 100644
index 00000000..7ffa3ac5
--- /dev/null
+++ b/plugins/mmutf8fix/mmutf8fix.c
@@ -0,0 +1,213 @@
+/* mmutf8fix.c
+ * fix invalid UTF8 sequences. This is begun as a very simple replacer
+ * of non-control characters, and actually breaks some UTF-8 encoding
+ * right now. If the module turns out to be useful, it should be enhanced
+ * to support modes that really detect invalid UTF8. In the longer term
+ * it could also be evolved into an any-charset-to-UTF8 converter. But
+ * first let's see if it really gets into widespread enough use.
+ *
+ * Copyright 2013 Adiscon GmbH.
+ *
+ * This file is part of rsyslog.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * -or-
+ * see COPYING.ASL20 in the source distribution
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rsyslog.h"
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "conf.h"
+#include "syslogd-types.h"
+#include "srUtils.h"
+#include "template.h"
+#include "module-template.h"
+#include "errmsg.h"
+
+MODULE_TYPE_OUTPUT
+MODULE_TYPE_NOKEEP
+MODULE_CNFNAME("mmutf8fix")
+
+
+DEFobjCurrIf(errmsg);
+DEF_OMOD_STATIC_DATA
+
+/* config variables */
+
+typedef struct _instanceData {
+ uchar replChar;
+} instanceData;
+
+struct modConfData_s {
+ rsconf_t *pConf; /* our overall config object */
+};
+static modConfData_t *loadModConf = NULL;/* modConf ptr to use for the current load process */
+static modConfData_t *runModConf = NULL;/* modConf ptr to use for the current exec process */
+
+
+/* tables for interfacing with the v6 config system */
+/* action (instance) parameters */
+static struct cnfparamdescr actpdescr[] = {
+ { "replacementchar", eCmdHdlrGetChar, 0 }
+};
+static struct cnfparamblk actpblk =
+ { CNFPARAMBLK_VERSION,
+ sizeof(actpdescr)/sizeof(struct cnfparamdescr),
+ actpdescr
+ };
+
+BEGINbeginCnfLoad
+CODESTARTbeginCnfLoad
+ loadModConf = pModConf;
+ pModConf->pConf = pConf;
+ENDbeginCnfLoad
+
+BEGINendCnfLoad
+CODESTARTendCnfLoad
+ENDendCnfLoad
+
+BEGINcheckCnf
+CODESTARTcheckCnf
+ENDcheckCnf
+
+BEGINactivateCnf
+CODESTARTactivateCnf
+ runModConf = pModConf;
+ENDactivateCnf
+
+BEGINfreeCnf
+CODESTARTfreeCnf
+ENDfreeCnf
+
+
+BEGINcreateInstance
+CODESTARTcreateInstance
+ENDcreateInstance
+
+
+BEGINisCompatibleWithFeature
+CODESTARTisCompatibleWithFeature
+ENDisCompatibleWithFeature
+
+
+BEGINfreeInstance
+CODESTARTfreeInstance
+ENDfreeInstance
+
+
+static inline void
+setInstParamDefaults(instanceData *pData)
+{
+ pData->replChar = ' ';
+}
+
+BEGINnewActInst
+ struct cnfparamvals *pvals;
+ int i;
+CODESTARTnewActInst
+ DBGPRINTF("newActInst (mmutf8fix)\n");
+ if((pvals = nvlstGetParams(lst, &actpblk, NULL)) == NULL) {
+ ABORT_FINALIZE(RS_RET_MISSING_CNFPARAMS);
+ }
+
+ CODE_STD_STRING_REQUESTnewActInst(1)
+ CHKiRet(OMSRsetEntry(*ppOMSR, 0, NULL, OMSR_TPL_AS_MSG));
+ CHKiRet(createInstance(&pData));
+ setInstParamDefaults(pData);
+
+ for(i = 0 ; i < actpblk.nParams ; ++i) {
+ if(!pvals[i].bUsed)
+ continue;
+ if(!strcmp(actpblk.descr[i].name, "replacementchar")) {
+ pData->replChar = es_getBufAddr(pvals[i].val.d.estr)[0];
+ } else {
+ dbgprintf("mmutf8fix: program error, non-handled "
+ "param '%s'\n", actpblk.descr[i].name);
+ }
+ }
+
+CODE_STD_FINALIZERnewActInst
+ cnfparamvalsDestruct(pvals, &actpblk);
+ENDnewActInst
+
+
+BEGINdbgPrintInstInfo
+CODESTARTdbgPrintInstInfo
+ENDdbgPrintInstInfo
+
+
+BEGINtryResume
+CODESTARTtryResume
+ENDtryResume
+
+
+BEGINdoAction
+ msg_t *pMsg;
+ uchar *msg;
+ int lenMsg;
+ int i;
+CODESTARTdoAction
+ pMsg = (msg_t*) ppString[0];
+ lenMsg = getMSGLen(pMsg);
+ msg = getMSG(pMsg);
+ for(i = 0 ; i < lenMsg ; ++i) {
+ if(msg[i] < 32 || msg[i] > 126) {
+ msg[i] = pData->replChar;
+ }
+ }
+ENDdoAction
+
+
+BEGINparseSelectorAct
+CODESTARTparseSelectorAct
+CODE_STD_STRING_REQUESTparseSelectorAct(1)
+ if(strncmp((char*) p, ":mmutf8fix:", sizeof(":mmutf8fix:") - 1)) {
+ errmsg.LogError(0, RS_RET_LEGA_ACT_NOT_SUPPORTED,
+ "mmutf8fix supports only v6+ config format, use: "
+ "action(type=\"mmutf8fix\" ...)");
+ }
+ ABORT_FINALIZE(RS_RET_CONFLINE_UNPROCESSED);
+CODE_STD_FINALIZERparseSelectorAct
+ENDparseSelectorAct
+
+
+BEGINmodExit
+CODESTARTmodExit
+ objRelease(errmsg, CORE_COMPONENT);
+ENDmodExit
+
+
+BEGINqueryEtryPt
+CODESTARTqueryEtryPt
+CODEqueryEtryPt_STD_OMOD_QUERIES
+CODEqueryEtryPt_STD_CONF2_OMOD_QUERIES
+CODEqueryEtryPt_STD_CONF2_QUERIES
+ENDqueryEtryPt
+
+
+
+BEGINmodInit()
+CODESTARTmodInit
+ *ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */
+CODEmodInit_QueryRegCFSLineHdlr
+ DBGPRINTF("mmutf8fix: module compiled with rsyslog version %s.\n", VERSION);
+ CHKiRet(objUse(errmsg, CORE_COMPONENT));
+ENDmodInit