diff options
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | Makefile.am | 4 | ||||
-rw-r--r-- | configure.ac | 15 | ||||
-rw-r--r-- | doc/mmutf8fix.html | 87 | ||||
-rw-r--r-- | doc/rsyslog_conf_modules.html | 1 | ||||
-rw-r--r-- | plugins/mmutf8fix/Makefile.am | 8 | ||||
-rw-r--r-- | plugins/mmutf8fix/mmutf8fix.c | 213 |
7 files changed, 329 insertions, 0 deletions
@@ -1,5 +1,6 @@ --------------------------------------------------------------------------- Version 7.5.4 [devel] 2013-09-?? +- mmutf8fix: new module to fix invalid UTF-8 sequences - imuxsock: handle unlimited number of additional listen sockets At the same time, (very) slightly remove memory footprint when few listeners are monitored. diff --git a/Makefile.am b/Makefile.am index f3578917..3923ca90 100644 --- a/Makefile.am +++ b/Makefile.am @@ -241,6 +241,10 @@ if ENABLE_MMANON SUBDIRS += plugins/mmanon endif +if ENABLE_MMUTF8FIX +SUBDIRS += plugins/mmutf8fix +endif + if ENABLE_MMCOUNT SUBDIRS += plugins/mmcount endif diff --git a/configure.ac b/configure.ac index 2d9a20ac..779d2987 100644 --- a/configure.ac +++ b/configure.ac @@ -964,6 +964,19 @@ AC_ARG_ENABLE(mmanon, AM_CONDITIONAL(ENABLE_MMANON, test x$enable_mmanon = xyes) +# mmutf8fix +AC_ARG_ENABLE(mmutf8fix, + [AS_HELP_STRING([--enable-mmutf8fix],[Enable building mmutf8fix support @<:@default=no@:>@])], + [case "${enableval}" in + yes) enable_mmutf8fix="yes" ;; + no) enable_mmutf8fix="no" ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-mmutf8fix) ;; + esac], + [enable_mmutf8fix=no] +) +AM_CONDITIONAL(ENABLE_MMUTF8FIX, test x$enable_mmutf8fix = xyes) + + # mmcount AC_ARG_ENABLE(mmcount, [AS_HELP_STRING([--enable-mmcount],[Enable message counting @<:@default=no@:>@])], @@ -1518,6 +1531,7 @@ AC_CONFIG_FILES([Makefile \ plugins/mmjsonparse/Makefile \ plugins/mmaudit/Makefile \ plugins/mmanon/Makefile \ + plugins/mmutf8fix/Makefile \ plugins/mmcount/Makefile \ plugins/mmfields/Makefile \ plugins/mmrfc5424addhmac/Makefile \ @@ -1584,6 +1598,7 @@ echo " mmnormalize module will be compiled: $enable_mmnormalize" echo " mmjsonparse module will be compiled: $enable_mmjsonparse" echo " mmjaduit module will be compiled: $enable_mmaudit" echo " mmsnmptrapd module will be compiled: $enable_mmsnmptrapd" +echo " mmutf8fix enabled: $enable_mmutf8fix" echo " mmrfc5424addhmac enabled: $enable_mmrfc5424addhmac" echo 
echo "---{ strgen modules }---" diff --git a/doc/mmutf8fix.html b/doc/mmutf8fix.html new file mode 100644 index 00000000..c75e71bc --- /dev/null +++ b/doc/mmutf8fix.html @@ -0,0 +1,87 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html><head> +<meta http-equiv="Content-Language" content="en"> +<title>Fix invalid UTF-8 Sequences (mmutf8fix)</title></head> + +<body> +<a href="rsyslog_conf_modules.html">back</a> + +<h1>Fix invalid UTF-8 Sequences (mmutf8fix)</h1> +<p><b>Module Name: mmutf8fix</b></p> +<p><b>Author: </b>Rainer Gerhards <rgerhards@adiscon.com></p> +<p><b>Available since</b>: 7.5.4</p> +<p><b>Description</b>:</p> +<p>The mmutf8fix module permits to fix invalid UTF-8 sequences. +Most often, such invalid sequences result from syslog sources sending +in non-UTF character sets, e.g. ISO 8859. As syslog does not have a way +to convey the character set information, these sequences are not properly +handled. While they are typically uncritical with plain text files, they can +cause big headaches with database sources as well as systems like ElasticSearch. +<p>The module is an experiment at "fixing" such encoding problems. It +began as a very simple replacer of non-printable characters, and actually breaks +some UTF-8 encoding right now. If the module turns out to be useful, it +should be enhanced to support modes that really detect invalid UTF8. In the longer term +it could also be evolved into an any-charset-to-UTF8 converter. But +first let's see if it really gets into widespread enough use. +<p>What it currently does is simply replace all characters outside the printable +US-ASCII range (that is, outside the range of 32 to 126) by a configured replacement +character. For forward compatibility, this will remain the default mode +in the future. However, as said above, more useful modes will be added +based on user feedback and demand. + +<p><b>Proper Usage</b>:</p> +<p>Some notes are due for proper use of this module.
This is a message modification +module utilizing the action interface, which means you call it like an action. +This gives great flexibility regarding when and how to call this module. +Note that once it has been called, it actually modifies the message. The original +message is then no longer available. However, this does <b>not</b> change any +properties set, used or extracted before the modification is done. +<p>One potential use case is to normalize all messages. This is done by simply calling +mmutf8fix right in front of all other actions. +<p>If only a specific source (or set of sources) is known to cause problems, +mmutf8fix can be conditionally called only on messages from them. This also offers +performance benefits. If multiple such sources exist, it probably is a good idea +to define different listeners for their incoming traffic, bind them to a specific +<a href="multi_ruleset.html">ruleset</a> and call mmutf8fix as the first action in this +ruleset. + +<p><b>Module Configuration Parameters</b>:</p> +<p>Currently none. +<p> </p> +<p><b>Action Configuration Parameters</b>:</p> +<ul> +<li><b>replacementChar</b> - default " " (space), a single character<br> +This is the character that invalid sequences are replaced by. +</ul> + +<p><b>Caveats/Known Bugs:</b> +<ul> +<li><b>valid multi-byte UTF-8 sequences are replaced as well</b>, because every byte outside the range of 32 to 126 is overwritten +</ul> + +<p><b>Samples:</b></p> +<p>In this snippet, we write one file without fixing UTF-8 and another one +with the message fixed. Note that once mmutf8fix has run, access to the +original message is no longer possible. +<p><textarea rows="5" cols="60">module(load="mmutf8fix") +action(type="omfile" file="/path/to/non-fixed.log") +action(type="mmutf8fix") +action(type="omfile" file="/path/to/fixed.log") +</textarea> + +<p>In this sample, we fix only messages originating from host 10.0.0.1. +<p><textarea rows="5" cols="60">module(load="mmutf8fix") +if $fromhost-ip == "10.0.0.1" then + action(type="mmutf8fix") +# all other actions here...
+</textarea> + +<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual +index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p> +<p><font size="2">This documentation is part of the +<a href="http://www.rsyslog.com/">rsyslog</a> project.<br> +Copyright © 2013 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and +<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL +version 3 or higher.</font></p> + +</body></html> diff --git a/doc/rsyslog_conf_modules.html b/doc/rsyslog_conf_modules.html index b6b0748b..4d060af0 100644 --- a/doc/rsyslog_conf_modules.html +++ b/doc/rsyslog_conf_modules.html @@ -121,6 +121,7 @@ enabled structured log messages. <li><a href="mmsnmptrapd.html">mmsnmptrapd</a> - uses information provided by snmptrapd inside the tag to correct the original sender system and priority of messages. Implemented via the output module interface. +<li><a href="mmutf8fix.html">mmutf8fix</a> - used to fix invalid UTF-8 character sequences </ul> <a name="lm"></a><h2>String Generator Modules</h2> diff --git a/plugins/mmutf8fix/Makefile.am b/plugins/mmutf8fix/Makefile.am new file mode 100644 index 00000000..2c0f283a --- /dev/null +++ b/plugins/mmutf8fix/Makefile.am @@ -0,0 +1,8 @@ +pkglib_LTLIBRARIES = mmutf8fix.la + +mmutf8fix_la_SOURCES = mmutf8fix.c +mmutf8fix_la_CPPFLAGS = $(RSRT_CFLAGS) $(PTHREADS_CFLAGS) +mmutf8fix_la_LDFLAGS = -module -avoid-version +mmutf8fix_la_LIBADD = + +EXTRA_DIST = diff --git a/plugins/mmutf8fix/mmutf8fix.c b/plugins/mmutf8fix/mmutf8fix.c new file mode 100644 index 00000000..7ffa3ac5 --- /dev/null +++ b/plugins/mmutf8fix/mmutf8fix.c @@ -0,0 +1,213 @@ +/* mmutf8fix.c + * fix invalid UTF8 sequences. This is begun as a very simple replacer + * of non-control characters, and actually breaks some UTF-8 encoding + * right now. If the module turns out to be useful, it should be enhanced + * to support modes that really detect invalid UTF8. 
In the longer term + * it could also be evolved into an any-charset-to-UTF8 converter. But + * first let's see if it really gets into widespread enough use. + * + * Copyright 2013 Adiscon GmbH. + * + * This file is part of rsyslog. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * -or- + * see COPYING.ASL20 in the source distribution + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "rsyslog.h" +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <signal.h> +#include <errno.h> +#include <unistd.h> +#include <stdint.h> +#include "conf.h" +#include "syslogd-types.h" +#include "srUtils.h" +#include "template.h" +#include "module-template.h" +#include "errmsg.h" + +MODULE_TYPE_OUTPUT +MODULE_TYPE_NOKEEP +MODULE_CNFNAME("mmutf8fix") + + +DEFobjCurrIf(errmsg); +DEF_OMOD_STATIC_DATA + +/* config variables */ + +typedef struct _instanceData { + uchar replChar; /* written over every message byte outside 32..126 */ +} instanceData; + +struct modConfData_s { + rsconf_t *pConf; /* our overall config object */ +}; +static modConfData_t *loadModConf = NULL;/* modConf ptr to use for the current load process */ +static modConfData_t *runModConf = NULL;/* modConf ptr to use for the current exec process */ + + +/* tables for interfacing with the v6 config system */ +/* action (instance) parameters */ +static struct cnfparamdescr actpdescr[] = { + { "replacementchar", eCmdHdlrGetChar, 0 } +}; +static struct cnfparamblk actpblk = + { CNFPARAMBLK_VERSION,
sizeof(actpdescr)/sizeof(struct cnfparamdescr), + actpdescr + }; + +BEGINbeginCnfLoad +CODESTARTbeginCnfLoad + loadModConf = pModConf; + pModConf->pConf = pConf; +ENDbeginCnfLoad + +BEGINendCnfLoad +CODESTARTendCnfLoad +ENDendCnfLoad + +BEGINcheckCnf +CODESTARTcheckCnf +ENDcheckCnf + +BEGINactivateCnf +CODESTARTactivateCnf + runModConf = pModConf; +ENDactivateCnf + +BEGINfreeCnf +CODESTARTfreeCnf +ENDfreeCnf + + +BEGINcreateInstance +CODESTARTcreateInstance +ENDcreateInstance + + +BEGINisCompatibleWithFeature +CODESTARTisCompatibleWithFeature +ENDisCompatibleWithFeature + + +BEGINfreeInstance +CODESTARTfreeInstance +ENDfreeInstance + + +static inline void +setInstParamDefaults(instanceData *pData) +{ + pData->replChar = ' '; +} + +/* create one action instance: start from the defaults, then apply the + * parameters that were actually given in the action() statement + */ +BEGINnewActInst + struct cnfparamvals *pvals; + int i; +CODESTARTnewActInst + DBGPRINTF("newActInst (mmutf8fix)\n"); + if((pvals = nvlstGetParams(lst, &actpblk, NULL)) == NULL) { + ABORT_FINALIZE(RS_RET_MISSING_CNFPARAMS); + } + + CODE_STD_STRING_REQUESTnewActInst(1) + CHKiRet(OMSRsetEntry(*ppOMSR, 0, NULL, OMSR_TPL_AS_MSG)); + CHKiRet(createInstance(&pData)); + setInstParamDefaults(pData); + + for(i = 0 ; i < actpblk.nParams ; ++i) { + if(!pvals[i].bUsed) + continue; + if(!strcmp(actpblk.descr[i].name, "replacementchar")) { + pData->replChar = es_getBufAddr(pvals[i].val.d.estr)[0]; + } else { + dbgprintf("mmutf8fix: program error, non-handled " + "param '%s'\n", actpblk.descr[i].name); + } + } + +CODE_STD_FINALIZERnewActInst + cnfparamvalsDestruct(pvals, &actpblk); +ENDnewActInst + + +BEGINdbgPrintInstInfo +CODESTARTdbgPrintInstInfo +ENDdbgPrintInstInfo + + +BEGINtryResume +CODESTARTtryResume +ENDtryResume + + +/* overwrite every byte of the buffer returned by getMSG() that lies outside + * the range 32..126 with the instance's replacement character + */ +BEGINdoAction + msg_t *pMsg; + uchar *msg; + int lenMsg; + int i; +CODESTARTdoAction + pMsg = (msg_t*) ppString[0]; + lenMsg = getMSGLen(pMsg); + msg = getMSG(pMsg); + for(i = 0 ; i < lenMsg ; ++i) { + if(msg[i] < 32 || msg[i] > 126) { + msg[i] = pData->replChar; + } + } +ENDdoAction + + +BEGINparseSelectorAct
+CODESTARTparseSelectorAct +CODE_STD_STRING_REQUESTparseSelectorAct(1) + /* strncmp() returns 0 on a match: complain only if the legacy line really addresses this module */ + if(!strncmp((char*) p, ":mmutf8fix:", sizeof(":mmutf8fix:") - 1)) { + errmsg.LogError(0, RS_RET_LEGA_ACT_NOT_SUPPORTED, + "mmutf8fix supports only v6+ config format, use: " + "action(type=\"mmutf8fix\" ...)"); + } + ABORT_FINALIZE(RS_RET_CONFLINE_UNPROCESSED); +CODE_STD_FINALIZERparseSelectorAct +ENDparseSelectorAct + + +BEGINmodExit +CODESTARTmodExit + objRelease(errmsg, CORE_COMPONENT); +ENDmodExit + + +BEGINqueryEtryPt +CODESTARTqueryEtryPt +CODEqueryEtryPt_STD_OMOD_QUERIES +CODEqueryEtryPt_STD_CONF2_OMOD_QUERIES +CODEqueryEtryPt_STD_CONF2_QUERIES +ENDqueryEtryPt + + + +BEGINmodInit() +CODESTARTmodInit + *ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */ +CODEmodInit_QueryRegCFSLineHdlr + DBGPRINTF("mmutf8fix: module compiled with rsyslog version %s.\n", VERSION); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); +ENDmodInit |